Path: blob/master/input/google_drive_tools.py
934 views
import requests1import io2from configparser import ConfigParser3'''4Based on:5http://stackoverflow.com/a/392252726'''7cfg=ConfigParser()8cfg.optionxform=str9tmp=cfg.read('drive.cfg')10drive_file=cfg['FILES']1112def load_drive_files_keys():13cfg=ConfigParser()14cfg.optionxform=str15tmp=cfg.read('drive.cfg')16return cfg['FILES']171819def pandas_from_google_drive_csv(id,gss_sheet=0,gss_query=None):20'''21Read Google spread sheet by id.22Options:23gss_sheet=N : if in old format select the N-sheet24gss_query=SQL_command: Filter with some SQL command25example26SQL_command: 'select B,D,E,F,I where (H contains 'GFIF') order by D desc'27'''28import pandas as pd29if not gss_query:30url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&gid=%s' %(id,gss_sheet)31else:32url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&gid=%s&tq=%s' %(id,gss_sheet,gss_query)33r=requests.get(url)34if r.status_code==200:35csv_file=io.StringIO(r.text) # or directly with: urllib.request.urlopen(url)36return pd.read_csv( csv_file,keep_default_na=False)3738def download_file_from_google_drive(id,destination=None,binary=True):39'''40Download file from google drive as binary (default) or txt file.41If not destination the file object is returned42Example: Let id="XXX" a txt file:431) fb=download_file_from_google_drive("XXX") ; fb.decode() #to convert to text file442) ft=download_file_from_google_drive("XXX",binary=False) # txt file453) fb=download_file_from_google_drive("XXX",'output_file') # always binay46'''47URL = "https://docs.google.com/uc?export=download"4849session = requests.Session()5051response = session.get(URL, params = { 'id' : id }, stream = True)52token = get_confirm_token(response)5354if token:55params = { 'id' : id, 'confirm' : token }56response = session.get(URL, params = params, stream = True)575859return save_response_content(response, destination=destination,binary=binary)6061def get_confirm_token(response):62for key, value in response.cookies.items():63if key.startswith('download_warning'):64return value6566return None6768def save_response_content(response, destination=None,binary=True):69CHUNK_SIZE = 327687071if destination:72f=open(destination, "wb") #binary file73else:74chunks=b''75for chunk in response.iter_content(CHUNK_SIZE):76if chunk: # filter out keep-alive new chunks77if destination:78f.write(chunk)79else:80chunks=chunks+chunk81if destination:82f.close() #writes the file83else:84if binary:85return io.BytesIO(chunks) # returns the file object86else:87return io.StringIO(chunks.decode()) # returns the file object8889def read_drive_excel(file_name,**kwargs):90'''Read excel from google drive91Requieres a drive_file dictionary with the keys for the google drive92file names.93If the file_name is not found in the drive_file dictionary it is read locally.94If the file_name have an extension .csv, tray to read the google spreadsheet95directly: check pandas_from_google_drive_csv for passed options96WARNING: ONLY OLD Google Spread Sheet allows to load sheet different from 097'''98import pandas as pd99import re100if re.search('\.csv$',file_name):101if drive_file.get(file_name):102return pandas_from_google_drive_csv(drive_file.get(file_name),**kwargs)103else:104return pd.read_csv(file_name,**kwargs)105106if drive_file.get(file_name):107return pd.read_excel( download_file_from_google_drive(108drive_file.get(file_name) ) ,**kwargs) # ,{} is an accepted option109else:110return pd.read_excel(file_name,**kwargs)111112113def query_drive_csv(114gss_url="https://spreadsheets.google.com",115gss_format="csv",116gss_key="0AuLa_xuSIEvxdERYSGVQWDBTX1NCN19QMXVpb0lhWXc",117gss_sheet=0,118gss_query="select B,D,E,F,I where (H contains 'GFIF') order by D desc",119gss_keep_default_na=False120):121import pandas as pd122issn_url="%s/tq?tqx=out:%s&tq=%s&key=%s&gid=%s" %(gss_url,\123gss_format,\124gss_query,\125gss_key,\126str(gss_sheet))127128return pd.read_csv( issn_url.replace(' ','+') )129130