Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
restrepo
GitHub Repository: restrepo/ComputationalMethods
Path: blob/master/input/google_drive_tools.py
934 views
1
import requests
2
import io
3
from configparser import ConfigParser
4
'''
5
Based on:
6
http://stackoverflow.com/a/39225272
7
'''
8
cfg=ConfigParser()
9
cfg.optionxform=str
10
tmp=cfg.read('drive.cfg')
11
drive_file=cfg['FILES']
12
13
def load_drive_files_keys():
14
cfg=ConfigParser()
15
cfg.optionxform=str
16
tmp=cfg.read('drive.cfg')
17
return cfg['FILES']
18
19
20
def pandas_from_google_drive_csv(id,gss_sheet=0,gss_query=None):
21
'''
22
Read Google spread sheet by id.
23
Options:
24
gss_sheet=N : if in old format select the N-sheet
25
gss_query=SQL_command: Filter with some SQL command
26
example
27
SQL_command: 'select B,D,E,F,I where (H contains 'GFIF') order by D desc'
28
'''
29
import pandas as pd
30
if not gss_query:
31
url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&gid=%s' %(id,gss_sheet)
32
else:
33
url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&gid=%s&tq=%s' %(id,gss_sheet,gss_query)
34
r=requests.get(url)
35
if r.status_code==200:
36
csv_file=io.StringIO(r.text) # or directly with: urllib.request.urlopen(url)
37
return pd.read_csv( csv_file,keep_default_na=False)
38
39
def download_file_from_google_drive(id,destination=None,binary=True):
40
'''
41
Download file from google drive as binary (default) or txt file.
42
If not destination the file object is returned
43
Example: Let id="XXX" a txt file:
44
1) fb=download_file_from_google_drive("XXX") ; fb.decode() #to convert to text file
45
2) ft=download_file_from_google_drive("XXX",binary=False) # txt file
46
3) fb=download_file_from_google_drive("XXX",'output_file') # always binay
47
'''
48
URL = "https://docs.google.com/uc?export=download"
49
50
session = requests.Session()
51
52
response = session.get(URL, params = { 'id' : id }, stream = True)
53
token = get_confirm_token(response)
54
55
if token:
56
params = { 'id' : id, 'confirm' : token }
57
response = session.get(URL, params = params, stream = True)
58
59
60
return save_response_content(response, destination=destination,binary=binary)
61
62
def get_confirm_token(response):
63
for key, value in response.cookies.items():
64
if key.startswith('download_warning'):
65
return value
66
67
return None
68
69
def save_response_content(response, destination=None,binary=True):
70
CHUNK_SIZE = 32768
71
72
if destination:
73
f=open(destination, "wb") #binary file
74
else:
75
chunks=b''
76
for chunk in response.iter_content(CHUNK_SIZE):
77
if chunk: # filter out keep-alive new chunks
78
if destination:
79
f.write(chunk)
80
else:
81
chunks=chunks+chunk
82
if destination:
83
f.close() #writes the file
84
else:
85
if binary:
86
return io.BytesIO(chunks) # returns the file object
87
else:
88
return io.StringIO(chunks.decode()) # returns the file object
89
90
def read_drive_excel(file_name,**kwargs):
91
'''Read excel from google drive
92
Requieres a drive_file dictionary with the keys for the google drive
93
file names.
94
If the file_name is not found in the drive_file dictionary it is read locally.
95
If the file_name have an extension .csv, tray to read the google spreadsheet
96
directly: check pandas_from_google_drive_csv for passed options
97
WARNING: ONLY OLD Google Spread Sheet allows to load sheet different from 0
98
'''
99
import pandas as pd
100
import re
101
if re.search('\.csv$',file_name):
102
if drive_file.get(file_name):
103
return pandas_from_google_drive_csv(drive_file.get(file_name),**kwargs)
104
else:
105
return pd.read_csv(file_name,**kwargs)
106
107
if drive_file.get(file_name):
108
return pd.read_excel( download_file_from_google_drive(
109
drive_file.get(file_name) ) ,**kwargs) # ,{} is an accepted option
110
else:
111
return pd.read_excel(file_name,**kwargs)
112
113
114
def query_drive_csv(
115
gss_url="https://spreadsheets.google.com",
116
gss_format="csv",
117
gss_key="0AuLa_xuSIEvxdERYSGVQWDBTX1NCN19QMXVpb0lhWXc",
118
gss_sheet=0,
119
gss_query="select B,D,E,F,I where (H contains 'GFIF') order by D desc",
120
gss_keep_default_na=False
121
):
122
import pandas as pd
123
issn_url="%s/tq?tqx=out:%s&tq=%s&key=%s&gid=%s" %(gss_url,\
124
gss_format,\
125
gss_query,\
126
gss_key,\
127
str(gss_sheet))
128
129
return pd.read_csv( issn_url.replace(' ','+') )
130