CoCalc -- google_drive

GitHub Repository: restrepo/ComputationalMethods
Path: blob/master/input/google_drive_tools.py
⁹³⁴ views
1
import requests
2
import io
3
from configparser import ConfigParser
4
'''
5
Based on:
6
http://stackoverflow.com/a/39225272
7
'''
8
cfg=ConfigParser()
9
cfg.optionxform=str
10
tmp=cfg.read('drive.cfg')
11
drive_file=cfg['FILES']
12

13
def load_drive_files_keys():
14
    cfg=ConfigParser()
15
    cfg.optionxform=str
16
    tmp=cfg.read('drive.cfg')
17
    return cfg['FILES']
18

19
    
20
def pandas_from_google_drive_csv(id,gss_sheet=0,gss_query=None):
21
    '''
22
    Read Google spread sheet by id.
23
    Options:
24
       gss_sheet=N : if in old format select the N-sheet
25
       gss_query=SQL_command: Filter with some SQL command
26
       example 
27
         SQL_command: 'select B,D,E,F,I where (H contains 'GFIF') order by D desc'
28
    '''
29
    import pandas as pd
30
    if not gss_query:
31
        url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&gid=%s' %(id,gss_sheet)
32
    else:
33
        url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&gid=%s&tq=%s' %(id,gss_sheet,gss_query)
34
    r=requests.get(url)
35
    if r.status_code==200:
36
        csv_file=io.StringIO(r.text) # or directly  with: urllib.request.urlopen(url)
37
        return pd.read_csv( csv_file,keep_default_na=False)
38

39
def download_file_from_google_drive(id,destination=None,binary=True):
40
    '''
41
    Download file from google drive as binary (default) or txt file.
42
    If not destination the file object is returned
43
    Example: Let id="XXX" a txt file:
44
    1) fb=download_file_from_google_drive("XXX") ; fb.decode() #to convert to text file
45
    2) ft=download_file_from_google_drive("XXX",binary=False) # txt file
46
    3) fb=download_file_from_google_drive("XXX",'output_file') # always binay
47
    '''
48
    URL = "https://docs.google.com/uc?export=download"
49

50
    session = requests.Session()
51

52
    response = session.get(URL, params = { 'id' : id }, stream = True)
53
    token = get_confirm_token(response)
54

55
    if token:
56
        params = { 'id' : id, 'confirm' : token }
57
        response = session.get(URL, params = params, stream = True)
58

59
    
60
    return save_response_content(response, destination=destination,binary=binary)
61

62
def get_confirm_token(response):
63
    for key, value in response.cookies.items():
64
        if key.startswith('download_warning'):
65
            return value
66

67
    return None
68

69
def save_response_content(response, destination=None,binary=True):
70
    CHUNK_SIZE = 32768
71

72
    if destination:
73
        f=open(destination, "wb") #binary file
74
    else:
75
        chunks=b''
76
    for chunk in response.iter_content(CHUNK_SIZE):
77
        if chunk: # filter out keep-alive new chunks
78
            if destination:
79
                f.write(chunk)
80
            else:
81
                chunks=chunks+chunk
82
    if destination:
83
        f.close()  #writes the file
84
    else:
85
        if binary:
86
            return io.BytesIO(chunks) # returns the file object
87
        else:
88
            return io.StringIO(chunks.decode()) # returns the file object
89
        
90
def read_drive_excel(file_name,**kwargs):
91
    '''Read excel from google drive
92
     Requieres a drive_file dictionary with the keys for the google drive
93
     file names.
94
     If the file_name is not found in the drive_file dictionary it is read locally.
95
     If the file_name have an extension .csv, tray to read the google spreadsheet 
96
     directly: check pandas_from_google_drive_csv for passed options
97
     WARNING: ONLY OLD Google Spread Sheet allows to load sheet different from 0
98
     '''
99
    import pandas as pd
100
    import re
101
    if re.search('\.csv$',file_name):
102
        if drive_file.get(file_name):
103
            return pandas_from_google_drive_csv(drive_file.get(file_name),**kwargs)
104
        else:
105
            return pd.read_csv(file_name,**kwargs)
106

107
    if drive_file.get(file_name):    
108
        return pd.read_excel( download_file_from_google_drive(
109
            drive_file.get(file_name) ) ,**kwargs)  # ,{} is an accepted option   
110
    else:
111
        return pd.read_excel(file_name,**kwargs)
112
    
113

114
def query_drive_csv(
115
    gss_url="https://spreadsheets.google.com",
116
    gss_format="csv",
117
    gss_key="0AuLa_xuSIEvxdERYSGVQWDBTX1NCN19QMXVpb0lhWXc",
118
    gss_sheet=0,
119
    gss_query="select B,D,E,F,I where (H contains 'GFIF') order by D desc",
120
    gss_keep_default_na=False
121
    ):
122
    import pandas as pd
123
    issn_url="%s/tq?tqx=out:%s&tq=%s&key=%s&gid=%s" %(gss_url,\
124
                                           gss_format,\
125
                                           gss_query,\
126
                                           gss_key,\
127
                                           str(gss_sheet))
128

129
    return pd.read_csv( issn_url.replace(' ','+') )
130
Product

Resources

Company