In [1]:
import requests

# Catalog query restricted to datasets on the data.cityofnewyork.us domain;
# paging parameters are appended to this URL by the requests below.
base_url = "http://api.us.socrata.com/api/catalog/v1?only=datasets&domains=data.cityofnewyork.us"
# CSV file that the per-dataset row counts are written to.
filename = "row_counts-data.cityofnewyork.us.csv"

# limit=0 asks for no result entries; only the resultSetSize field of the
# response is used, as the number of datasets to page through.
r = requests.get(base_url+"&limit=0")
# Fail here with a clear HTTP error instead of an opaque KeyError/JSON error
# on the next line when the catalog request did not succeed.
r.raise_for_status()
total_count = r.json()['resultSetSize']

# Single-argument parenthesised form prints the same on Python 2 and Python 3.
print(total_count)
1077
In [3]:
# Page through the catalog 100 entries at a time, using the number of
# datasets gathered so far as the offset of the next page.
collection = []
while len(collection) < total_count:
    r = requests.get(base_url+"&limit=100&offset={0}".format(len(collection)))
    # Surface HTTP failures directly rather than as a KeyError on 'results'.
    r.raise_for_status()
    page = r.json()['results']
    if not page:
        # The catalog returned fewer entries than total_count announced.
        # Without this guard the offset never advances and the loop would
        # request the same empty page forever.
        print("no results at offset {0}; stopping early".format(len(collection)))
        break
    collection.extend(page)
    print("{0} datasets collected".format(len(collection)))
100 datasets collected
200 datasets collected
300 datasets collected
400 datasets collected
500 datasets collected
600 datasets collected
700 datasets collected
800 datasets collected
900 datasets collected
1000 datasets collected
1077 datasets collected
In [ ]:
def get_row_count(domain,fxf):
    """Return the row count reported for one dataset.

    Sends a ``$select=count(*)`` query to
    ``https://<domain>/resource/<fxf>.json`` and reads the ``count`` field of
    the first element of the decoded JSON response.

    Args:
        domain: host name used in the request URL.
        fxf: dataset identifier used in the resource path.

    Returns:
        The ``count`` value exactly as decoded from the JSON response, or
        ``0`` when the response does not have the expected shape (for
        example an empty list or an error object).

    Raises:
        Anything raised by ``requests.get`` or ``Response.json``; network and
        decoding failures are not swallowed.
    """
    r = requests.get("https://{0}/resource/{1}.json?$select=count(*)".format(domain,fxf))
    response = r.json()
    try:
        return response[0]['count']
    except (IndexError, KeyError, TypeError):
        # Best effort: a payload without a usable count is reported as 0.
        # Only lookup failures are caught, so KeyboardInterrupt and genuine
        # programming errors are no longer silently hidden.
        return 0
    
# Open the CSV once. The with-block guarantees the handle is closed even if
# the loop is interrupted part-way through, so no file handles are leaked.
# Mode 'w' already empties an existing file, so no explicit truncate() is needed.
with open(filename, 'w') as csv_file:
    csv_file.write("domain,obe_fx4,row_count\n")

    for dataset in collection:
        domain = dataset['metadata']['domain']
        obe_fxf = dataset['resource']['id']
        row_count = get_row_count(domain,obe_fxf)
        line = "{0},{1},{2}".format(domain,obe_fxf,row_count)
        print(line)
        csv_file.write(line+"\n")
        # Flush after every row so the rows collected so far are written out
        # while the remaining datasets are still being queried.
        csv_file.flush()
data.cityofnewyork.us,ic3t-wcy2,233921
data.cityofnewyork.us,xjfq-wh2d,82466
data.cityofnewyork.us,jb3k-j3gp,61517
data.cityofnewyork.us,8wbx-tsch,78154
data.cityofnewyork.us,erm2-nwe9,13208682
data.cityofnewyork.us,ipu4-2q9a,567137
data.cityofnewyork.us,h9gi-nx95,840984
data.cityofnewyork.us,rhe8-mgbb,14834289
data.cityofnewyork.us,dg92-zbpx,263063
data.cityofnewyork.us,yhuu-4pt3,8285
data.cityofnewyork.us,sjfe-fppp,146108
data.cityofnewyork.us,n776-dsqy,13101032
data.cityofnewyork.us,kpav-sd4t,4184
data.cityofnewyork.us,5tub-eh45,145260
data.cityofnewyork.us,zt9s-n5aj,460
data.cityofnewyork.us,kku6-nxdu,236
data.cityofnewyork.us,avwh-jmzt,56
data.cityofnewyork.us,jt7v-77mi,9100278
data.cityofnewyork.us,nzjr-3966,7712
data.cityofnewyork.us,f9bf-2cp4,478
data.cityofnewyork.us,8hkx-uppz,29
data.cityofnewyork.us,f4yq-wry5,6335
data.cityofnewyork.us,jgtb-hmpg,193163
data.cityofnewyork.us,n5mv-nfpy,15937
data.cityofnewyork.us,hc8x-tcnd,219
data.cityofnewyork.us,kh3d-xhq7,68
data.cityofnewyork.us,ncbg-6agr,84
data.cityofnewyork.us,dpec-ucu7,9285
data.cityofnewyork.us,f7b6-v6v3,78
data.cityofnewyork.us,tdsx-cvye,0
data.cityofnewyork.us,e98g-f8hy,96963
data.cityofnewyork.us,eabe-havv,759103
data.cityofnewyork.us,ia2d-e54m,31
data.cityofnewyork.us,td5q-ry6d,14191580
data.cityofnewyork.us,jb7j-dtam,3840
data.cityofnewyork.us,7z8d-msnt,35
data.cityofnewyork.us,5b3a-rs48,5899
data.cityofnewyork.us,jfzu-yy6n,8773
data.cityofnewyork.us,k46n-sa2m,1037
data.cityofnewyork.us,b7kx-qikm,137
data.cityofnewyork.us,4e2n-s75z,27461
data.cityofnewyork.us,25th-nujf,13962
data.cityofnewyork.us,nc67-uf89,7230576
data.cityofnewyork.us,jxdc-hnze,77
data.cityofnewyork.us,hyij-8hr7,1123465
data.cityofnewyork.us,fzk8-3ynb,15
data.cityofnewyork.us,5gde-fmj3,14112
data.cityofnewyork.us,ivix-m77e,13553
data.cityofnewyork.us,kiv2-tbus,10173437
data.cityofnewyork.us,pasr-j7fb,247023
data.cityofnewyork.us,v475-8jcj,434
data.cityofnewyork.us,swpk-hqdp,390
data.cityofnewyork.us,ufu7-zp25,98
data.cityofnewyork.us,37cg-gxjd,4336
data.cityofnewyork.us,43nn-pn8j,451210
data.cityofnewyork.us,uuxn-wzxe,214
In [ ]:
 
In [ ]: