Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
allendowney
GitHub Repository: allendowney/thinkbayes2
Path: blob/master/scripts/redline_data.py
1901 views
1
#!/usr/bin/python
2
3
"""This file contains code for use with "Think Bayes",
4
by Allen B. Downey, available from greenteapress.com
5
6
Copyright 2013 Allen B. Downey
7
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
8
"""
9
10
from __future__ import print_function, division
11
12
import csv
13
import json
14
import numpy
15
import os
16
import sys
17
import redis
18
import urllib2
19
20
from datetime import datetime, time
21
22
from time import sleep
23
24
25
class Redis(object):
    """Provides access to a Redis instance on Redis To Go.

    Keys used by this class:
      'days'     set of ISO date strings for which observations exist
      <day>      set of trip ids observed on that day
      <tripid>   sorted set of observations for one trip; as read back by
                 GetPredictedArrival, the member is the observation
                 timestamp and the score is the seconds-away value
    """

    host = 'dory.redistogo.com'
    port = 10534

    def __init__(self):
        """Connects using the password in the REDIS_AUTH environment variable.

        Prints a message and exits the process with status 1 if the
        variable is not set.
        """
        try:
            password = os.environ['REDIS_AUTH']
        except KeyError:
            print('Environment variable REDIS_AUTH is not set.')
            # A missing credential is a failure, so report a non-zero
            # status (a bare sys.exit() would signal success).
            sys.exit(1)

        self.r = redis.StrictRedis(host=self.host,
                                   port=self.port,
                                   password=password,
                                   db=0)

    def WriteTrainSpotting(self, timestamp, tripid, seconds, live=True):
        """Writes a trainspotting event to the database.

        The event is always printed; it is only stored when live is true.

        timestamp: int seconds since epoch
        tripid: string unique id
        seconds: int how many seconds away the train is
        live: boolean, whether to actually write the data
        """
        dt = datetime.fromtimestamp(timestamp)
        day = dt.date().isoformat()

        print(dt, tripid, seconds, timestamp)

        if live:
            self.r.sadd('days', day)
            self.r.sadd(day, tripid)
            # NOTE(review): positional (score, member) arguments are the
            # redis-py 2.x StrictRedis form; redis-py >= 3.0 expects a
            # {member: score} mapping -- confirm the installed version.
            self.r.zadd(tripid, seconds, timestamp)

    def FindArrivals(self, start_hour=16, end_hour=18):
        """For each trip, find the best estimate of the arrival time.

        Only arrivals strictly between start_hour and end_hour (compared
        on time of day) are kept.  Trips with no stored observations are
        skipped.

        start_hour: int 0-23, beginning of observation window
        end_hour: int 0-23, end of window

        Returns: map from string day to unsorted list of arrival datetimes
        """
        days = self.r.smembers('days')
        print(days)

        start_time = time(hour=start_hour)
        end_time = time(hour=end_hour)

        arrival_map = {}

        for day in days:
            tripids = self.r.smembers(day)

            for tripid in tripids:
                pred_dt = self.GetPredictedArrival(tripid)
                if pred_dt is None:
                    # The trip id is indexed under the day but has no
                    # observations; nothing to estimate from.
                    continue

                if start_time < pred_dt.time() < end_time:
                    arrival_map.setdefault(day, []).append(pred_dt)

        return arrival_map

    def GetPredictedArrival(self, tripid):
        """Gets the best predicted arrival time for a given trip.

        Uses the lowest-scored entry of the trip's sorted set, i.e. the
        observation with the smallest seconds-away value, and adds that
        value to the observation timestamp.

        tripid: string TripID like R98313D88

        Returns: datetime of the predicted arrival, or None if the trip
                 has no stored observations
        """
        # Only the first entry is used, so ask for exactly one.
        pair = self.r.zrange(tripid, 0, 0, withscores=True)
        if not pair:
            return None

        timestamp, seconds = pair[0]
        pred_ts = float(timestamp) + seconds
        return datetime.fromtimestamp(pred_ts)
99
100
class TrainSpotting(object):
    """A single observation of a train, taken from one row of feed data."""

    def __init__(self, t):
        """Pulls the fields of interest out of a row.

        t: indexable row; t[0] is converted to the int timestamp,
           t[2] is kept as the trip id, and t[6] is converted to the
           int number of seconds
        """
        timestamp = int(t[0])
        tripid = t[2]
        seconds = int(t[6])

        self.timestamp, self.tripid, self.seconds = timestamp, tripid, seconds
107
108
109
def ReadCsv(url='http://developer.mbta.com/lib/rthr/red.csv'):
    """Reads data from the red line.

    Keeps only rows whose column 5 is 'Kendall/MIT' and whose column 3
    is 'Braintree' (presumably the station and the destination; verify
    against the feed format).  Rows too short to hold the columns that
    are read (blank lines, truncated records) are skipped.

    url: string location of the CSV feed

    Returns: list of TrainSpotting objects
    """
    # Highest index read is t[6] (inside TrainSpotting), so a usable row
    # needs at least seven fields.
    min_fields = 7

    fp = urllib2.urlopen(url)
    try:
        tss = []
        for t in csv.reader(fp):
            if len(t) < min_fields:
                continue
            if t[5] != 'Kendall/MIT':
                continue
            if t[3] != 'Braintree':
                continue

            tss.append(TrainSpotting(t))
    finally:
        # Release the connection even if parsing a row raises.
        fp.close()

    return tss
127
128
129
def ReadJson():
    """Fetches the red line JSON feed, parses it, and prints the result.

    Nothing is returned; the parsed object is only printed.
    """
    url = 'http://developer.mbta.com/lib/rthr/red.json'

    fp = urllib2.urlopen(url)
    try:
        json_text = fp.read()
    finally:
        # Close the response explicitly instead of leaving it to the
        # garbage collector.
        fp.close()

    json_obj = json.loads(json_text)
    print(json_obj)
134
135
136
def ReadAndStore(red):
    """Fetches the current observations and records each one.

    red: Redis object that receives one WriteTrainSpotting call per
         observation returned by ReadCsv
    """
    for spotting in ReadCsv():
        red.WriteTrainSpotting(spotting.timestamp,
                               spotting.tripid,
                               spotting.seconds)
144
145
146
def Loop(red, start_time, end_time, delay=60):
    """Collects data from start_time until end_time.

    Sleeps until start_time if it is still in the future, then reads and
    stores observations every `delay` seconds while the current time is
    before end_time.

    red: Redis object to store data
    start_time: datetime
    end_time: datetime
    delay: time to sleep between collections, in seconds
    """
    # Sample the clock once: re-reading it for the subtraction could
    # yield a negative duration if start_time passes in between, and
    # sleep() rejects negative values.
    now = datetime.now()
    if now < start_time:
        diff = start_time - now
        print('Sleeping', diff)
        sleep(diff.total_seconds())

    while datetime.now() < end_time:
        print('Collecting')
        ReadAndStore(red)
        sleep(delay)
163
164
165
def TodayAt(hour):
    """Builds a datetime for the current date at the given hour.

    hour: int hour of the day, passed to datetime.time (valid values
          are 0-23)

    Returns: datetime whose minute, second and microsecond are zero
    """
    today = datetime.now().date()
    at_hour = time(hour=hour)
    return datetime.combine(today, at_hour)
172
173
174
def GetInterarrivals(arrival_map):
    """Finds all interarrival times in the arrival map.

    Days are processed in sorted key order; within a day the arrivals
    are sorted and the gaps between consecutive arrivals are collected.
    The lists in arrival_map are not modified.

    arrival_map: map from string day to unsorted list of arrival datetimes

    Returns: list of float interarrival times in seconds
    """
    interarrival_seconds = []

    # items() rather than iteritems(): sorted() materializes the pairs
    # anyway, and items() exists on both Python 2 and Python 3.
    for day, arrivals in sorted(arrival_map.items()):
        print(day, len(arrivals))

        # Sort a copy so the caller's lists keep their original order.
        ordered = sorted(arrivals)
        diffs = numpy.diff(ordered)
        interarrival_seconds.extend(diff.total_seconds() for diff in diffs)

    return interarrival_seconds
190
191
192
def main(script, command='collect'):
    """Runs one of the script's commands.

    script: string name of the script (unused)
    command: 'collect' to gather observations between 16:00 and 18:00
             today, or 'report' to print the interarrival times
             computed from the stored data

    Exits with status 1 if command is not one of the above.
    """
    # Reject a mistyped command up front, before opening a database
    # connection, instead of silently doing nothing.
    if command not in ('collect', 'report'):
        print('Unknown command %r; expected "collect" or "report".' % command,
              file=sys.stderr)
        sys.exit(1)

    red = Redis()

    if command == 'collect':
        start = TodayAt(16)
        end = TodayAt(18)

        print(start, end)
        Loop(red, start, end)

    elif command == 'report':
        arrival_map = red.FindArrivals()
        interarrivals = GetInterarrivals(arrival_map)
        print(repr(interarrivals))
206
207
208
if __name__ == '__main__':
    # Pass the script name and any command-line arguments straight
    # through as main()'s positional parameters.
    argv = sys.argv
    main(*argv)
210
211