Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
eclipse
GitHub Repository: eclipse/sumo
Path: blob/main/tools/import/gtfs/gtfs2osm.py
194314 views
1
# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
2
# Copyright (C) 2010-2026 German Aerospace Center (DLR) and others.
3
# This program and the accompanying materials are made available under the
4
# terms of the Eclipse Public License 2.0 which is available at
5
# https://www.eclipse.org/legal/epl-2.0/
6
# This Source Code may also be made available under the following Secondary
7
# Licenses when the conditions for such availability set forth in the Eclipse
8
# Public License 2.0 are satisfied: GNU General Public License, version 2
9
# or later which is available at
10
# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
11
# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
12
13
# @file gtfs2osm.py
14
# @author Giuliana Armellini
15
# @author Mirko Barthauer
16
# @date 2021-02-18
17
18
"""
19
Import public transport from GTFS (schedules) and OSM (routes) data
20
"""
21
22
import os
23
import sys
24
import subprocess
25
import datetime
26
import time
27
import math
28
import io
29
import re
30
from collections import defaultdict
31
import hashlib
32
33
# from pprint import pprint
34
35
import pandas as pd
36
pd.options.mode.chained_assignment = None # default='warn'
37
38
sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
39
import sumolib # noqa
40
from sumolib.xml import parse_fast_nested # noqa
41
from sumolib.miscutils import benchmark, parseTime, humanReadableTime # noqa
42
43
# ----------------------- gtfs, osm and sumo modes ----------------------------
# Maps an OSM public-transport mode to the SUMO vClass used for its vehicles.
OSM2SUMO_MODES = {
    'bus': 'bus',
    'train': 'rail',
    'tram': 'tram',
    'light_rail': 'rail_urban',
    'monorail': 'rail_urban',
    'subway': 'subway',
    'aerialway': 'cable_car',
    'ferry': 'ship'
}
54
55
# Maps GTFS route_type codes (basic and extended) as well as hafas mode
# strings to the OSM mode names used as keys of OSM2SUMO_MODES.
# Only route types listed here can be imported.
GTFS2OSM_MODES = {
    # https://developers.google.com/transit/gtfs/reference/#routestxt
    '0': 'tram',
    '1': 'subway',
    '2': 'train',
    '3': 'bus',
    '4': 'ferry',
    # '5': 'cableTram',
    # '6': 'aerialLift',
    # '7': 'funicular',
    # https://developers.google.com/transit/gtfs/reference/extended-route-types
    '100': 'train',  # DB
    '109': 'light_rail',  # S-Bahn
    '400': 'subway',  # U-Bahn
    '1000': 'ferry',  # Faehre
    # additional modes used in Hamburg
    '402': 'subway',  # U-Bahn
    '1200': 'ferry',  # Faehre
    # modes used by hafas
    's': 'train',
    'RE': 'train',
    'RB': 'train',
    'IXB': 'train',  # tbd
    'ICE': 'train',
    'IC': 'train',
    'IRX': 'train',  # tbd
    'EC': 'train',
    'NJ': 'train',  # tbd
    'RHI': 'train',  # tbd
    'DPN': 'train',  # tbd
    'SCH': 'train',  # tbd
    'Bsv': 'train',  # tbd
    'KAT': 'train',  # tbd
    'AIR': 'train',  # tbd
    'DPS': 'train',  # tbd
    'lt': 'train',  # tbd
    'BUS': 'bus',  # tbd
    'Str': 'tram',  # tbd
    'DPF': 'train',  # tbd
}
# https://developers.google.com/transit/gtfs/reference/extended-route-types
# extended bus (700-716) and tram (900-906) service codes
# NOTE(review): range() excludes the upper bound, so codes 717 and 907 are
# not mapped — confirm against the extended route-type list whether that
# is intended
for i in range(700, 717):
    GTFS2OSM_MODES[str(i)] = 'bus'
for i in range(900, 907):
    GTFS2OSM_MODES[str(i)] = 'tram'
100
101
# OSM2OSM_MODES = {
102
# 'bus': ['bus','trolleybus'], # to enable matching of buses categorised as trolleybus in osm
103
# 'train': 'train',
104
# 'tram': 'tram',
105
# 'light_rail':'light_rail',
106
# 'subway': 'subway',
107
# 'ferry': 'ferry'
108
# }
109
110
111
def md5hash(s):
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of *s*."""
    digest = hashlib.md5(s.encode('utf-8'))
    return digest.hexdigest()
113
114
115
@benchmark
def import_gtfs(options, gtfsZip):
    """
    Imports the gtfs-data and filters it by the specified date and modes.

    Reads the csv tables from the given zip archive, synthesizes missing
    optional columns (trip_headsign, direction_id, route_short_name),
    normalizes stop times to a single day, duplicates trips running past
    midnight under a ".trimmed" trip id, and filters by the simulation
    time window, the service date and the enabled modes.

    Returns (routes, trips_on_day, shapes, stops, stop_times); shapes is
    None when the feed contains no shapes.txt.

    NOTE(review): calendar.txt and calendar_dates.txt are both opened
    unconditionally; a feed missing either file would raise KeyError here
    — confirm whether such feeds need to be supported.
    """
    if options.verbose:
        print('Loading GTFS data "%s"' % options.gtfs)

    routes = pd.read_csv(gtfsZip.open('routes.txt'), dtype=str)
    stops = pd.read_csv(gtfsZip.open('stops.txt'), dtype=str)
    stop_times = pd.read_csv(gtfsZip.open('stop_times.txt'), dtype=str)
    trips = pd.read_csv(gtfsZip.open('trips.txt'), dtype=str)
    shapes = pd.read_csv(gtfsZip.open('shapes.txt'), dtype=str) if 'shapes.txt' in gtfsZip.namelist() else None
    calendar_dates = pd.read_csv(gtfsZip.open('calendar_dates.txt'), dtype=str)
    calendar = pd.read_csv(gtfsZip.open('calendar.txt'), dtype=str)

    # fill in optional GTFS columns that later code relies on
    if 'trip_headsign' not in trips:
        trips['trip_headsign'] = ''
    if 'direction_id' not in trips:
        trips = discover_direction(routes, trips, stop_times)
    if 'route_short_name' not in routes:
        routes['route_short_name'] = routes['route_long_name']

    # for some obscure reason there are GTFS files which have the sequence index as a float
    stop_times['stop_sequence'] = stop_times['stop_sequence'].astype(float)

    # filter trips within given begin and end time
    # first adapt stop times to a single day (from 00:00:00 to 23:59:59)
    full_day = pd.to_timedelta("24:00:00")

    stop_times['arrival_fixed'] = pd.to_timedelta(stop_times.arrival_time)
    stop_times['departure_fixed'] = pd.to_timedelta(stop_times.departure_time)

    # avoid trimming trips starting before midnight but ending after:
    # trips whose very first stop already has a time >= 24:00 belong
    # entirely to the next day and are wrapped back into [0, 24h)
    fix_trips = stop_times[(stop_times['arrival_fixed'] >= full_day) &  # gg/ here i arrive at or after midnight
                           (stop_times['stop_sequence'] == stop_times['stop_sequence'].min())].trip_id.values.tolist()

    stop_times.loc[stop_times.trip_id.isin(fix_trips), 'arrival_fixed'] = stop_times.loc[stop_times.trip_id.isin(
        fix_trips), 'arrival_fixed'] % full_day
    stop_times.loc[stop_times.trip_id.isin(fix_trips), 'departure_fixed'] = stop_times.loc[stop_times.trip_id.isin(
        fix_trips), 'departure_fixed'] % full_day

    # trips that cross midnight get a ".trimmed" duplicate whose times are
    # wrapped into the next day, so both day parts can be written later
    extra_stop_times = stop_times.loc[stop_times.arrival_fixed > full_day, ]
    extra_stop_times.loc[:, 'arrival_fixed'] = extra_stop_times.loc[:, 'arrival_fixed'] % full_day
    extra_stop_times.loc[:, 'departure_fixed'] = extra_stop_times.loc[:, 'departure_fixed'] % full_day
    extra_trips_id = extra_stop_times.trip_id.values.tolist()
    extra_stop_times.loc[:, 'trip_id'] = extra_stop_times.loc[:, 'trip_id'] + ".trimmed"
    stop_times = pd.concat((stop_times, extra_stop_times))

    extra_trips = trips.loc[trips.trip_id.isin(extra_trips_id), :]
    extra_trips.loc[:, 'trip_id'] = extra_trips.loc[:, 'trip_id'] + ".trimmed"
    trips = pd.concat((trips, extra_trips))

    time_interval = options.end - options.begin
    start_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.begin)))

    # if time_interval >= 86400 (24 hs), no filter needed
    if time_interval < 86400 and options.end <= 86400:
        # if simulation time end on the same day
        end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end)))
        stop_times = stop_times[(start_time <= stop_times['departure_fixed']) &
                                (stop_times['departure_fixed'] <= end_time)]
    elif time_interval < 86400 and options.end > 86400:
        # if simulation time includes next day trips: keep everything
        # except the gap between the (wrapped) end and the start time
        end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end - 86400)))
        stop_times = stop_times[~((stop_times['departure_fixed'] > end_time) &
                                  (stop_times['departure_fixed'] < start_time))]

    # filter trips for a representative date
    weekday = 'monday tuesday wednesday thursday friday saturday sunday'.split(
        )[datetime.datetime.strptime(options.date, "%Y%m%d").weekday()]
    # services removed on the given date (exception_type 2)
    removed = calendar_dates[(calendar_dates.date == options.date) &
                             (calendar_dates.exception_type == '2')]
    services = calendar[(calendar.start_date <= options.date) &
                        (calendar.end_date >= options.date) &
                        (calendar[weekday] == '1') &
                        (~calendar.service_id.isin(removed.service_id))]
    # services added on the given date (exception_type 1)
    added = calendar_dates[(calendar_dates.date == options.date) &
                           (calendar_dates.exception_type == '1')]
    trips_on_day = trips[trips.service_id.isin(services.service_id) |
                         trips.service_id.isin(added.service_id)]

    # filter routes by modes
    filter_gtfs_modes = [key for key, value in GTFS2OSM_MODES.items()
                         if value in options.modes]
    routes = routes[routes['route_type'].isin(filter_gtfs_modes)]
    if routes.empty:
        print("Warning! No GTFS data found for the given modes %s." % options.modes)
    if trips_on_day.empty:
        print("Warning! No GTFS data found for the given date %s." % options.date)

    return routes, trips_on_day, shapes, stops, stop_times
207
208
209
@benchmark
def discover_direction(routes, trips, stop_times):
    """
    Sets the direction value if it is not present in the GTFS data to identify
    separate directions of the same PT line.

    Trips that share the exact same ordered stop sequence receive the same
    synthetic direction_id (the MD5 hash of the space-joined stop ids).
    """
    # attach the route and trip attributes to every stop-time record
    trips_with_routes = pd.merge(trips, routes, on='route_id', how='left')
    enhanced_times = pd.merge(stop_times, trips_with_routes, on='trip_id')
    # concatenate the stop ids of each trip and hash the resulting sequence
    per_trip = enhanced_times.groupby(["trip_id"], as_index=False).agg({'stop_id': ' '.join})
    per_trip['direction_id'] = per_trip['stop_id'].apply(md5hash)
    # join the synthetic direction ids back onto the trips table
    return pd.merge(trips, per_trip[['trip_id', 'direction_id']], on='trip_id', how='left')
221
222
223
@benchmark
def filter_gtfs(options, routes, trips_on_day, shapes, stops, stop_times):
    """
    Filters the gtfs-data by the given bounding box.

    If using shapes, searches the main shapes of route. A main shape represents the
    trip that is most often taken in a given public transport route. Only the paths
    (also referred to as routes) and stops of trips with main shapes will be mapped.
    Trips with secondary shapes will be defined by the start and end edge belonging
    to the main shape (if they a part of the main shape).

    Returns (gtfs_data, trip_list, filtered_stops, shapes, shapes_dict), where
    shapes_dict maps each (main or secondary) shape id to its main shape id.
    """
    stops['stop_lat'] = stops['stop_lat'].astype(float)
    stops['stop_lon'] = stops['stop_lon'].astype(float)

    if shapes is not None:
        shapes['shape_pt_lat'] = shapes['shape_pt_lat'].astype(float)
        shapes['shape_pt_lon'] = shapes['shape_pt_lon'].astype(float)
        shapes['shape_pt_sequence'] = shapes['shape_pt_sequence'].astype(float)

        # keep only shape points inside the bounding box
        # (bbox order assumed lon-min, lat-min, lon-max, lat-max)
        shapes = shapes[(options.bbox[1] <= shapes['shape_pt_lat']) &
                        (shapes['shape_pt_lat'] <= options.bbox[3]) &
                        (options.bbox[0] <= shapes['shape_pt_lon']) &
                        (shapes['shape_pt_lon'] <= options.bbox[2])]

    # merge gtfs data from stop_times / trips / routes / stops
    gtfs_data = pd.merge(pd.merge(pd.merge(trips_on_day, stop_times, on='trip_id'),
                                  stops, on='stop_id'), routes, on='route_id')
    if shapes is None:
        # synthesize a shape id per route and direction when shapes.txt is absent
        gtfs_data['shape_id'] = gtfs_data['route_id'] + "_" + gtfs_data['direction_id']

    # filter relevant information
    gtfs_data = gtfs_data[['route_id', 'shape_id', 'trip_id', 'stop_id',
                           'route_short_name', 'route_type', 'trip_headsign',
                           'direction_id', 'stop_name', 'stop_lat', 'stop_lon',
                           'stop_sequence', 'arrival_fixed', 'departure_fixed']]

    # filter data inside SUMO net by stop location and shape
    gtfs_data = gtfs_data[(options.bbox[1] <= gtfs_data['stop_lat']) &
                          (gtfs_data['stop_lat'] <= options.bbox[3]) &
                          (options.bbox[0] <= gtfs_data['stop_lon']) &
                          (gtfs_data['stop_lon'] <= options.bbox[2])]

    # get list of trips with departure time to allow a sorted output
    # (one row per trip: the row with the smallest stop_sequence)
    trip_list = gtfs_data.loc[gtfs_data.groupby('trip_id').stop_sequence.idxmin()]

    # add new column for unambiguous stop_id and edge in sumo
    gtfs_data["stop_item_id"] = None
    gtfs_data["edge_id"] = None
    # create dict with shapes and their main shape
    shapes_dict = {}

    if shapes is not None:
        # search main and secondary shapes for each pt line (route and direction)
        filtered_stops = gtfs_data.groupby(['route_id', 'direction_id', 'shape_id'])[
            "shape_id"].size().reset_index(name='counts')
        group_shapes = filtered_stops.groupby(['route_id', 'direction_id']).shape_id.aggregate(set).reset_index()

        # the most frequently used shape of each route+direction is its main shape
        filtered_stops = filtered_stops.loc[filtered_stops.groupby(['route_id', 'direction_id'])['counts'].idxmax()][[  # noqa
            'route_id', 'shape_id', 'direction_id']]
        filtered_stops = pd.merge(filtered_stops, group_shapes, on=['route_id', 'direction_id'])

        # map every secondary shape (shape_id_y) to its main shape (shape_id_x)
        for row in filtered_stops.itertuples():
            for sec_shape in row.shape_id_y:
                shapes_dict[sec_shape] = row.shape_id_x

        # create data frame with main shape for stop location
        filtered_stops = gtfs_data[gtfs_data['shape_id'].isin(filtered_stops.shape_id_x)]
        filtered_stops = filtered_stops[['route_id', 'shape_id', 'stop_id',
                                         'route_short_name', 'route_type',
                                         'trip_headsign', 'direction_id',
                                         'stop_name', 'stop_lat', 'stop_lon']].drop_duplicates()
    else:
        # If not using shapes, searches for the most common sequence of stops in a route.
        # Only the paths and stops of these main sequences are mapped. Creates 'shapes' with
        # the coordinates of first and last stop in main route sequences, used for mapping later.

        # create a new stop id with their trip sequence
        gtfs_data['new_stop_id'] = gtfs_data['stop_sequence'].astype(str) + '_' + gtfs_data['stop_id']

        # for a given trip, put the stops into a list and then into a string
        group_stops = gtfs_data.groupby(['trip_id', 'shape_id']).new_stop_id.aggregate(list).reset_index()
        group_stops['new_stop_id'] = group_stops['new_stop_id'].str.join(' ')

        # for a given shape (route and direction),
        # count the number of times the particular stop sequence (sequence and stop_id) is used
        group_size = group_stops.groupby(['shape_id', 'new_stop_id']).new_stop_id.size().reset_index(name='counts')

        # get one main route (most common sequence of stops) for each shape
        group_routes = group_size.loc[group_size.groupby(['shape_id']).counts.idxmax()]

        # split string of stops into list again
        group_routes['new_stop_id'] = group_routes['new_stop_id'].str.split(' ')

        # get all stops in all the main routes
        routes_stops = group_routes.explode('new_stop_id', ignore_index=True)
        routes_stops[['stop_sequence', 'stop_id']] = routes_stops.new_stop_id.str.split('_', expand=True)
        routes_stops['stop_sequence'] = routes_stops['stop_sequence'].astype(float)

        stop_indexes = []
        # loop through all unique shapes and collect the first and last stop in sequence
        for shape in routes_stops['shape_id'].unique():
            first_stop_index = routes_stops.loc[routes_stops['shape_id'] == shape, 'stop_sequence'].idxmin()
            last_stop_index = routes_stops.loc[routes_stops['shape_id'] == shape, 'stop_sequence'].idxmax()
            stop_indexes.append(first_stop_index)
            stop_indexes.append(last_stop_index)

        # drop indexes that have duplicates (i.e. first and last stop are the same)
        end_stops_index = [x for x in stop_indexes if stop_indexes.count(x) == 1]

        # create new 'shapes' file with the coordinates of first and last stop
        stop_info = gtfs_data[['shape_id', 'stop_id', 'stop_lat', 'stop_lon']].drop_duplicates()
        stop_shape = routes_stops.loc[end_stops_index, ['shape_id', 'stop_id', 'stop_sequence']]
        shapes = pd.merge(stop_shape, stop_info, on=['shape_id', 'stop_id'])
        shapes = shapes.rename(columns={"stop_sequence": "shape_pt_sequence",
                                        "stop_lon": "shape_pt_lon", "stop_lat": "shape_pt_lat"})

        # all stops of main routes, dropped routes with only 1 stop
        routes_stops = routes_stops.loc[routes_stops['shape_id'].isin(shapes['shape_id'])]

        # shapes dictionary is just the shape id in both columns
        for shape in shapes['shape_id'].unique():
            shapes_dict[shape] = shape

        # all stops of main routes, with other infos.
        # stop_sequence is used in merge because some stops are repeated twice in a route
        filtered_stops = pd.merge(routes_stops, gtfs_data,
                                  on=['shape_id', 'stop_id', 'stop_sequence'],
                                  how='left')[['route_id', 'stop_id', 'shape_id',
                                               'route_short_name', 'route_type', 'trip_headsign', 'direction_id',
                                               'stop_name', 'stop_lat', 'stop_lon', 'stop_sequence']
                                              ].drop_duplicates(['shape_id', 'stop_id', 'stop_sequence'])

    return gtfs_data, trip_list, filtered_stops, shapes, shapes_dict
356
357
358
def get_line_dir(line_orig, line_dest):
    """
    Calculates the direction of the public transport line based on the start
    and end nodes of the osm route.

    Both arguments are (lon, lat) pairs; the result is a compass angle in
    degrees (0 = north, increasing clockwise).
    """
    delta_lat = float(line_dest[1]) - float(line_orig[1])
    delta_lon = float(line_dest[0]) - float(line_orig[0])

    # angle of the line against the equator; 90 for a purely
    # north-south line (avoids dividing by 0)
    if delta_lon == 0:
        angle = 90
    else:
        angle = math.degrees(math.atan(abs(delta_lat / delta_lon)))

    if delta_lat >= 0 and delta_lon >= 0:
        return 90 - angle            # 1st quadrant (towards NE)
    if delta_lat < 0 and delta_lon > 0:
        return 90 + angle            # 2nd quadrant (towards SE)
    if delta_lat <= 0 and delta_lon <= 0:
        return 90 - angle + 180      # 3rd quadrant (towards SW)
    return 270 + angle               # 4th quadrant (towards NW)
381
382
383
def repair_routes(options, net):
    """
    Runs duarouter to repair the given osm routes.

    Reads the PT lines from options.osm_routes, keeps those whose mode is
    enabled and whose route touches the loaded net, writes them as trips to
    a temporary duarouter input file, runs duarouter with --repair and
    collects the repaired edge lists.

    Returns a dict ptLine id -> [name, line ref, type, direction (deg),
    color, repaired edge string, stop names]; lines that duarouter could
    not repair are dropped from the result.
    """
    osm_routes = {}
    # write dua input file
    with io.open("dua_input.xml", 'w+', encoding="utf8") as dua_file:
        dua_file.write(u"<routes>\n")
        for key, value in OSM2SUMO_MODES.items():
            dua_file.write(u'    <vType id="%s" vClass="%s"/>\n' % (key, value))
        num_read = discard_type = discard_net = 0
        sumo_edges = set([sumo_edge.getID() for sumo_edge in net.getEdges()])
        for ptLine in sumolib.xml.parse(options.osm_routes, "ptLine"):
            num_read += 1
            if ptLine.type not in options.modes:
                discard_type += 1
                continue

            if not ptLine.route:
                discard_net += 1
                continue
            # keep only the route edges that exist in the loaded net
            route_edges = [edge for edge in ptLine.route[0].edges.split() if edge in sumo_edges]
            if not route_edges:
                discard_net += 1
                continue

            # transform ptLine origin and destination to geo coordinates
            x, y = net.getEdge(route_edges[0]).getFromNode().getCoord()
            line_orig = net.convertXY2LonLat(x, y)
            x, y = net.getEdge(route_edges[-1]).getFromNode().getCoord()
            line_dest = net.convertXY2LonLat(x, y)

            # find ptLine direction
            line_dir = get_line_dir(line_orig, line_dest)

            # slot 5 (the edge string) stays None until duarouter has run
            osm_routes[ptLine.id] = [ptLine.attr_name, ptLine.line, ptLine.type, line_dir, ptLine.color,
                                     None, [s.attr_name for s in (ptLine.stops or [])]]
            dua_file.write(u'    <trip id="%s" type="%s" depart="0" via="%s"/>\n' %
                           (ptLine.id, ptLine.type, (" ").join(route_edges)))
        dua_file.write(u"</routes>\n")

    if options.verbose:
        print("%s routes read, discarded for wrong mode: %s, outside of net %s, keeping %s" %
              (num_read, discard_type, discard_net, len(osm_routes)))
    # run duarouter
    subprocess.check_call([sumolib.checkBinary('duarouter'),
                           '-n', options.network,
                           '--route-files', 'dua_input.xml', '--repair',
                           '-o', 'dua_output.xml', '--ignore-errors',
                           '--error-log', options.dua_repair_output])

    # parse repaired routes
    n_routes = len(osm_routes)
    broken = set(osm_routes.keys())
    for ptline, ptline_route in parse_fast_nested("dua_output.xml", "vehicle", "id", "route", "edges"):
        osm_routes[ptline.id][5] = ptline_route.edges
        broken.remove(ptline.id)

    # remove dua files
    os.remove("dua_input.xml")
    os.remove("dua_output.xml")
    os.remove("dua_output.alt.xml")

    # remove invalid routes from dict (lines missing from the duarouter output)
    [osm_routes.pop(line) for line in list(osm_routes) if line in broken]

    if n_routes != len(osm_routes):
        print("%s of %s routes have been imported, see '%s' for more information." %
              (len(osm_routes), n_routes, options.dua_repair_output))

    return osm_routes
454
455
456
@benchmark
def import_osm(options, net):
    """
    Imports the routes of the public transport lines from osm.

    Returns a dict ptLine id -> (name, line ref, type, direction (deg),
    color, edge string, stop names). If options.repair is set, duarouter
    is used to repair the routes first (see repair_routes).
    """
    if options.repair:
        if options.verbose:
            print("Import and repair osm routes")
        osm_routes = repair_routes(options, net)
    else:
        if options.verbose:
            print("Import osm routes")
        osm_routes = {}
        for ptLine in sumolib.xml.parse(options.osm_routes, "ptLine"):
            if ptLine.type not in options.modes or not ptLine.route:
                continue
            route_edges = ptLine.route[0].edges.split()
            # only edges that exist in the net are used for the direction
            route_edges = [e for e in route_edges if net.hasEdge(e)]
            if route_edges:
                # TODO recheck what happens if it is only one edge
                x, y = net.getEdge(route_edges[0]).getFromNode().getCoord()
                line_orig = net.convertXY2LonLat(x, y)

                x, y = net.getEdge(route_edges[-1]).getFromNode().getCoord()
                line_dest = net.convertXY2LonLat(x, y)

                line_dir = get_line_dir(line_orig, line_dest)

                # NOTE(review): unlike repair_routes, the stored edge string is
                # the full, unfiltered ptLine.route[0].edges (not route_edges)
                # — confirm this asymmetry is intended
                osm_routes[ptLine.id] = (ptLine.attr_name, ptLine.line,
                                         ptLine.type, line_dir, ptLine.color,
                                         ptLine.route[0].edges, [s.attr_name for s in (ptLine.stops or [])])
    return osm_routes
488
489
490
def _addToDataFrame(gtfs_data, row, shapes_dict, stop, edge):
    """
    Record the mapped stop item id and sumo edge id in *gtfs_data* for the
    given stop on every shape whose main shape is row.shape_id.
    """
    related_shapes = [shape for shape, main_shape in shapes_dict.items()
                      if main_shape == row.shape_id]
    mask = ((gtfs_data["stop_id"] == row.stop_id) &
            (gtfs_data["shape_id"].isin(related_shapes)))
    gtfs_data.loc[mask, "stop_item_id"] = stop
    gtfs_data.loc[mask, "edge_id"] = edge
498
499
500
def getBestLane(net, lon, lat, radius, stop_length, center, edge_set, pt_class, last_pos=-1):
    """
    Find a lane for a stop located at the given geo coordinate.

    Searches the net within *radius* around (lon, lat), restricted to the
    edges contained in *edge_set*, and returns (lane id, startPos, endPos)
    for the first lane that allows *pt_class*, or None when no suitable
    lane exists. If *center* is true the stop is centered around the
    projected position, otherwise it ends there.
    """
    # get edges near stop location
    x, y = net.convertLonLat2XY(lon, lat)
    edges = [e for e in net.getNeighboringEdges(x, y, radius, includeJunctions=False) if e[0].getID() in edge_set]
    # sort by distance but have edges longer than stop length first
    for edge, _ in sorted(edges, key=lambda x: (x[0].getLength() <= stop_length, x[1])):
        for lane in edge.getLanes():
            if lane.allows(pt_class):
                pos = lane.getClosestLanePosAndDist((x, y))[0]
                # NOTE(review): edge_set[0] needs an indexable container, but
                # the callers in this file pass a set; with the default
                # last_pos=-1 the left operand is normally true so the
                # subscript is never evaluated — confirm before ever calling
                # this with an explicit last_pos
                if pos > last_pos or edge.getID() != edge_set[0]:
                    start = max(0, pos - (stop_length / 2. if center else stop_length))
                    end = min(start + stop_length, lane.getLength())
                    return lane.getID(), start, end
    return None
514
515
516
def getAccess(net, lon, lat, radius, lane_id, max_access=10):
    """
    Build XML <access> elements connecting the stop on *lane_id* to nearby
    pedestrian edges, for the case that the stop's own edge does not allow
    pedestrians. At most *max_access* entries are returned, closest first.
    """
    x, y = net.convertLonLat2XY(lon, lat)
    stop_lane = net.getLane(lane_id)
    access = []
    # no access links are needed when passengers can walk on the stop edge
    if stop_lane.getEdge().allows("pedestrian"):
        return access
    neighbors = sorted(net.getNeighboringEdges(x, y, radius), key=lambda item: item[1])
    for access_edge, _ in neighbors:
        if not access_edge.allows("pedestrian"):
            continue
        access_lane_idx, access_pos, access_dist = access_edge.getClosestLanePosDist((x, y))
        if not access_edge.getLane(access_lane_idx).allows("pedestrian"):
            # the closest lane is not walkable: take the first lane that is
            for idx, cand_lane in enumerate(access_edge.getLanes()):
                if cand_lane.allows("pedestrian"):
                    access_lane_idx = idx
                    break
        access.append((u'        <access friendlyPos="true" lane="%s_%s" pos="%.2f" length="%.2f"/>\n') %
                      (access_edge.getID(), access_lane_idx, access_pos, 1.5 * access_dist))
        if len(access) == max_access:
            break
    return access
534
535
536
@benchmark
def map_gtfs_osm(options, net, osm_routes, gtfs_data, shapes, shapes_dict, filtered_stops):
    """
    Maps the routes from gtfs with the sumo routes imported from osm and maps
    the gtfs stops with the lane and position in sumo.

    Returns (map_routes, map_stops, missing_stops, missing_lines):
    map_routes: shape id -> (osm line id, edge list, color)
    map_stops: stop item id -> [quoted name, lane id, startPos, endPos,
               access elements, pt type, set of candidate edges]
    missing_stops / missing_lines: tuples describing unmapped elements.
    """
    if options.verbose:
        print("Map stops and routes")

    map_routes = {}
    map_stops = {}
    # gtfs stops are grouped (not in exact geo position), so a large radius
    # for mapping is needed
    radius = 200

    missing_stops = []
    missing_lines = []
    # stop_id -> list of stop item ids already created for that gtfs stop
    stop_items = defaultdict(list)

    # get different permutations of stop names, and assign the collection of all stop names in route to the stop
    filtered_stops['stop_name'] = [[x] + re.split(r', | ,|,', x) + [x.replace(',', '')]
                                   for x in filtered_stops['stop_name']]
    filtered_shapes = filtered_stops.groupby(['shape_id', 'route_short_name',
                                              'route_type', 'direction_id']).stop_name.aggregate("sum").reset_index(
        name='stop_name_all')
    filtered_stops = pd.merge(filtered_stops, filtered_shapes)

    for row in filtered_stops.itertuples():
        # check if gtfs route already mapped to osm route
        if row.shape_id not in map_routes:
            # if route not mapped, find the osm route for shape id
            pt_line_name = row.route_short_name
            pt_type = GTFS2OSM_MODES[row.route_type]

            # get shape definition and define pt direction
            aux_shapes = shapes[shapes['shape_id'] == row.shape_id]
            if len(aux_shapes) == 0:
                print("Warning! Missing shape data for shape_id '%s'" % row.shape_id, file=sys.stderr)
                line_dir = 90
            else:
                pt_orig = aux_shapes[aux_shapes.shape_pt_sequence == aux_shapes.shape_pt_sequence.min()]
                pt_dest = aux_shapes[aux_shapes.shape_pt_sequence == aux_shapes.shape_pt_sequence.max()]
                line_dir = get_line_dir((pt_orig.shape_pt_lon.iloc[0], pt_orig.shape_pt_lat.iloc[0]),
                                        (pt_dest.shape_pt_lon.iloc[0], pt_dest.shape_pt_lat.iloc[0]))

            # get osm lines with same route name and pt type,
            # and if they have at least one matching stop name in osm and gtfs routes
            osm_lines = [(abs(line_dir - value[3]), ptline_id, value[4], value[5])
                         for ptline_id, value in osm_routes.items()
                         if value[1] == pt_line_name and value[2] == pt_type]
            # if value[1] == pt_line_name and value[2] in OSM2OSM_MODES[pt_type] and set(value[6]) & set(row.stop_name_all)]  # noqa
            if osm_lines:
                # get the direction for the found routes and take the route
                # with lower difference
                diff, osm_id, color, edges = min(osm_lines, key=lambda x: x[0] if x[0] < 180 else 360 - x[0])
                d = diff if diff < 180 else 360 - diff
                if d < 160:  # to prevent mapping to route going the opposite direction
                    # add mapped osm route to dict
                    map_routes[row.shape_id] = (osm_id, edges.split(), color)
                else:
                    missing_lines.append((row.route_id, pt_line_name, sumolib.xml.quoteattr(
                        str(row.trip_headsign), True), row.direction_id))
                    continue
            else:
                # no osm route found, do not map stops of route
                missing_lines.append((row.route_id, pt_line_name, sumolib.xml.quoteattr(
                    str(row.trip_headsign), True), row.direction_id))
                continue

        # set stop's type, class and length
        pt_type = GTFS2OSM_MODES[row.route_type]
        pt_class = OSM2SUMO_MODES[pt_type]
        if pt_class == "bus":
            stop_length = options.bus_stop_length
        elif pt_class == "tram":
            stop_length = options.tram_stop_length
        else:
            stop_length = options.train_stop_length

        stop_mapped = False
        # try to reuse a stop item already created for this gtfs stop
        for stop in stop_items[row.stop_id]:
            # for item of mapped stop
            stop_edge = map_stops[stop][1].rsplit("_", 1)[0]
            if stop_edge in map_routes[row.shape_id][1]:
                # if edge in route, the stops are the same
                # intersect the edge set
                map_stops[stop][6] = map_stops[stop][6] & set(map_routes[row.shape_id][1])
            else:
                # check if the wrong edge was adopted
                edge_inter = set(map_routes[row.shape_id][1]) & map_stops[stop][6]
                best = getBestLane(net, row.stop_lon, row.stop_lat, radius,
                                   stop_length, options.center_stops, edge_inter, pt_class)
                if best is None:
                    continue
                # update the lane id, start and end and add shape
                lane_id, start, end = best
                access = getAccess(net, row.stop_lon, row.stop_lat, 100, lane_id)
                map_stops[stop][1:7] = [lane_id, start, end, access, pt_type, edge_inter]
                # update edge in data frame
                stop_edge = lane_id.rsplit("_", 1)[0]
                gtfs_data.loc[gtfs_data["stop_item_id"] == stop, "edge_id"] = stop_edge
            # add to data frame
            _addToDataFrame(gtfs_data, row, shapes_dict, stop, stop_edge)
            stop_mapped = True
            break

        # if stop not mapped, create a fresh stop item on the route's edges
        if not stop_mapped:
            edge_inter = set(map_routes[row.shape_id][1])
            best = getBestLane(net, row.stop_lon, row.stop_lat, radius,
                               stop_length, options.center_stops, edge_inter, pt_class)
            if best is not None:
                lane_id, start, end = best
                access = getAccess(net, row.stop_lon, row.stop_lat, 100, lane_id)
                stop_item_id = "%s_%s" % (row.stop_id, len(stop_items[row.stop_id]))
                stop_items[row.stop_id].append(stop_item_id)
                map_stops[stop_item_id] = [sumolib.xml.quoteattr(row.stop_name[0], True),
                                           lane_id, start, end, access, pt_type, edge_inter]
                _addToDataFrame(gtfs_data, row, shapes_dict, stop_item_id, lane_id.split("_")[0])
                stop_mapped = True

        # if stop not mapped, add to missing stops
        if not stop_mapped:
            missing_stops.append((row.stop_id, sumolib.xml.quoteattr(
                row.stop_name[0], True), row.route_short_name, row.direction_id))
    # pprint(map_routes)
    # pprint(map_stops)
    return map_routes, map_stops, missing_stops, missing_lines
664
665
666
def write_vtypes(options, seen=None):
    """
    Write an additional-file containing one <vType> per enabled PT mode.

    Only modes listed in options.modes are written; when *seen* is given,
    a mode must additionally be contained in it. Nothing happens unless
    options.vtype_output is set.
    """
    if not options.vtype_output:
        return
    with sumolib.openz(options.vtype_output, mode='w') as vout:
        sumolib.xml.writeHeader(vout, root="additional", options=options)
        for osm_type, sumo_class in sorted(OSM2SUMO_MODES.items()):
            enabled = osm_type in options.modes
            if enabled and (seen is None or osm_type in seen):
                vout.write(u'    <vType id="%s" vClass="%s"/>\n' %
                           (osm_type, sumo_class))
        vout.write(u'</additional>\n')
675
676
677
def write_gtfs_osm_outputs(options, map_routes, map_stops, missing_stops, missing_lines,
                           gtfs_data, trip_list, shapes_dict, net):
    """
    Generates stops and routes for sumo and saves the unmapped elements.

    Writes the stop infrastructure (options.additional_output), the routes
    and vehicles (options.route_output) and, if anything could not be
    mapped, a warning file (options.warning_output) listing the missing
    stops, lines and out-of-sequence stops.
    """
    if options.verbose:
        print("Generates stops and routes output")

    # determine if we need to format times (depart, duration, until) to be human readable or whole seconds
    ft = humanReadableTime if "hrtime" in options and options.hrtime else int

    with sumolib.openz(options.additional_output, mode='w') as output_file:
        sumolib.xml.writeHeader(output_file, root="additional", options=options)
        for stop, value in sorted(map_stops.items()):
            name, lane, start_pos, end_pos, access, v_type = value[:6]
            typ = "busStop" if v_type == "bus" else "trainStop"
            # the element is self-closed ("/>") when there are no access links
            output_file.write(u'    <%s id="%s" lane="%s" startPos="%.2f" endPos="%.2f" name=%s friendlyPos="true"%s>\n' %  # noqa
                              (typ, stop, lane, start_pos, end_pos, name, "" if access else "/"))
            for a in access:
                output_file.write(a)
            if access:
                output_file.write(u'    </%s>\n' % typ)
        output_file.write(u'</additional>\n')

    sequence_errors = []
    write_vtypes(options)

    with sumolib.openz(options.route_output, mode='w') as output_file:
        sumolib.xml.writeHeader(output_file, root="routes", options=options)
        # NOTE(review): dividing by 86401 presumably makes an end of exactly
        # 24h count as a single day — confirm before changing
        numDays = int(options.end) // 86401
        start_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.begin)))
        shapes_written = set()

        for day in range(numDays+1):
            if day == numDays and options.end % 86400 > 0:
                # if last day, filter trips until given end time
                end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end-86400*numDays)))
                trip_list = trip_list[trip_list["arrival_fixed"] <= end_time]

            # stop sequence tuple -> first trip id using it (reused as line id)
            seqs = {}
            for row in trip_list.sort_values("arrival_fixed").itertuples():

                if day != 0 and row.trip_id.endswith(".trimmed"):
                    # only add trimmed trips the first day
                    continue

                if day == 0 and row.arrival_fixed < start_time:
                    # avoid writing first day trips that not applied
                    continue

                main_shape = shapes_dict.get(row.shape_id)
                if main_shape not in map_routes:
                    # if route not mapped
                    continue
                pt_color = map_routes[main_shape][2]
                if pt_color is None:
                    pt_color = ""
                else:
                    pt_color = ' color="%s"' % pt_color
                pt_type = GTFS2OSM_MODES[row.route_type]
                edges_list = map_routes[main_shape][1]
                stop_list = gtfs_data[gtfs_data["trip_id"] == row.trip_id].sort_values("stop_sequence")
                stop_index = [edges_list.index(stop.edge_id)
                              for stop in stop_list.itertuples()
                              if stop.edge_id in edges_list]

                if len(stop_index) < options.min_stops:
                    # Not enough stops mapped
                    continue

                # write each route definition only once
                if main_shape not in shapes_written:
                    output_file.write(u'    <route id="%s" edges="%s"/>\n' % (main_shape, " ".join(edges_list)))
                    shapes_written.add(main_shape)

                stopSeq = tuple([stop.stop_item_id for stop in stop_list.itertuples()])
                if stopSeq not in seqs:
                    seqs[stopSeq] = row.trip_id

                # determine departure from first valid stop
                depart = None
                for stop in stop_list.itertuples():
                    if stop.stop_item_id:
                        depart = ft(parseTime(str(stop.arrival_fixed.days + day) +
                                              ":" + str(stop.arrival_fixed).split(' ')[2]))
                        break

                veh_attr = (row.trip_id, day,
                            main_shape, row.route_id, seqs[stopSeq], depart,
                            min(stop_index), max(stop_index), pt_type, pt_color)
                output_file.write(u'    <vehicle id="%s.%s" route="%s" line="%s_%s" depart="%s" departEdge="%s" arrivalEdge="%s" type="%s"%s>\n' % veh_attr)  # noqa
                params = [("gtfs.route_name", row.route_short_name)]
                if row.trip_headsign:
                    params.append(("gtfs.trip_headsign", row.trip_headsign))
                if options.writeTerminals:
                    firstStop = stop_list.iloc[0]
                    lastStop = stop_list.iloc[-1]
                    firstDepart = parseTime(str(firstStop.departure_fixed.days + day) +
                                            ":" + str(firstStop.departure_fixed).split(' ')[2])
                    lastArrival = parseTime(str(lastStop.arrival_fixed.days + day) +
                                            ":" + str(lastStop.arrival_fixed).split(' ')[2])
                    params += [("gtfs.origin_stop", firstStop.stop_name),
                               ("gtfs.origin_depart", ft(firstDepart)),
                               ("gtfs.destination_stop", lastStop.stop_name),
                               # NOTE(review): key contains a typo ("arrrival");
                               # downstream consumers may already rely on it, so
                               # it must not be changed silently
                               ("gtfs.destination_arrrival", ft(lastArrival))]
                for k, v in params:
                    output_file.write(u'        <param key="%s" value=%s/>\n' % (
                        k, sumolib.xml.quoteattr(str(v), True)))

                # write the stops, skipping any stop that would go backwards
                # along the route edges
                check_seq = -1
                for stop in stop_list.itertuples():
                    if not stop.stop_item_id:
                        # if stop not mapped
                        continue
                    stop_index = edges_list.index(stop.edge_id)
                    if stop_index >= check_seq:
                        check_seq = stop_index
                        # TODO check stop position if we are on the same edge as before
                        stop_attr = (stop.stop_item_id,
                                     ft(parseTime(str(stop.arrival_fixed.days + day) +
                                                  ":" + str(stop.arrival_fixed).split(' ')[2])),
                                     ft(options.duration) if options.duration > 60 else options.duration,
                                     ft(parseTime(str(stop.departure_fixed.days + day) +
                                                  ":" + str(stop.departure_fixed).split(' ')[2])),
                                     stop.stop_sequence, stop_list.stop_sequence.max(),
                                     sumolib.xml.quoteattr(stop.stop_name, True))
                        output_file.write(u'        <stop busStop="%s" arrival="%s" duration="%s" until="%s"/><!--stopSequence="%s/%s" %s-->\n' % stop_attr)  # noqa
                    elif stop_index < check_seq:
                        # stop not downstream
                        sequence_errors.append((stop.stop_item_id, sumolib.xml.quoteattr(stop.stop_name, True),
                                                row.route_short_name,
                                                sumolib.xml.quoteattr(str(row.trip_headsign), True), row.direction_id,
                                                stop.trip_id))

                output_file.write(u'    </vehicle>\n')
        output_file.write(u'</routes>\n')

    # ----------------------- Save missing data ------------------
    if any([missing_stops, missing_lines, sequence_errors]):
        print("Not all given gtfs elements have been mapped, see %s for more information" % options.warning_output)
        with io.open(options.warning_output, 'w', encoding="utf8") as output_file:
            sumolib.xml.writeHeader(output_file, root="missingElements", rootAttrs=None, options=options)
            for stop in sorted(set(missing_stops)):
                output_file.write(u'    <stop id="%s" name=%s ptLine="%s" direction_id="%s"/>\n' % stop)
            for line in sorted(set(missing_lines)):
                output_file.write(u'    <ptLine id="%s" name="%s" trip_headsign=%s direction_id="%s"/>\n' % line)
            for stop in sorted(set(sequence_errors)):
                output_file.write(u'    <stopSequence stop_id="%s" stop_name=%s ptLine="%s" trip_headsign=%s direction_id="%s" trip_id="%s"/>\n' % stop)  # noqa
            output_file.write(u'</missingElements>\n')
825
826