Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
eclipse
GitHub Repository: eclipse/sumo
Path: blob/main/tools/import/gtfs/gtfs2osm.py
169679 views
1
# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
2
# Copyright (C) 2010-2025 German Aerospace Center (DLR) and others.
3
# This program and the accompanying materials are made available under the
4
# terms of the Eclipse Public License 2.0 which is available at
5
# https://www.eclipse.org/legal/epl-2.0/
6
# This Source Code may also be made available under the following Secondary
7
# Licenses when the conditions for such availability set forth in the Eclipse
8
# Public License 2.0 are satisfied: GNU General Public License, version 2
9
# or later which is available at
10
# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
11
# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
12
13
# @file gtfs2osm.py
14
# @author Giuliana Armellini
15
# @author Mirko Barthauer
16
# @date 2021-02-18
17
18
"""
19
Import public transport from GTFS (schedules) and OSM (routes) data
20
"""
21
22
import os
23
import sys
24
import subprocess
25
import datetime
26
import time
27
import math
28
import io
29
import re
30
from collections import defaultdict
31
import hashlib
32
33
# from pprint import pprint
34
35
import pandas as pd
36
pd.options.mode.chained_assignment = None # default='warn'
37
38
sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
39
import sumolib # noqa
40
from sumolib.xml import parse_fast_nested # noqa
41
from sumolib.miscutils import benchmark, parseTime, humanReadableTime # noqa
42
43
# ----------------------- gtfs, osm and sumo modes ----------------------------
# OSM public-transport mode -> SUMO vehicle class (vClass)
OSM2SUMO_MODES = dict(
    bus='bus',
    train='rail',
    tram='tram',
    light_rail='rail_urban',
    monorail='rail_urban',
    subway='subway',
    aerialway='cable_car',
    ferry='ship',
)
54
55
# GTFS route_type codes (plus some HAFAS product abbreviations) -> OSM mode.
# Codes without an entry here are not imported at all.
GTFS2OSM_MODES = {
    # https://developers.google.com/transit/gtfs/reference/#routestxt
    '0': 'tram',
    '1': 'subway',
    '2': 'train',
    '3': 'bus',
    '4': 'ferry',
    # '5': 'cableTram',
    # '6': 'aerialLift',
    # '7': 'funicular',
    # https://developers.google.com/transit/gtfs/reference/extended-route-types
    '100': 'train',  # DB
    '109': 'light_rail',  # S-Bahn
    '400': 'subway',  # U-Bahn
    '1000': 'ferry',  # Faehre
    # additional modes used in Hamburg
    '402': 'subway',  # U-Bahn
    '1200': 'ferry',  # Faehre
    # modes used by hafas
    's': 'train',
    'RE': 'train',
    'RB': 'train',
    'IXB': 'train',  # tbd
    'ICE': 'train',
    'IC': 'train',
    'IRX': 'train',  # tbd
    'EC': 'train',
    'NJ': 'train',  # tbd
    'RHI': 'train',  # tbd
    'DPN': 'train',  # tbd
    'SCH': 'train',  # tbd
    'Bsv': 'train',  # tbd
    'KAT': 'train',  # tbd
    'AIR': 'train',  # tbd
    'DPS': 'train',  # tbd
    'lt': 'train',  # tbd
    'BUS': 'bus',  # tbd
    'Str': 'tram',  # tbd
    'DPF': 'train',  # tbd
}
# https://developers.google.com/transit/gtfs/reference/extended-route-types
# 700..716 are the extended bus service codes, 900..906 the tram service codes
GTFS2OSM_MODES.update({str(code): 'bus' for code in range(700, 717)})
GTFS2OSM_MODES.update({str(code): 'tram' for code in range(900, 907)})
100
101
# OSM2OSM_MODES = {
102
# 'bus': ['bus','trolleybus'], # to enable matching of buses categorised as trolleybus in osm
103
# 'train': 'train',
104
# 'tram': 'tram',
105
# 'light_rail':'light_rail',
106
# 'subway': 'subway',
107
# 'ferry': 'ferry'
108
# }
109
110
111
def md5hash(s):
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of *s*.

    Used to derive a stable, synthetic direction_id from a stop sequence
    (fingerprinting, not security).
    """
    digest = hashlib.md5(s.encode('utf-8'))
    return digest.hexdigest()
113
114
115
@benchmark
def import_gtfs(options, gtfsZip):
    """
    Imports the gtfs-data and filters it by the specified date and modes.

    options: parsed script options; this function reads options.gtfs, verbose,
        begin, end (seconds), date ("YYYYMMDD") and modes.
    gtfsZip: open zip archive of the GTFS feed (routes.txt, stops.txt, ...).
    Returns (routes, trips_on_day, shapes, stops, stop_times) as pandas
    DataFrames; shapes is None when the feed contains no shapes.txt.
    """
    if options.verbose:
        print('Loading GTFS data "%s"' % options.gtfs)

    # everything is read as str; numeric columns are cast explicitly below
    routes = pd.read_csv(gtfsZip.open('routes.txt'), dtype=str)
    stops = pd.read_csv(gtfsZip.open('stops.txt'), dtype=str)
    stop_times = pd.read_csv(gtfsZip.open('stop_times.txt'), dtype=str)
    trips = pd.read_csv(gtfsZip.open('trips.txt'), dtype=str)
    shapes = pd.read_csv(gtfsZip.open('shapes.txt'), dtype=str) if 'shapes.txt' in gtfsZip.namelist() else None
    calendar_dates = pd.read_csv(gtfsZip.open('calendar_dates.txt'), dtype=str)
    calendar = pd.read_csv(gtfsZip.open('calendar.txt'), dtype=str)

    # fill in optional GTFS columns that later code relies on
    if 'trip_headsign' not in trips:
        trips['trip_headsign'] = ''
    if 'direction_id' not in trips:
        # synthesize a direction id from the stop sequence fingerprint
        trips = discover_direction(routes, trips, stop_times)
    if 'route_short_name' not in routes:
        routes['route_short_name'] = routes['route_long_name']

    # for some obscure reason there are GTFS files which have the sequence index as a float
    stop_times['stop_sequence'] = stop_times['stop_sequence'].astype(float, copy=False)

    # filter trips within given begin and end time
    # first adapt stop times to a single day (from 00:00:00 to 23:59:59)
    full_day = pd.to_timedelta("24:00:00")

    # GTFS allows times >= 24:00:00 for service past midnight; timedeltas keep that
    stop_times['arrival_fixed'] = pd.to_timedelta(stop_times.arrival_time)
    stop_times['departure_fixed'] = pd.to_timedelta(stop_times.departure_time)

    # avoid trimming trips starting before midnight but ending after
    # NOTE(review): stop_sequence.min() is the minimum over the whole table,
    # not per trip — assumes every trip's first stop uses the global minimum
    # sequence number; verify against feeds with per-trip numbering.
    fix_trips = stop_times[(stop_times['arrival_fixed'] >= full_day) &  # gg/ here i arrive at or after midnight
                           (stop_times['stop_sequence'] == stop_times['stop_sequence'].min())].trip_id.values.tolist()

    # trips starting at/after 24:00 are wrapped entirely back into the 0-24h window
    stop_times.loc[stop_times.trip_id.isin(fix_trips), 'arrival_fixed'] = stop_times.loc[stop_times.trip_id.isin(
        fix_trips), 'arrival_fixed'] % full_day
    stop_times.loc[stop_times.trip_id.isin(fix_trips), 'departure_fixed'] = stop_times.loc[stop_times.trip_id.isin(
        fix_trips), 'departure_fixed'] % full_day

    # remaining stops past 24:00 belong to trips that cross midnight: duplicate
    # them under a ".trimmed" trip id with times wrapped into the next day
    extra_stop_times = stop_times.loc[stop_times.arrival_fixed > full_day, ]
    extra_stop_times.loc[:, 'arrival_fixed'] = extra_stop_times.loc[:, 'arrival_fixed'] % full_day
    extra_stop_times.loc[:, 'departure_fixed'] = extra_stop_times.loc[:, 'departure_fixed'] % full_day
    extra_trips_id = extra_stop_times.trip_id.values.tolist()
    extra_stop_times.loc[:, 'trip_id'] = extra_stop_times.loc[:, 'trip_id'] + ".trimmed"
    stop_times = pd.concat((stop_times, extra_stop_times))

    # mirror the ".trimmed" copies in the trips table so later merges find them
    extra_trips = trips.loc[trips.trip_id.isin(extra_trips_id), :]
    extra_trips.loc[:, 'trip_id'] = extra_trips.loc[:, 'trip_id'] + ".trimmed"
    trips = pd.concat((trips, extra_trips))

    time_interval = options.end - options.begin
    start_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.begin)))

    # if time_interval >= 86400 (24 hs), no filter needed
    if time_interval < 86400 and options.end <= 86400:
        # if simulation time end on the same day
        end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end)))
        stop_times = stop_times[(start_time <= stop_times['departure_fixed']) &
                                (stop_times['departure_fixed'] <= end_time)]
    elif time_interval < 86400 and options.end > 86400:
        # if simulation time includes next day trips: keep everything except
        # the gap between the wrapped end time and the start time
        end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end - 86400)))
        stop_times = stop_times[~((stop_times['departure_fixed'] > end_time) &
                                  (stop_times['departure_fixed'] < start_time))]

    # filter trips for a representative date
    weekday = 'monday tuesday wednesday thursday friday saturday sunday'.split(
        )[datetime.datetime.strptime(options.date, "%Y%m%d").weekday()]
    # exception_type '2' removes a service on that date, '1' adds it
    removed = calendar_dates[(calendar_dates.date == options.date) &
                             (calendar_dates.exception_type == '2')]
    services = calendar[(calendar.start_date <= options.date) &
                        (calendar.end_date >= options.date) &
                        (calendar[weekday] == '1') &
                        (~calendar.service_id.isin(removed.service_id))]
    added = calendar_dates[(calendar_dates.date == options.date) &
                           (calendar_dates.exception_type == '1')]
    trips_on_day = trips[trips.service_id.isin(services.service_id) |
                         trips.service_id.isin(added.service_id)]

    # filter routes by modes
    filter_gtfs_modes = [key for key, value in GTFS2OSM_MODES.items()
                         if value in options.modes]
    routes = routes[routes['route_type'].isin(filter_gtfs_modes)]
    if routes.empty:
        print("Warning! No GTFS data found for the given modes %s." % options.modes)
    if trips_on_day.empty:
        print("Warning! No GTFS data found for the given date %s." % options.date)

    return routes, trips_on_day, shapes, stops, stop_times
207
208
209
@benchmark
def discover_direction(routes, trips, stop_times):
    """
    Sets the direction value if it is not present in the GTFS data to identify separate
    directions of the same PT line.

    Returns the trips DataFrame with a 'direction_id' column whose value is an
    MD5 fingerprint of the trip's ordered stop sequence.
    """
    # join stop_times with trip and route attributes so grouping sees every trip
    trips_with_routes = pd.merge(trips, routes, on='route_id', how='left')
    enhanced = pd.merge(stop_times, trips_with_routes, on='trip_id')
    # concatenate each trip's stop ids into one string ...
    per_trip = enhanced.groupby(["trip_id"], as_index=False).agg({'stop_id': ' '.join})
    # ... and hash it into a stable synthetic direction identifier
    per_trip['direction_id'] = per_trip['stop_id'].apply(md5hash)
    # copy the direction_id back onto the trips table
    return pd.merge(trips, per_trip[['trip_id', 'direction_id']], on='trip_id', how='left')
221
222
223
@benchmark
def filter_gtfs(options, routes, trips_on_day, shapes, stops, stop_times):
    """
    Filters the gtfs-data by the given bounding box.

    If using shapes, searches the main shapes of route. A main shape represents the
    trip that is most often taken in a given public transport route. Only the paths
    (also referred to as routes) and stops of trips with main shapes will be mapped.
    Trips with secondary shapes will be defined by the start and end edge belonging
    to the main shape (if they a part of the main shape).

    options.bbox is (min_lon, min_lat, max_lon, max_lat).
    Returns (gtfs_data, trip_list, filtered_stops, shapes, shapes_dict) where
    shapes_dict maps every shape id to its line's main shape id.
    """
    stops['stop_lat'] = stops['stop_lat'].astype(float)
    stops['stop_lon'] = stops['stop_lon'].astype(float)

    if shapes is not None:
        shapes['shape_pt_lat'] = shapes['shape_pt_lat'].astype(float)
        shapes['shape_pt_lon'] = shapes['shape_pt_lon'].astype(float)
        shapes['shape_pt_sequence'] = shapes['shape_pt_sequence'].astype(float)

        # keep only shape points inside the bounding box
        shapes = shapes[(options.bbox[1] <= shapes['shape_pt_lat']) &
                        (shapes['shape_pt_lat'] <= options.bbox[3]) &
                        (options.bbox[0] <= shapes['shape_pt_lon']) &
                        (shapes['shape_pt_lon'] <= options.bbox[2])]

    # merge gtfs data from stop_times / trips / routes / stops
    gtfs_data = pd.merge(pd.merge(pd.merge(trips_on_day, stop_times, on='trip_id'),
                                  stops, on='stop_id'), routes, on='route_id')
    if shapes is None:
        # without shapes.txt, synthesize a shape id per route and direction
        gtfs_data['shape_id'] = gtfs_data['route_id'] + "_" + gtfs_data['direction_id']

    # filter relevant information
    gtfs_data = gtfs_data[['route_id', 'shape_id', 'trip_id', 'stop_id',
                           'route_short_name', 'route_type', 'trip_headsign',
                           'direction_id', 'stop_name', 'stop_lat', 'stop_lon',
                           'stop_sequence', 'arrival_fixed', 'departure_fixed']]

    # filter data inside SUMO net by stop location and shape
    gtfs_data = gtfs_data[(options.bbox[1] <= gtfs_data['stop_lat']) &
                          (gtfs_data['stop_lat'] <= options.bbox[3]) &
                          (options.bbox[0] <= gtfs_data['stop_lon']) &
                          (gtfs_data['stop_lon'] <= options.bbox[2])]

    # get list of trips with departure time to allow a sorted output
    # (one row per trip: the row of its first stop in sequence)
    trip_list = gtfs_data.loc[gtfs_data.groupby('trip_id').stop_sequence.idxmin()]

    # add new column for unambiguous stop_id and edge in sumo
    gtfs_data["stop_item_id"] = None
    gtfs_data["edge_id"] = None
    # create dict with shapes and their main shape
    shapes_dict = {}

    if shapes is not None:
        # search main and secondary shapes for each pt line (route and direction)
        filtered_stops = gtfs_data.groupby(['route_id', 'direction_id', 'shape_id'])[
            "shape_id"].size().reset_index(name='counts')
        group_shapes = filtered_stops.groupby(['route_id', 'direction_id']).shape_id.aggregate(set).reset_index()

        # the main shape is the one with the most rows (most frequently used)
        filtered_stops = filtered_stops.loc[filtered_stops.groupby(['route_id', 'direction_id'])['counts'].idxmax()][[  # noqa
            'route_id', 'shape_id', 'direction_id']]
        filtered_stops = pd.merge(filtered_stops, group_shapes, on=['route_id', 'direction_id'])

        # map every secondary shape (shape_id_y set) to its main shape (shape_id_x)
        for row in filtered_stops.itertuples():
            for sec_shape in row.shape_id_y:
                shapes_dict[sec_shape] = row.shape_id_x

        # create data frame with main shape for stop location
        filtered_stops = gtfs_data[gtfs_data['shape_id'].isin(filtered_stops.shape_id_x)]
        filtered_stops = filtered_stops[['route_id', 'shape_id', 'stop_id',
                                         'route_short_name', 'route_type',
                                         'trip_headsign', 'direction_id',
                                         'stop_name', 'stop_lat', 'stop_lon']].drop_duplicates()
    else:
        # If not using shapes, searches for the most common sequence of stops in a route.
        # Only the paths and stops of these main sequences are mapped. Creates 'shapes' with
        # the coordinates of first and last stop in main route sequences, used for mapping later.

        # create a new stop id with their trip sequence
        gtfs_data['new_stop_id'] = gtfs_data['stop_sequence'].astype(str) + '_' + gtfs_data['stop_id']

        # for a given trip, put the stops into a list and then into a string
        group_stops = gtfs_data.groupby(['trip_id', 'shape_id']).new_stop_id.aggregate(list).reset_index()
        group_stops['new_stop_id'] = group_stops['new_stop_id'].str.join(' ')

        # for a given shape (route and direction),
        # count the number of times the particular stop sequence (sequence and stop_id) is used
        group_size = group_stops.groupby(['shape_id', 'new_stop_id']).new_stop_id.size().reset_index(name='counts')

        # get one main route (most common sequence of stops) for each shape
        group_routes = group_size.loc[group_size.groupby(['shape_id']).counts.idxmax()]

        # split string of stops into list again
        group_routes['new_stop_id'] = group_routes['new_stop_id'].str.split(' ')

        # get all stops in all the main routes
        routes_stops = group_routes.explode('new_stop_id', ignore_index=True)
        routes_stops[['stop_sequence', 'stop_id']] = routes_stops.new_stop_id.str.split('_', expand=True)
        routes_stops['stop_sequence'] = routes_stops['stop_sequence'].astype(float)

        stop_indexes = []
        # loop through all unique shapes and collect the first and last stop in sequence
        for shape in routes_stops['shape_id'].unique():
            first_stop_index = routes_stops.loc[routes_stops['shape_id'] == shape, 'stop_sequence'].idxmin()
            last_stop_index = routes_stops.loc[routes_stops['shape_id'] == shape, 'stop_sequence'].idxmax()
            stop_indexes.append(first_stop_index)
            stop_indexes.append(last_stop_index)

        # drop indexes that have duplicates (i.e. first and last stop are the same)
        end_stops_index = [x for x in stop_indexes if stop_indexes.count(x) == 1]

        # create new 'shapes' file with the coordinates of first and last stop
        stop_info = gtfs_data[['shape_id', 'stop_id', 'stop_lat', 'stop_lon']].drop_duplicates()
        stop_shape = routes_stops.loc[end_stops_index, ['shape_id', 'stop_id', 'stop_sequence']]
        shapes = pd.merge(stop_shape, stop_info, on=['shape_id', 'stop_id'])
        shapes = shapes.rename(columns={"stop_sequence": "shape_pt_sequence",
                                        "stop_lon": "shape_pt_lon", "stop_lat": "shape_pt_lat"})

        # all stops of main routes, dropped routes with only 1 stop
        routes_stops = routes_stops.loc[routes_stops['shape_id'].isin(shapes['shape_id'])]

        # shapes dictionary is just the shape id in both columns
        for shape in shapes['shape_id'].unique():
            shapes_dict[shape] = shape

        # all stops of main routes, with other infos.
        # stop_sequence is used in merge because some stops are repeated twice in a route
        filtered_stops = pd.merge(routes_stops, gtfs_data,
                                  on=['shape_id', 'stop_id', 'stop_sequence'],
                                  how='left')[['route_id', 'stop_id', 'shape_id',
                                               'route_short_name', 'route_type', 'trip_headsign', 'direction_id',
                                               'stop_name', 'stop_lat', 'stop_lon', 'stop_sequence']
                                              ].drop_duplicates(['shape_id', 'stop_id', 'stop_sequence'])

    return gtfs_data, trip_list, filtered_stops, shapes, shapes_dict
356
357
358
def get_line_dir(line_orig, line_dest):
    """
    Calculates the direction of the public transport line based on the start
    and end nodes of the osm route.

    line_orig, line_dest: (lon, lat) pairs (any float-convertible values).
    Returns a compass-style bearing in degrees: 0 = north, 90 = east,
    180 = south, 270 = west.
    """
    d_lat = float(line_dest[1]) - float(line_orig[1])
    d_lon = float(line_dest[0]) - float(line_orig[0])

    # absolute angle of the segment against the lon axis, in [0, 90]
    # (a vertical segment would divide by zero, so it is handled explicitly)
    angle = 90 if d_lon == 0 else math.degrees(math.atan(abs(d_lat / d_lon)))

    if d_lat >= 0 and d_lon >= 0:    # 1st quadrant (towards NE)
        return 90 - angle
    if d_lat < 0 and d_lon > 0:      # 2nd quadrant (towards SE)
        return 90 + angle
    if d_lat <= 0 and d_lon <= 0:    # 3rd quadrant (towards SW)
        return 270 - angle
    return 270 + angle               # 4th quadrant (towards NW)
381
382
383
def repair_routes(options, net):
    """
    Runs duarouter to repair the given osm routes.

    Reads the ptLines from options.osm_routes, discards lines with a mode not
    in options.modes or with no edge inside the SUMO net, writes the remaining
    routes to a temporary duarouter input file, runs duarouter with --repair
    and collects the repaired edge lists.

    Returns a dict ptLine id -> [name, line, type, direction, color,
    repaired edge string, stop names]; lines duarouter could not repair are
    dropped from the dict.
    """
    osm_routes = {}
    # write dua input file
    with io.open("dua_input.xml", 'w+', encoding="utf8") as dua_file:
        dua_file.write(u"<routes>\n")
        for key, value in OSM2SUMO_MODES.items():
            dua_file.write(u' <vType id="%s" vClass="%s"/>\n' % (key, value))
        num_read = discard_type = discard_net = 0
        sumo_edges = set([sumo_edge.getID() for sumo_edge in net.getEdges()])
        for ptLine in sumolib.xml.parse(options.osm_routes, "ptLine"):
            num_read += 1
            if ptLine.type not in options.modes:
                discard_type += 1
                continue

            if not ptLine.route:
                discard_net += 1
                continue
            # keep only the route edges that exist in the loaded net
            route_edges = [edge for edge in ptLine.route[0].edges.split() if edge in sumo_edges]
            if not route_edges:
                discard_net += 1
                continue

            # transform ptLine origin and destination to geo coordinates
            x, y = net.getEdge(route_edges[0]).getFromNode().getCoord()
            line_orig = net.convertXY2LonLat(x, y)
            x, y = net.getEdge(route_edges[-1]).getFromNode().getCoord()
            line_dest = net.convertXY2LonLat(x, y)

            # find ptLine direction
            line_dir = get_line_dir(line_orig, line_dest)

            # index 5 (edges) stays None until duarouter delivers the repaired route
            osm_routes[ptLine.id] = [ptLine.attr_name, ptLine.line, ptLine.type, line_dir, ptLine.color,
                                     None, [s.attr_name for s in (ptLine.stops or [])]]
            dua_file.write(u' <trip id="%s" type="%s" depart="0" via="%s"/>\n' %
                           (ptLine.id, ptLine.type, (" ").join(route_edges)))
        dua_file.write(u"</routes>\n")

    if options.verbose:
        print("%s routes read, discarded for wrong mode: %s, outside of net %s, keeping %s" %
              (num_read, discard_type, discard_net, len(osm_routes)))
    # run duarouter
    subprocess.check_call([sumolib.checkBinary('duarouter'),
                           '-n', options.network,
                           '--route-files', 'dua_input.xml', '--repair',
                           '-o', 'dua_output.xml', '--ignore-errors',
                           '--error-log', options.dua_repair_output])

    # parse repaired routes; every line not found in the output stays "broken"
    n_routes = len(osm_routes)
    broken = set(osm_routes.keys())
    for ptline, ptline_route in parse_fast_nested("dua_output.xml", "vehicle", "id", "route", "edges"):
        osm_routes[ptline.id][5] = ptline_route.edges
        broken.remove(ptline.id)

    # remove dua files
    os.remove("dua_input.xml")
    os.remove("dua_output.xml")
    os.remove("dua_output.alt.xml")

    # remove invalid routes from dict (plain loop instead of the former
    # side-effecting list comprehension; broken only contains existing keys)
    for line in broken:
        del osm_routes[line]

    if n_routes != len(osm_routes):
        print("%s of %s routes have been imported, see '%s' for more information." %
              (len(osm_routes), n_routes, options.dua_repair_output))

    return osm_routes
454
455
456
@benchmark
def import_osm(options, net):
    """
    Imports the routes of the public transport lines from osm.

    Returns a dict ptLine id -> (name, line, type, direction, color, edge
    string, stop names). With options.repair the routes are first passed
    through duarouter (see repair_routes); otherwise they are taken as-is,
    keeping only edges present in the net.
    """
    if options.repair:
        if options.verbose:
            print("Import and repair osm routes")
        osm_routes = repair_routes(options, net)
    else:
        if options.verbose:
            print("Import osm routes")
        osm_routes = {}
        for ptLine in sumolib.xml.parse(options.osm_routes, "ptLine"):
            # skip lines with an unwanted mode or without any route element
            if ptLine.type not in options.modes or not ptLine.route:
                continue
            route_edges = ptLine.route[0].edges.split()
            route_edges = [e for e in route_edges if net.hasEdge(e)]
            if route_edges:
                # TODO recheck what happens if it is only one edge
                # direction is derived from the from-nodes of the first and last edge
                x, y = net.getEdge(route_edges[0]).getFromNode().getCoord()
                line_orig = net.convertXY2LonLat(x, y)

                x, y = net.getEdge(route_edges[-1]).getFromNode().getCoord()
                line_dest = net.convertXY2LonLat(x, y)

                line_dir = get_line_dir(line_orig, line_dest)

                # NOTE: stores the unfiltered edge string (ptLine.route[0].edges),
                # not the filtered route_edges list
                osm_routes[ptLine.id] = (ptLine.attr_name, ptLine.line,
                                         ptLine.type, line_dir, ptLine.color,
                                         ptLine.route[0].edges, [s.attr_name for s in (ptLine.stops or [])])
    return osm_routes
488
489
490
def _addToDataFrame(gtfs_data, row, shapes_dict, stop, edge):
491
shape_list = [sec_shape for sec_shape, main_shape in shapes_dict.items() if main_shape == row.shape_id]
492
gtfs_data.loc[(gtfs_data["stop_id"] == row.stop_id) &
493
(gtfs_data["shape_id"].isin(shape_list)),
494
"stop_item_id"] = stop
495
gtfs_data.loc[(gtfs_data["stop_id"] == row.stop_id) &
496
(gtfs_data["shape_id"].isin(shape_list)),
497
"edge_id"] = edge
498
499
500
def getBestLane(net, lon, lat, radius, stop_length, center, edge_set, pt_class, last_pos=-1):
    """Find a lane near (lon, lat) suitable for placing a stop.

    Only edges whose id is in edge_set are considered; the first lane that
    allows pt_class wins. Returns (lane_id, startPos, endPos) or None.
    NOTE(review): the `edge_set[0]` comparison requires an indexable sequence
    and would raise on a plain set; with the default last_pos=-1 the left side
    of the `or` is always true (pos >= 0), so it is never evaluated here —
    confirm callers that pass last_pos also pass a list/tuple.
    """
    # get edges near stop location
    x, y = net.convertLonLat2XY(lon, lat)
    edges = [e for e in net.getNeighboringEdges(x, y, radius, includeJunctions=False) if e[0].getID() in edge_set]
    # sort by distance but have edges longer than stop length first
    for edge, _ in sorted(edges, key=lambda x: (x[0].getLength() <= stop_length, x[1])):
        for lane in edge.getLanes():
            if lane.allows(pt_class):
                pos = lane.getClosestLanePosAndDist((x, y))[0]
                if pos > last_pos or edge.getID() != edge_set[0]:
                    # center=True puts the stop around pos, otherwise it ends at pos
                    start = max(0, pos - (stop_length / 2. if center else stop_length))
                    end = min(start + stop_length, lane.getLength())
                    return lane.getID(), start, end
    return None
514
515
516
def getAccess(net, lon, lat, radius, lane_id, max_access=10):
    """Build <access .../> strings for a stop on a lane without pedestrian access.

    Searches edges around (lon, lat) that allow pedestrians (nearest first) and
    returns up to max_access formatted access lines; returns an empty list when
    the stop's own edge already allows pedestrians.
    """
    x, y = net.convertLonLat2XY(lon, lat)
    lane = net.getLane(lane_id)
    access = []
    if not lane.getEdge().allows("pedestrian"):
        for access_edge, _ in sorted(net.getNeighboringEdges(x, y, radius), key=lambda i: i[1]):
            if access_edge.allows("pedestrian"):
                access_lane_idx, access_pos, access_dist = access_edge.getClosestLanePosDist((x, y))
                # the closest lane may not itself allow pedestrians; fall back
                # to the first lane of the edge that does
                if not access_edge.getLane(access_lane_idx).allows("pedestrian"):
                    for idx, lane in enumerate(access_edge.getLanes()):
                        if lane.allows("pedestrian"):
                            access_lane_idx = idx
                            break
                # length 1.5 * distance: walking length estimate for the access path
                access.append((u' <access friendlyPos="true" lane="%s_%s" pos="%.2f" length="%.2f"/>\n') %
                              (access_edge.getID(), access_lane_idx, access_pos, 1.5 * access_dist))
                if len(access) == max_access:
                    break
    return access
534
535
536
@benchmark
def map_gtfs_osm(options, net, osm_routes, gtfs_data, shapes, shapes_dict, filtered_stops):
    """
    Maps the routes from gtfs with the sumo routes imported from osm and maps
    the gtfs stops with the lane and position in sumo.

    Returns (map_routes, map_stops, missing_stops, missing_lines):
    map_routes: shape_id -> (osm ptLine id, edge list, color);
    map_stops: stop item id -> [name, lane, startPos, endPos, access, type, edge set];
    missing_stops / missing_lines: tuples for the warning output.
    """
    if options.verbose:
        print("Map stops and routes")

    map_routes = {}
    map_stops = {}
    # gtfs stops are grouped (not in exact geo position), so a large radius
    # for mapping is needed
    radius = 200

    missing_stops = []
    missing_lines = []
    # stop_id -> list of stop item ids already created for that gtfs stop
    stop_items = defaultdict(list)

    # get different permutations of stop names, and assign the collection of all stop names in route to the stop
    filtered_stops['stop_name'] = [[x] + re.split(r', | ,|,', x) + [x.replace(',', '')]
                                   for x in filtered_stops['stop_name']]
    filtered_shapes = filtered_stops.groupby(['shape_id', 'route_short_name',
                                              'route_type', 'direction_id']).stop_name.aggregate("sum").reset_index(
        name='stop_name_all')
    filtered_stops = pd.merge(filtered_stops, filtered_shapes)

    for row in filtered_stops.itertuples():
        # check if gtfs route already mapped to osm route
        if row.shape_id not in map_routes:
            # if route not mapped, find the osm route for shape id
            pt_line_name = row.route_short_name
            pt_type = GTFS2OSM_MODES[row.route_type]

            # get shape definition and define pt direction
            aux_shapes = shapes[shapes['shape_id'] == row.shape_id]
            pt_orig = aux_shapes[aux_shapes.shape_pt_sequence == aux_shapes.shape_pt_sequence.min()]
            pt_dest = aux_shapes[aux_shapes.shape_pt_sequence == aux_shapes.shape_pt_sequence.max()]
            line_dir = get_line_dir((pt_orig.shape_pt_lon.iloc[0], pt_orig.shape_pt_lat.iloc[0]),
                                    (pt_dest.shape_pt_lon.iloc[0], pt_dest.shape_pt_lat.iloc[0]))

            # get osm lines with same route name and pt type,
            # and if they have at least one matching stop name in osm and gtfs routes
            osm_lines = [(abs(line_dir - value[3]), ptline_id, value[4], value[5])
                         for ptline_id, value in osm_routes.items()
                         if value[1] == pt_line_name and value[2] == pt_type]
            # if value[1] == pt_line_name and value[2] in OSM2OSM_MODES[pt_type] and set(value[6]) & set(row.stop_name_all)]  # noqa
            if osm_lines:
                # get the direction for the found routes and take the route
                # with lower difference (angles wrapped so 350° ~ 10°)
                diff, osm_id, color, edges = min(osm_lines, key=lambda x: x[0] if x[0] < 180 else 360 - x[0])
                d = diff if diff < 180 else 360 - diff
                if d < 160:  # to prevent mapping to route going the opposite direction
                    # add mapped osm route to dict
                    map_routes[row.shape_id] = (osm_id, edges.split(), color)
                else:
                    missing_lines.append((row.route_id, pt_line_name, sumolib.xml.quoteattr(
                        str(row.trip_headsign), True), row.direction_id))
                    continue
            else:
                # no osm route found, do not map stops of route
                missing_lines.append((row.route_id, pt_line_name, sumolib.xml.quoteattr(
                    str(row.trip_headsign), True), row.direction_id))
                continue

        # set stop's type, class and length
        pt_type = GTFS2OSM_MODES[row.route_type]
        pt_class = OSM2SUMO_MODES[pt_type]
        if pt_class == "bus":
            stop_length = options.bus_stop_length
        elif pt_class == "tram":
            stop_length = options.tram_stop_length
        else:
            stop_length = options.train_stop_length

        stop_mapped = False
        # try to reuse a stop item that was already created for this gtfs stop
        for stop in stop_items[row.stop_id]:
            # for item of mapped stop
            stop_edge = map_stops[stop][1].rsplit("_", 1)[0]
            if stop_edge in map_routes[row.shape_id][1]:
                # if edge in route, the stops are the same
                # intersect the edge set
                map_stops[stop][6] = map_stops[stop][6] & set(map_routes[row.shape_id][1])
            else:
                # check if the wrong edge was adopted
                edge_inter = set(map_routes[row.shape_id][1]) & map_stops[stop][6]
                best = getBestLane(net, row.stop_lon, row.stop_lat, radius,
                                   stop_length, options.center_stops, edge_inter, pt_class)
                if best is None:
                    continue
                # update the lane id, start and end and add shape
                lane_id, start, end = best
                access = getAccess(net, row.stop_lon, row.stop_lat, 100, lane_id)
                map_stops[stop][1:7] = [lane_id, start, end, access, pt_type, edge_inter]
                # update edge in data frame
                stop_edge = lane_id.rsplit("_", 1)[0]
                gtfs_data.loc[gtfs_data["stop_item_id"] == stop, "edge_id"] = stop_edge
            # add to data frame
            _addToDataFrame(gtfs_data, row, shapes_dict, stop, stop_edge)
            stop_mapped = True
            break

        # if stop not mapped
        if not stop_mapped:
            # search only on the edges of the mapped osm route
            edge_inter = set(map_routes[row.shape_id][1])
            best = getBestLane(net, row.stop_lon, row.stop_lat, radius,
                               stop_length, options.center_stops, edge_inter, pt_class)
            if best is not None:
                lane_id, start, end = best
                access = getAccess(net, row.stop_lon, row.stop_lat, 100, lane_id)
                stop_item_id = "%s_%s" % (row.stop_id, len(stop_items[row.stop_id]))
                stop_items[row.stop_id].append(stop_item_id)
                map_stops[stop_item_id] = [sumolib.xml.quoteattr(row.stop_name[0], True),
                                           lane_id, start, end, access, pt_type, edge_inter]
                _addToDataFrame(gtfs_data, row, shapes_dict, stop_item_id, lane_id.split("_")[0])
                stop_mapped = True

        # if stop not mapped, add to missing stops
        if not stop_mapped:
            missing_stops.append((row.stop_id, sumolib.xml.quoteattr(
                row.stop_name[0], True), row.route_short_name, row.direction_id))
    # pprint(map_routes)
    # pprint(map_stops)
    return map_routes, map_stops, missing_stops, missing_lines
660
661
662
def write_vtypes(options, seen=None):
    """Write a vType definition per enabled OSM mode to options.vtype_output.

    Only modes listed in options.modes are written; when *seen* is given, the
    mode must additionally be contained in it. Does nothing when no
    vtype_output is configured.
    """
    if not options.vtype_output:
        return
    with sumolib.openz(options.vtype_output, mode='w') as vout:
        sumolib.xml.writeHeader(vout, root="additional", options=options)
        for osm_type, sumo_class in sorted(OSM2SUMO_MODES.items()):
            if osm_type not in options.modes:
                continue
            if seen is not None and osm_type not in seen:
                continue
            vout.write(u' <vType id="%s" vClass="%s"/>\n' %
                       (osm_type, sumo_class))
        vout.write(u'</additional>\n')
671
672
673
def write_gtfs_osm_outputs(options, map_routes, map_stops, missing_stops, missing_lines,
                           gtfs_data, trip_list, shapes_dict, net):
    """
    Generates stops and routes for sumo and saves the unmapped elements.

    Writes the stop definitions to options.additional_output, the routes and
    vehicles to options.route_output (repeated for every simulated day) and
    the unmapped stops/lines/sequence errors to options.warning_output.
    """
    if options.verbose:
        print("Generates stops and routes output")

    # determine if we need to format times (depart, duration, until) to be human readable or whole seconds
    ft = humanReadableTime if "hrtime" in options and options.hrtime else int

    with sumolib.openz(options.additional_output, mode='w') as output_file:
        sumolib.xml.writeHeader(output_file, root="additional", options=options)
        for stop, value in sorted(map_stops.items()):
            name, lane, start_pos, end_pos, access, v_type = value[:6]
            typ = "busStop" if v_type == "bus" else "trainStop"
            # self-closing element when there are no access lanes
            output_file.write(u' <%s id="%s" lane="%s" startPos="%.2f" endPos="%.2f" name=%s friendlyPos="true"%s>\n' %  # noqa
                              (typ, stop, lane, start_pos, end_pos, name, "" if access else "/"))
            for a in access:
                output_file.write(a)
            if access:
                output_file.write(u' </%s>\n' % typ)
        output_file.write(u'</additional>\n')

    sequence_errors = []
    write_vtypes(options)

    with sumolib.openz(options.route_output, mode='w') as output_file:
        sumolib.xml.writeHeader(output_file, root="routes", options=options)
        # number of *additional* whole days beyond the first; // 86401 so that
        # an end of exactly 86400 s (24 h) still counts as a single day
        numDays = int(options.end) // 86401
        start_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.begin)))
        shapes_written = set()

        for day in range(numDays+1):
            if day == numDays and options.end % 86400 > 0:
                # if last day, filter trips until given end time
                end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end-86400*numDays)))
                trip_list = trip_list[trip_list["arrival_fixed"] <= end_time]

            # stop sequence tuple -> first trip id seen with that sequence,
            # used as the shared "line" suffix for identical services
            seqs = {}
            for row in trip_list.sort_values("arrival_fixed").itertuples():

                if day != 0 and row.trip_id.endswith(".trimmed"):
                    # only add trimmed trips the first day
                    continue

                if day == 0 and row.arrival_fixed < start_time:
                    # avoid writing first day trips that not applied
                    continue

                main_shape = shapes_dict.get(row.shape_id)
                if main_shape not in map_routes:
                    # if route not mapped
                    continue
                pt_color = map_routes[main_shape][2]
                if pt_color is None:
                    pt_color = ""
                else:
                    pt_color = ' color="%s"' % pt_color
                pt_type = GTFS2OSM_MODES[row.route_type]
                edges_list = map_routes[main_shape][1]
                stop_list = gtfs_data[gtfs_data["trip_id"] == row.trip_id].sort_values("stop_sequence")
                # positions of this trip's mapped stops along the route's edges
                stop_index = [edges_list.index(stop.edge_id)
                              for stop in stop_list.itertuples()
                              if stop.edge_id in edges_list]

                if len(stop_index) < options.min_stops:
                    # Not enough stops mapped
                    continue

                # write each route definition only once, on first use
                if main_shape not in shapes_written:
                    output_file.write(u' <route id="%s" edges="%s"/>\n' % (main_shape, " ".join(edges_list)))
                    shapes_written.add(main_shape)

                stopSeq = tuple([stop.stop_item_id for stop in stop_list.itertuples()])
                if stopSeq not in seqs:
                    seqs[stopSeq] = row.trip_id

                # determine departure from first valid stop
                depart = None
                for stop in stop_list.itertuples():
                    if stop.stop_item_id:
                        depart = ft(parseTime(str(stop.arrival_fixed.days + day) +
                                              ":" + str(stop.arrival_fixed).split(' ')[2]))
                        break

                veh_attr = (row.trip_id, day,
                            main_shape, row.route_id, seqs[stopSeq], depart,
                            min(stop_index), max(stop_index), pt_type, pt_color)
                output_file.write(u' <vehicle id="%s.%s" route="%s" line="%s_%s" depart="%s" departEdge="%s" arrivalEdge="%s" type="%s"%s>\n' % veh_attr)  # noqa
                params = [("gtfs.route_name", row.route_short_name)]
                if row.trip_headsign:
                    params.append(("gtfs.trip_headsign", row.trip_headsign))
                if options.writeTerminals:
                    firstStop = stop_list.iloc[0]
                    lastStop = stop_list.iloc[-1]
                    firstDepart = parseTime(str(firstStop.departure_fixed.days + day) +
                                            ":" + str(firstStop.departure_fixed).split(' ')[2])
                    lastArrival = parseTime(str(lastStop.arrival_fixed.days + day) +
                                            ":" + str(lastStop.arrival_fixed).split(' ')[2])
                    # NOTE: the 'arrrival' spelling is a long-standing typo in
                    # the emitted param key; kept for output compatibility
                    params += [("gtfs.origin_stop", firstStop.stop_name),
                               ("gtfs.origin_depart", ft(firstDepart)),
                               ("gtfs.destination_stop", lastStop.stop_name),
                               ("gtfs.destination_arrrival", ft(lastArrival))]
                for k, v in params:
                    output_file.write(u' <param key="%s" value=%s/>\n' % (
                        k, sumolib.xml.quoteattr(str(v), True)))

                # write stops in route order; stops that would go backwards
                # along the route are reported as sequence errors instead
                check_seq = -1
                for stop in stop_list.itertuples():
                    if not stop.stop_item_id:
                        # if stop not mapped
                        continue
                    stop_index = edges_list.index(stop.edge_id)
                    if stop_index >= check_seq:
                        check_seq = stop_index
                        # TODO check stop position if we are on the same edge as before
                        stop_attr = (stop.stop_item_id,
                                     ft(parseTime(str(stop.arrival_fixed.days + day) +
                                                  ":" + str(stop.arrival_fixed).split(' ')[2])),
                                     ft(options.duration) if options.duration > 60 else options.duration,
                                     ft(parseTime(str(stop.departure_fixed.days + day) +
                                                  ":" + str(stop.departure_fixed).split(' ')[2])),
                                     stop.stop_sequence, stop_list.stop_sequence.max(),
                                     sumolib.xml.quoteattr(stop.stop_name, True))
                        output_file.write(u' <stop busStop="%s" arrival="%s" duration="%s" until="%s"/><!--stopSequence="%s/%s" %s-->\n' % stop_attr)  # noqa
                    elif stop_index < check_seq:
                        # stop not downstream
                        sequence_errors.append((stop.stop_item_id, sumolib.xml.quoteattr(stop.stop_name, True),
                                                row.route_short_name,
                                                sumolib.xml.quoteattr(str(row.trip_headsign), True), row.direction_id,
                                                stop.trip_id))

                output_file.write(u' </vehicle>\n')
        output_file.write(u'</routes>\n')

    # ----------------------- Save missing data ------------------
    if any([missing_stops, missing_lines, sequence_errors]):
        print("Not all given gtfs elements have been mapped, see %s for more information" % options.warning_output)
        with io.open(options.warning_output, 'w', encoding="utf8") as output_file:
            sumolib.xml.writeHeader(output_file, root="missingElements", rootAttrs=None, options=options)
            for stop in sorted(set(missing_stops)):
                output_file.write(u' <stop id="%s" name=%s ptLine="%s" direction_id="%s"/>\n' % stop)
            for line in sorted(set(missing_lines)):
                output_file.write(u' <ptLine id="%s" name="%s" trip_headsign=%s direction_id="%s"/>\n' % line)
            for stop in sorted(set(sequence_errors)):
                output_file.write(u' <stopSequence stop_id="%s" stop_name=%s ptLine="%s" trip_headsign=%s direction_id="%s" trip_id="%s"/>\n' % stop)  # noqa
            output_file.write(u'</missingElements>\n')
821
822