Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
eclipse
GitHub Repository: eclipse/sumo
Path: blob/main/tools/import/gtfs/gtfs2osm.py
169679 views
1
# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
2
# Copyright (C) 2010-2025 German Aerospace Center (DLR) and others.
3
# This program and the accompanying materials are made available under the
4
# terms of the Eclipse Public License 2.0 which is available at
5
# https://www.eclipse.org/legal/epl-2.0/
6
# This Source Code may also be made available under the following Secondary
7
# Licenses when the conditions for such availability set forth in the Eclipse
8
# Public License 2.0 are satisfied: GNU General Public License, version 2
9
# or later which is available at
10
# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
11
# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
12
13
# @file gtfs2osm.py
14
# @author Giuliana Armellini
15
# @author Mirko Barthauer
16
# @date 2021-02-18
17
18
"""
19
Import public transport from GTFS (schedules) and OSM (routes) data
20
"""
21
22
import os
23
import sys
24
import subprocess
25
import datetime
26
import time
27
import math
28
import io
29
import re
30
from collections import defaultdict
31
import hashlib
32
33
# from pprint import pprint
34
35
import pandas as pd
36
pd.options.mode.chained_assignment = None # default='warn'
37
38
sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
39
import sumolib # noqa
40
from sumolib.xml import parse_fast_nested # noqa
41
from sumolib.miscutils import benchmark, parseTime, humanReadableTime # noqa
42
43
# ----------------------- gtfs, osm and sumo modes ----------------------------
# OSM public-transport mode -> SUMO vehicle class (vClass)
OSM2SUMO_MODES = dict(
    bus='bus',
    train='rail',
    tram='tram',
    light_rail='rail_urban',
    monorail='rail_urban',
    subway='subway',
    aerialway='cable_car',
    ferry='ship',
)
54
55
# GTFS route_type codes (plus some HAFAS product abbreviations) -> OSM mode.
# Codes without an entry here are not imported at all.
GTFS2OSM_MODES = {
    # https://developers.google.com/transit/gtfs/reference/#routestxt
    '0': 'tram',
    '1': 'subway',
    '2': 'train',
    '3': 'bus',
    '4': 'ferry',
    # '5': 'cableTram',
    # '6': 'aerialLift',
    # '7': 'funicular',
    # https://developers.google.com/transit/gtfs/reference/extended-route-types
    '100': 'train',  # DB
    '109': 'light_rail',  # S-Bahn
    '400': 'subway',  # U-Bahn
    '1000': 'ferry',  # Faehre
    # additional modes used in Hamburg
    '402': 'subway',  # U-Bahn
    '1200': 'ferry',  # Faehre
    # modes used by hafas
    's': 'train',
    'RE': 'train',
    'RB': 'train',
    'IXB': 'train',  # tbd
    'ICE': 'train',
    'IC': 'train',
    'IRX': 'train',  # tbd
    'EC': 'train',
    'NJ': 'train',  # tbd
    'RHI': 'train',  # tbd
    'DPN': 'train',  # tbd
    'SCH': 'train',  # tbd
    'Bsv': 'train',  # tbd
    'KAT': 'train',  # tbd
    'AIR': 'train',  # tbd
    'DPS': 'train',  # tbd
    'lt': 'train',  # tbd
    'BUS': 'bus',  # tbd
    'Str': 'tram',  # tbd
    'DPF': 'train',  # tbd
}
# https://developers.google.com/transit/gtfs/reference/extended-route-types
# 700..716 are the extended bus service codes, 900..906 the tram service codes
GTFS2OSM_MODES.update({str(code): 'bus' for code in range(700, 717)})
GTFS2OSM_MODES.update({str(code): 'tram' for code in range(900, 907)})
100
101
# OSM2OSM_MODES = {
102
# 'bus': ['bus','trolleybus'], # to enable matching of buses categorised as trolleybus in osm
103
# 'train': 'train',
104
# 'tram': 'tram',
105
# 'light_rail':'light_rail',
106
# 'subway': 'subway',
107
# 'ferry': 'ferry'
108
# }
109
110
111
def md5hash(s):
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of *s*.

    Used to derive a stable, synthetic direction_id from a stop sequence
    (fingerprinting, not security).
    """
    digest = hashlib.md5(s.encode('utf-8'))
    return digest.hexdigest()
113
114
115
@benchmark
def import_gtfs(options, gtfsZip):
    """
    Imports the gtfs-data and filters it by the specified date and modes.

    options: parsed script options; this function reads options.gtfs, verbose,
        begin, end (seconds), date ("YYYYMMDD") and modes.
    gtfsZip: open zip archive of the GTFS feed (routes.txt, stops.txt, ...).
    Returns (routes, trips_on_day, shapes, stops, stop_times) as pandas
    DataFrames; shapes is None when the feed contains no shapes.txt.
    """
    if options.verbose:
        print('Loading GTFS data "%s"' % options.gtfs)

    # everything is read as str; numeric columns are cast explicitly below
    routes = pd.read_csv(gtfsZip.open('routes.txt'), dtype=str)
    stops = pd.read_csv(gtfsZip.open('stops.txt'), dtype=str)
    stop_times = pd.read_csv(gtfsZip.open('stop_times.txt'), dtype=str)
    trips = pd.read_csv(gtfsZip.open('trips.txt'), dtype=str)
    shapes = pd.read_csv(gtfsZip.open('shapes.txt'), dtype=str) if 'shapes.txt' in gtfsZip.namelist() else None
    calendar_dates = pd.read_csv(gtfsZip.open('calendar_dates.txt'), dtype=str)
    calendar = pd.read_csv(gtfsZip.open('calendar.txt'), dtype=str)

    # fill in optional GTFS columns that later code relies on
    if 'trip_headsign' not in trips:
        trips['trip_headsign'] = ''
    if 'direction_id' not in trips:
        # synthesize a direction id from the stop sequence fingerprint
        trips = discover_direction(routes, trips, stop_times)
    if 'route_short_name' not in routes:
        routes['route_short_name'] = routes['route_long_name']

    # for some obscure reason there are GTFS files which have the sequence index as a float
    stop_times['stop_sequence'] = stop_times['stop_sequence'].astype(float, copy=False)

    # filter trips within given begin and end time
    # first adapt stop times to a single day (from 00:00:00 to 23:59:59)
    full_day = pd.to_timedelta("24:00:00")

    # GTFS allows times >= 24:00:00 for service past midnight; timedeltas keep that
    stop_times['arrival_fixed'] = pd.to_timedelta(stop_times.arrival_time)
    stop_times['departure_fixed'] = pd.to_timedelta(stop_times.departure_time)

    # avoid trimming trips starting before midnight but ending after
    # NOTE(review): stop_sequence.min() is the minimum over the whole table,
    # not per trip — assumes every trip's first stop uses the global minimum
    # sequence number; verify against feeds with per-trip numbering.
    fix_trips = stop_times[(stop_times['arrival_fixed'] >= full_day) &  # gg/ here i arrive at or after midnight
                           (stop_times['stop_sequence'] == stop_times['stop_sequence'].min())].trip_id.values.tolist()

    # trips starting at/after 24:00 are wrapped entirely back into the 0-24h window
    stop_times.loc[stop_times.trip_id.isin(fix_trips), 'arrival_fixed'] = stop_times.loc[stop_times.trip_id.isin(
        fix_trips), 'arrival_fixed'] % full_day
    stop_times.loc[stop_times.trip_id.isin(fix_trips), 'departure_fixed'] = stop_times.loc[stop_times.trip_id.isin(
        fix_trips), 'departure_fixed'] % full_day

    # remaining stops past 24:00 belong to trips that cross midnight: duplicate
    # them under a ".trimmed" trip id with times wrapped into the next day
    extra_stop_times = stop_times.loc[stop_times.arrival_fixed > full_day, ]
    extra_stop_times.loc[:, 'arrival_fixed'] = extra_stop_times.loc[:, 'arrival_fixed'] % full_day
    extra_stop_times.loc[:, 'departure_fixed'] = extra_stop_times.loc[:, 'departure_fixed'] % full_day
    extra_trips_id = extra_stop_times.trip_id.values.tolist()
    extra_stop_times.loc[:, 'trip_id'] = extra_stop_times.loc[:, 'trip_id'] + ".trimmed"
    stop_times = pd.concat((stop_times, extra_stop_times))

    # mirror the ".trimmed" copies in the trips table so later merges find them
    extra_trips = trips.loc[trips.trip_id.isin(extra_trips_id), :]
    extra_trips.loc[:, 'trip_id'] = extra_trips.loc[:, 'trip_id'] + ".trimmed"
    trips = pd.concat((trips, extra_trips))

    time_interval = options.end - options.begin
    start_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.begin)))

    # if time_interval >= 86400 (24 hs), no filter needed
    if time_interval < 86400 and options.end <= 86400:
        # if simulation time end on the same day
        end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end)))
        stop_times = stop_times[(start_time <= stop_times['departure_fixed']) &
                                (stop_times['departure_fixed'] <= end_time)]
    elif time_interval < 86400 and options.end > 86400:
        # if simulation time includes next day trips: keep everything except
        # the gap between the wrapped end time and the start time
        end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end - 86400)))
        stop_times = stop_times[~((stop_times['departure_fixed'] > end_time) &
                                  (stop_times['departure_fixed'] < start_time))]

    # filter trips for a representative date
    weekday = 'monday tuesday wednesday thursday friday saturday sunday'.split(
        )[datetime.datetime.strptime(options.date, "%Y%m%d").weekday()]
    # exception_type '2' removes a service on that date, '1' adds it
    removed = calendar_dates[(calendar_dates.date == options.date) &
                             (calendar_dates.exception_type == '2')]
    services = calendar[(calendar.start_date <= options.date) &
                        (calendar.end_date >= options.date) &
                        (calendar[weekday] == '1') &
                        (~calendar.service_id.isin(removed.service_id))]
    added = calendar_dates[(calendar_dates.date == options.date) &
                           (calendar_dates.exception_type == '1')]
    trips_on_day = trips[trips.service_id.isin(services.service_id) |
                         trips.service_id.isin(added.service_id)]

    # filter routes by modes
    filter_gtfs_modes = [key for key, value in GTFS2OSM_MODES.items()
                         if value in options.modes]
    routes = routes[routes['route_type'].isin(filter_gtfs_modes)]
    if routes.empty:
        print("Warning! No GTFS data found for the given modes %s." % options.modes)
    if trips_on_day.empty:
        print("Warning! No GTFS data found for the given date %s." % options.date)

    return routes, trips_on_day, shapes, stops, stop_times
207
208
209
@benchmark
def discover_direction(routes, trips, stop_times):
    """
    Sets the direction value if it is not present in the GTFS data to identify separate
    directions of the same PT line.

    Returns the trips DataFrame with a 'direction_id' column whose value is an
    MD5 fingerprint of the trip's ordered stop sequence.
    """
    # join stop_times with trip and route attributes so grouping sees every trip
    trips_with_routes = pd.merge(trips, routes, on='route_id', how='left')
    enhanced = pd.merge(stop_times, trips_with_routes, on='trip_id')
    # concatenate each trip's stop ids into one string ...
    per_trip = enhanced.groupby(["trip_id"], as_index=False).agg({'stop_id': ' '.join})
    # ... and hash it into a stable synthetic direction identifier
    per_trip['direction_id'] = per_trip['stop_id'].apply(md5hash)
    # copy the direction_id back onto the trips table
    return pd.merge(trips, per_trip[['trip_id', 'direction_id']], on='trip_id', how='left')
221
222
223
@benchmark
def filter_gtfs(options, routes, trips_on_day, shapes, stops, stop_times):
    """
    Filters the gtfs-data by the given bounding box.

    If using shapes, searches the main shapes of route. A main shape represents the
    trip that is most often taken in a given public transport route. Only the paths
    (also referred to as routes) and stops of trips with main shapes will be mapped.
    Trips with secondary shapes will be defined by the start and end edge belonging
    to the main shape (if they a part of the main shape).

    options.bbox is (min_lon, min_lat, max_lon, max_lat).
    Returns (gtfs_data, trip_list, filtered_stops, shapes, shapes_dict) where
    shapes_dict maps every shape id to its line's main shape id.
    """
    stops['stop_lat'] = stops['stop_lat'].astype(float)
    stops['stop_lon'] = stops['stop_lon'].astype(float)

    if shapes is not None:
        shapes['shape_pt_lat'] = shapes['shape_pt_lat'].astype(float)
        shapes['shape_pt_lon'] = shapes['shape_pt_lon'].astype(float)
        shapes['shape_pt_sequence'] = shapes['shape_pt_sequence'].astype(float)

        # keep only shape points inside the bounding box
        shapes = shapes[(options.bbox[1] <= shapes['shape_pt_lat']) &
                        (shapes['shape_pt_lat'] <= options.bbox[3]) &
                        (options.bbox[0] <= shapes['shape_pt_lon']) &
                        (shapes['shape_pt_lon'] <= options.bbox[2])]

    # merge gtfs data from stop_times / trips / routes / stops
    gtfs_data = pd.merge(pd.merge(pd.merge(trips_on_day, stop_times, on='trip_id'),
                                  stops, on='stop_id'), routes, on='route_id')
    if shapes is None:
        # without shapes.txt, synthesize a shape id per route and direction
        gtfs_data['shape_id'] = gtfs_data['route_id'] + "_" + gtfs_data['direction_id']

    # filter relevant information
    gtfs_data = gtfs_data[['route_id', 'shape_id', 'trip_id', 'stop_id',
                           'route_short_name', 'route_type', 'trip_headsign',
                           'direction_id', 'stop_name', 'stop_lat', 'stop_lon',
                           'stop_sequence', 'arrival_fixed', 'departure_fixed']]

    # filter data inside SUMO net by stop location and shape
    gtfs_data = gtfs_data[(options.bbox[1] <= gtfs_data['stop_lat']) &
                          (gtfs_data['stop_lat'] <= options.bbox[3]) &
                          (options.bbox[0] <= gtfs_data['stop_lon']) &
                          (gtfs_data['stop_lon'] <= options.bbox[2])]

    # get list of trips with departure time to allow a sorted output
    # (one row per trip: the row of its first stop in sequence)
    trip_list = gtfs_data.loc[gtfs_data.groupby('trip_id').stop_sequence.idxmin()]

    # add new column for unambiguous stop_id and edge in sumo
    gtfs_data["stop_item_id"] = None
    gtfs_data["edge_id"] = None
    # create dict with shapes and their main shape
    shapes_dict = {}

    if shapes is not None:
        # search main and secondary shapes for each pt line (route and direction)
        filtered_stops = gtfs_data.groupby(['route_id', 'direction_id', 'shape_id'])[
            "shape_id"].size().reset_index(name='counts')
        group_shapes = filtered_stops.groupby(['route_id', 'direction_id']).shape_id.aggregate(set).reset_index()

        # the main shape is the one with the most rows (most frequently used)
        filtered_stops = filtered_stops.loc[filtered_stops.groupby(['route_id', 'direction_id'])['counts'].idxmax()][[  # noqa
            'route_id', 'shape_id', 'direction_id']]
        filtered_stops = pd.merge(filtered_stops, group_shapes, on=['route_id', 'direction_id'])

        # map every secondary shape (shape_id_y set) to its main shape (shape_id_x)
        for row in filtered_stops.itertuples():
            for sec_shape in row.shape_id_y:
                shapes_dict[sec_shape] = row.shape_id_x

        # create data frame with main shape for stop location
        filtered_stops = gtfs_data[gtfs_data['shape_id'].isin(filtered_stops.shape_id_x)]
        filtered_stops = filtered_stops[['route_id', 'shape_id', 'stop_id',
                                         'route_short_name', 'route_type',
                                         'trip_headsign', 'direction_id',
                                         'stop_name', 'stop_lat', 'stop_lon']].drop_duplicates()
    else:
        # If not using shapes, searches for the most common sequence of stops in a route.
        # Only the paths and stops of these main sequences are mapped. Creates 'shapes' with
        # the coordinates of first and last stop in main route sequences, used for mapping later.

        # create a new stop id with their trip sequence
        gtfs_data['new_stop_id'] = gtfs_data['stop_sequence'].astype(str) + '_' + gtfs_data['stop_id']

        # for a given trip, put the stops into a list and then into a string
        group_stops = gtfs_data.groupby(['trip_id', 'shape_id']).new_stop_id.aggregate(list).reset_index()
        group_stops['new_stop_id'] = group_stops['new_stop_id'].str.join(' ')

        # for a given shape (route and direction),
        # count the number of times the particular stop sequence (sequence and stop_id) is used
        group_size = group_stops.groupby(['shape_id', 'new_stop_id']).new_stop_id.size().reset_index(name='counts')

        # get one main route (most common sequence of stops) for each shape
        group_routes = group_size.loc[group_size.groupby(['shape_id']).counts.idxmax()]

        # split string of stops into list again
        group_routes['new_stop_id'] = group_routes['new_stop_id'].str.split(' ')

        # get all stops in all the main routes
        routes_stops = group_routes.explode('new_stop_id', ignore_index=True)
        routes_stops[['stop_sequence', 'stop_id']] = routes_stops.new_stop_id.str.split('_', expand=True)
        routes_stops['stop_sequence'] = routes_stops['stop_sequence'].astype(float)

        stop_indexes = []
        # loop through all unique shapes and collect the first and last stop in sequence
        for shape in routes_stops['shape_id'].unique():
            first_stop_index = routes_stops.loc[routes_stops['shape_id'] == shape, 'stop_sequence'].idxmin()
            last_stop_index = routes_stops.loc[routes_stops['shape_id'] == shape, 'stop_sequence'].idxmax()
            stop_indexes.append(first_stop_index)
            stop_indexes.append(last_stop_index)

        # drop indexes that have duplicates (i.e. first and last stop are the same)
        end_stops_index = [x for x in stop_indexes if stop_indexes.count(x) == 1]

        # create new 'shapes' file with the coordinates of first and last stop
        stop_info = gtfs_data[['shape_id', 'stop_id', 'stop_lat', 'stop_lon']].drop_duplicates()
        stop_shape = routes_stops.loc[end_stops_index, ['shape_id', 'stop_id', 'stop_sequence']]
        shapes = pd.merge(stop_shape, stop_info, on=['shape_id', 'stop_id'])
        shapes = shapes.rename(columns={"stop_sequence": "shape_pt_sequence",
                                        "stop_lon": "shape_pt_lon", "stop_lat": "shape_pt_lat"})

        # all stops of main routes, dropped routes with only 1 stop
        routes_stops = routes_stops.loc[routes_stops['shape_id'].isin(shapes['shape_id'])]

        # shapes dictionary is just the shape id in both columns
        for shape in shapes['shape_id'].unique():
            shapes_dict[shape] = shape

        # all stops of main routes, with other infos.
        # stop_sequence is used in merge because some stops are repeated twice in a route
        filtered_stops = pd.merge(routes_stops, gtfs_data,
                                  on=['shape_id', 'stop_id', 'stop_sequence'],
                                  how='left')[['route_id', 'stop_id', 'shape_id',
                                               'route_short_name', 'route_type', 'trip_headsign', 'direction_id',
                                               'stop_name', 'stop_lat', 'stop_lon', 'stop_sequence']
                                              ].drop_duplicates(['shape_id', 'stop_id', 'stop_sequence'])

    return gtfs_data, trip_list, filtered_stops, shapes, shapes_dict
356
357
358
def get_line_dir(line_orig, line_dest):
    """
    Calculates the direction of the public transport line based on the start
    and end nodes of the osm route.

    line_orig, line_dest: (lon, lat) pairs (any float-convertible values).
    Returns a compass-style bearing in degrees: 0 = north, 90 = east,
    180 = south, 270 = west.
    """
    d_lat = float(line_dest[1]) - float(line_orig[1])
    d_lon = float(line_dest[0]) - float(line_orig[0])

    # absolute angle of the segment against the lon axis, in [0, 90]
    # (a vertical segment would divide by zero, so it is handled explicitly)
    angle = 90 if d_lon == 0 else math.degrees(math.atan(abs(d_lat / d_lon)))

    if d_lat >= 0 and d_lon >= 0:    # 1st quadrant (towards NE)
        return 90 - angle
    if d_lat < 0 and d_lon > 0:      # 2nd quadrant (towards SE)
        return 90 + angle
    if d_lat <= 0 and d_lon <= 0:    # 3rd quadrant (towards SW)
        return 270 - angle
    return 270 + angle               # 4th quadrant (towards NW)
381
382
383
def repair_routes(options, net):
    """
    Runs duarouter to repair the given osm routes.

    Reads the ptLines from options.osm_routes, discards lines with a mode not
    in options.modes or with no edge inside the SUMO net, writes the remaining
    routes to a temporary duarouter input file, runs duarouter with --repair
    and collects the repaired edge lists.

    Returns a dict ptLine id -> [name, line, type, direction, color,
    repaired edge string, stop names]; lines duarouter could not repair are
    dropped from the dict.
    """
    osm_routes = {}
    # write dua input file
    with io.open("dua_input.xml", 'w+', encoding="utf8") as dua_file:
        dua_file.write(u"<routes>\n")
        for key, value in OSM2SUMO_MODES.items():
            dua_file.write(u' <vType id="%s" vClass="%s"/>\n' % (key, value))
        num_read = discard_type = discard_net = 0
        sumo_edges = set([sumo_edge.getID() for sumo_edge in net.getEdges()])
        for ptLine in sumolib.xml.parse(options.osm_routes, "ptLine"):
            num_read += 1
            if ptLine.type not in options.modes:
                discard_type += 1
                continue

            if not ptLine.route:
                discard_net += 1
                continue
            # keep only the route edges that exist in the loaded net
            route_edges = [edge for edge in ptLine.route[0].edges.split() if edge in sumo_edges]
            if not route_edges:
                discard_net += 1
                continue

            # transform ptLine origin and destination to geo coordinates
            x, y = net.getEdge(route_edges[0]).getFromNode().getCoord()
            line_orig = net.convertXY2LonLat(x, y)
            x, y = net.getEdge(route_edges[-1]).getFromNode().getCoord()
            line_dest = net.convertXY2LonLat(x, y)

            # find ptLine direction
            line_dir = get_line_dir(line_orig, line_dest)

            # index 5 (edges) stays None until duarouter delivers the repaired route
            osm_routes[ptLine.id] = [ptLine.attr_name, ptLine.line, ptLine.type, line_dir, ptLine.color,
                                     None, [s.attr_name for s in (ptLine.stops or [])]]
            dua_file.write(u' <trip id="%s" type="%s" depart="0" via="%s"/>\n' %
                           (ptLine.id, ptLine.type, (" ").join(route_edges)))
        dua_file.write(u"</routes>\n")

    if options.verbose:
        print("%s routes read, discarded for wrong mode: %s, outside of net %s, keeping %s" %
              (num_read, discard_type, discard_net, len(osm_routes)))
    # run duarouter
    subprocess.check_call([sumolib.checkBinary('duarouter'),
                           '-n', options.network,
                           '--route-files', 'dua_input.xml', '--repair',
                           '-o', 'dua_output.xml', '--ignore-errors',
                           '--error-log', options.dua_repair_output])

    # parse repaired routes; every line not found in the output stays "broken"
    n_routes = len(osm_routes)
    broken = set(osm_routes.keys())
    for ptline, ptline_route in parse_fast_nested("dua_output.xml", "vehicle", "id", "route", "edges"):
        osm_routes[ptline.id][5] = ptline_route.edges
        broken.remove(ptline.id)

    # remove dua files
    os.remove("dua_input.xml")
    os.remove("dua_output.xml")
    os.remove("dua_output.alt.xml")

    # remove invalid routes from dict (plain loop instead of the former
    # side-effecting list comprehension; broken only contains existing keys)
    for line in broken:
        del osm_routes[line]

    if n_routes != len(osm_routes):
        print("%s of %s routes have been imported, see '%s' for more information." %
              (len(osm_routes), n_routes, options.dua_repair_output))

    return osm_routes
454
455
456
@benchmark
def import_osm(options, net):
    """
    Imports the routes of the public transport lines from osm.

    Returns a dict ptLine id -> (name, line, type, direction, color, edge
    string, stop names). With options.repair the routes are first passed
    through duarouter (see repair_routes); otherwise they are taken as-is,
    keeping only edges present in the net.
    """
    if options.repair:
        if options.verbose:
            print("Import and repair osm routes")
        osm_routes = repair_routes(options, net)
    else:
        if options.verbose:
            print("Import osm routes")
        osm_routes = {}
        for ptLine in sumolib.xml.parse(options.osm_routes, "ptLine"):
            # skip lines with an unwanted mode or without any route element
            if ptLine.type not in options.modes or not ptLine.route:
                continue
            route_edges = ptLine.route[0].edges.split()
            route_edges = [e for e in route_edges if net.hasEdge(e)]
            if route_edges:
                # TODO recheck what happens if it is only one edge
                # direction is derived from the from-nodes of the first and last edge
                x, y = net.getEdge(route_edges[0]).getFromNode().getCoord()
                line_orig = net.convertXY2LonLat(x, y)

                x, y = net.getEdge(route_edges[-1]).getFromNode().getCoord()
                line_dest = net.convertXY2LonLat(x, y)

                line_dir = get_line_dir(line_orig, line_dest)

                # NOTE: stores the unfiltered edge string (ptLine.route[0].edges),
                # not the filtered route_edges list
                osm_routes[ptLine.id] = (ptLine.attr_name, ptLine.line,
                                         ptLine.type, line_dir, ptLine.color,
                                         ptLine.route[0].edges, [s.attr_name for s in (ptLine.stops or [])])
    return osm_routes
488
489
490
def _addToDataFrame(gtfs_data, row, shapes_dict, stop, edge):
491
shape_list = [sec_shape for sec_shape, main_shape in shapes_dict.items() if main_shape == row.shape_id]
492
gtfs_data.loc[(gtfs_data["stop_id"] == row.stop_id) &
493
(gtfs_data["shape_id"].isin(shape_list)),
494
"stop_item_id"] = stop
495
gtfs_data.loc[(gtfs_data["stop_id"] == row.stop_id) &
496
(gtfs_data["shape_id"].isin(shape_list)),
497
"edge_id"] = edge
498
499
500
def getBestLane(net, lon, lat, radius, stop_length, center, edge_set, pt_class, last_pos=-1):
    """Find a lane near (lon, lat) suitable for placing a stop.

    Only edges whose id is in edge_set are considered; the first lane that
    allows pt_class wins. Returns (lane_id, startPos, endPos) or None.
    NOTE(review): the `edge_set[0]` comparison requires an indexable sequence
    and would raise on a plain set; with the default last_pos=-1 the left side
    of the `or` is always true (pos >= 0), so it is never evaluated here —
    confirm callers that pass last_pos also pass a list/tuple.
    """
    # get edges near stop location
    x, y = net.convertLonLat2XY(lon, lat)
    edges = [e for e in net.getNeighboringEdges(x, y, radius, includeJunctions=False) if e[0].getID() in edge_set]
    # sort by distance but have edges longer than stop length first
    for edge, _ in sorted(edges, key=lambda x: (x[0].getLength() <= stop_length, x[1])):
        for lane in edge.getLanes():
            if lane.allows(pt_class):
                pos = lane.getClosestLanePosAndDist((x, y))[0]
                if pos > last_pos or edge.getID() != edge_set[0]:
                    # center=True puts the stop around pos, otherwise it ends at pos
                    start = max(0, pos - (stop_length / 2. if center else stop_length))
                    end = min(start + stop_length, lane.getLength())
                    return lane.getID(), start, end
    return None
514
515
516
def getAccess(net, lon, lat, radius, lane_id, max_access=10):
    """Build <access .../> strings for a stop on a lane without pedestrian access.

    Searches edges around (lon, lat) that allow pedestrians (nearest first) and
    returns up to max_access formatted access lines; returns an empty list when
    the stop's own edge already allows pedestrians.
    """
    x, y = net.convertLonLat2XY(lon, lat)
    lane = net.getLane(lane_id)
    access = []
    if not lane.getEdge().allows("pedestrian"):
        for access_edge, _ in sorted(net.getNeighboringEdges(x, y, radius), key=lambda i: i[1]):
            if access_edge.allows("pedestrian"):
                access_lane_idx, access_pos, access_dist = access_edge.getClosestLanePosDist((x, y))
                # the closest lane may not itself allow pedestrians; fall back
                # to the first lane of the edge that does
                if not access_edge.getLane(access_lane_idx).allows("pedestrian"):
                    for idx, lane in enumerate(access_edge.getLanes()):
                        if lane.allows("pedestrian"):
                            access_lane_idx = idx
                            break
                # length 1.5 * distance: walking length estimate for the access path
                access.append((u' <access friendlyPos="true" lane="%s_%s" pos="%.2f" length="%.2f"/>\n') %
                              (access_edge.getID(), access_lane_idx, access_pos, 1.5 * access_dist))
                if len(access) == max_access:
                    break
    return access
534
535
536
@benchmark
def map_gtfs_osm(options, net, osm_routes, gtfs_data, shapes, shapes_dict, filtered_stops):
    """
    Maps the routes from gtfs with the sumo routes imported from osm and maps
    the gtfs stops with the lane and position in sumo.

    Returns (map_routes, map_stops, missing_stops, missing_lines):
    map_routes: shape_id -> (osm ptLine id, edge list, color);
    map_stops: stop item id -> [name, lane, startPos, endPos, access, type, edge set];
    missing_stops / missing_lines: tuples for the warning output.
    """
    if options.verbose:
        print("Map stops and routes")

    map_routes = {}
    map_stops = {}
    # gtfs stops are grouped (not in exact geo position), so a large radius
    # for mapping is needed
    radius = 200

    missing_stops = []
    missing_lines = []
    # stop_id -> list of stop item ids already created for that gtfs stop
    stop_items = defaultdict(list)

    # get different permutations of stop names, and assign the collection of all stop names in route to the stop
    filtered_stops['stop_name'] = [[x] + re.split(r', | ,|,', x) + [x.replace(',', '')]
                                   for x in filtered_stops['stop_name']]
    filtered_shapes = filtered_stops.groupby(['shape_id', 'route_short_name',
                                              'route_type', 'direction_id']).stop_name.aggregate("sum").reset_index(
        name='stop_name_all')
    filtered_stops = pd.merge(filtered_stops, filtered_shapes)

    for row in filtered_stops.itertuples():
        # check if gtfs route already mapped to osm route
        if row.shape_id not in map_routes:
            # if route not mapped, find the osm route for shape id
            pt_line_name = row.route_short_name
            pt_type = GTFS2OSM_MODES[row.route_type]

            # get shape definition and define pt direction
            aux_shapes = shapes[shapes['shape_id'] == row.shape_id]
            pt_orig = aux_shapes[aux_shapes.shape_pt_sequence == aux_shapes.shape_pt_sequence.min()]
            pt_dest = aux_shapes[aux_shapes.shape_pt_sequence == aux_shapes.shape_pt_sequence.max()]
            line_dir = get_line_dir((pt_orig.shape_pt_lon.iloc[0], pt_orig.shape_pt_lat.iloc[0]),
                                    (pt_dest.shape_pt_lon.iloc[0], pt_dest.shape_pt_lat.iloc[0]))

            # get osm lines with same route name and pt type,
            # and if they have at least one matching stop name in osm and gtfs routes
            osm_lines = [(abs(line_dir - value[3]), ptline_id, value[4], value[5])
                         for ptline_id, value in osm_routes.items()
                         if value[1] == pt_line_name and value[2] == pt_type]
            # if value[1] == pt_line_name and value[2] in OSM2OSM_MODES[pt_type] and set(value[6]) & set(row.stop_name_all)]  # noqa
            if osm_lines:
                # get the direction for the found routes and take the route
                # with lower difference (angles wrapped so 350° ~ 10°)
                diff, osm_id, color, edges = min(osm_lines, key=lambda x: x[0] if x[0] < 180 else 360 - x[0])
                d = diff if diff < 180 else 360 - diff
                if d < 160:  # to prevent mapping to route going the opposite direction
                    # add mapped osm route to dict
                    map_routes[row.shape_id] = (osm_id, edges.split(), color)
                else:
                    missing_lines.append((row.route_id, pt_line_name, sumolib.xml.quoteattr(
                        str(row.trip_headsign), True), row.direction_id))
                    continue
            else:
                # no osm route found, do not map stops of route
                missing_lines.append((row.route_id, pt_line_name, sumolib.xml.quoteattr(
                    str(row.trip_headsign), True), row.direction_id))
                continue

        # set stop's type, class and length
        pt_type = GTFS2OSM_MODES[row.route_type]
        pt_class = OSM2SUMO_MODES[pt_type]
        if pt_class == "bus":
            stop_length = options.bus_stop_length
        elif pt_class == "tram":
            stop_length = options.tram_stop_length
        else:
            stop_length = options.train_stop_length

        stop_mapped = False
        # try to reuse a stop item that was already created for this gtfs stop
        for stop in stop_items[row.stop_id]:
            # for item of mapped stop
            stop_edge = map_stops[stop][1].rsplit("_", 1)[0]
            if stop_edge in map_routes[row.shape_id][1]:
                # if edge in route, the stops are the same
                # intersect the edge set
                map_stops[stop][6] = map_stops[stop][6] & set(map_routes[row.shape_id][1])
            else:
                # check if the wrong edge was adopted
                edge_inter = set(map_routes[row.shape_id][1]) & map_stops[stop][6]
                best = getBestLane(net, row.stop_lon, row.stop_lat, radius,
                                   stop_length, options.center_stops, edge_inter, pt_class)
                if best is None:
                    continue
                # update the lane id, start and end and add shape
                lane_id, start, end = best
                access = getAccess(net, row.stop_lon, row.stop_lat, 100, lane_id)
                map_stops[stop][1:7] = [lane_id, start, end, access, pt_type, edge_inter]
                # update edge in data frame
                stop_edge = lane_id.rsplit("_", 1)[0]
                gtfs_data.loc[gtfs_data["stop_item_id"] == stop, "edge_id"] = stop_edge
            # add to data frame
            _addToDataFrame(gtfs_data, row, shapes_dict, stop, stop_edge)
            stop_mapped = True
            break

        # if stop not mapped
        if not stop_mapped:
            # search only on the edges of the mapped osm route
            edge_inter = set(map_routes[row.shape_id][1])
            best = getBestLane(net, row.stop_lon, row.stop_lat, radius,
                               stop_length, options.center_stops, edge_inter, pt_class)
            if best is not None:
                lane_id, start, end = best
                access = getAccess(net, row.stop_lon, row.stop_lat, 100, lane_id)
                stop_item_id = "%s_%s" % (row.stop_id, len(stop_items[row.stop_id]))
                stop_items[row.stop_id].append(stop_item_id)
                map_stops[stop_item_id] = [sumolib.xml.quoteattr(row.stop_name[0], True),
                                           lane_id, start, end, access, pt_type, edge_inter]
                _addToDataFrame(gtfs_data, row, shapes_dict, stop_item_id, lane_id.split("_")[0])
                stop_mapped = True

        # if stop not mapped, add to missing stops
        if not stop_mapped:
            missing_stops.append((row.stop_id, sumolib.xml.quoteattr(
                row.stop_name[0], True), row.route_short_name, row.direction_id))
    # pprint(map_routes)
    # pprint(map_stops)
    return map_routes, map_stops, missing_stops, missing_lines
660
661
662
def write_vtypes(options, seen=None):
    """Write a vType definition per enabled OSM mode to options.vtype_output.

    Only modes listed in options.modes are written; when *seen* is given, the
    mode must additionally be contained in it. Does nothing when no
    vtype_output is configured.
    """
    if not options.vtype_output:
        return
    with sumolib.openz(options.vtype_output, mode='w') as vout:
        sumolib.xml.writeHeader(vout, root="additional", options=options)
        for osm_type, sumo_class in sorted(OSM2SUMO_MODES.items()):
            if osm_type not in options.modes:
                continue
            if seen is not None and osm_type not in seen:
                continue
            vout.write(u' <vType id="%s" vClass="%s"/>\n' %
                       (osm_type, sumo_class))
        vout.write(u'</additional>\n')
671
672
673
def write_gtfs_osm_outputs(options, map_routes, map_stops, missing_stops, missing_lines,
                           gtfs_data, trip_list, shapes_dict, net):
    """
    Generates stops and routes for sumo and saves the unmapped elements.

    Writes the stop definitions to options.additional_output, the routes and
    vehicles to options.route_output (repeated for every simulated day) and
    the unmapped stops/lines/sequence errors to options.warning_output.
    """
    if options.verbose:
        print("Generates stops and routes output")

    # determine if we need to format times (depart, duration, until) to be human readable or whole seconds
    ft = humanReadableTime if "hrtime" in options and options.hrtime else int

    with sumolib.openz(options.additional_output, mode='w') as output_file:
        sumolib.xml.writeHeader(output_file, root="additional", options=options)
        for stop, value in sorted(map_stops.items()):
            name, lane, start_pos, end_pos, access, v_type = value[:6]
            typ = "busStop" if v_type == "bus" else "trainStop"
            # self-closing element when there are no access lanes
            output_file.write(u' <%s id="%s" lane="%s" startPos="%.2f" endPos="%.2f" name=%s friendlyPos="true"%s>\n' %  # noqa
                              (typ, stop, lane, start_pos, end_pos, name, "" if access else "/"))
            for a in access:
                output_file.write(a)
            if access:
                output_file.write(u' </%s>\n' % typ)
        output_file.write(u'</additional>\n')

    sequence_errors = []
    write_vtypes(options)

    with sumolib.openz(options.route_output, mode='w') as output_file:
        sumolib.xml.writeHeader(output_file, root="routes", options=options)
        # number of *additional* whole days beyond the first; // 86401 so that
        # an end of exactly 86400 s (24 h) still counts as a single day
        numDays = int(options.end) // 86401
        start_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.begin)))
        shapes_written = set()

        for day in range(numDays+1):
            if day == numDays and options.end % 86400 > 0:
                # if last day, filter trips until given end time
                end_time = pd.to_timedelta(time.strftime('%H:%M:%S', time.gmtime(options.end-86400*numDays)))
                trip_list = trip_list[trip_list["arrival_fixed"] <= end_time]

            # stop sequence tuple -> first trip id seen with that sequence,
            # used as the shared "line" suffix for identical services
            seqs = {}
            for row in trip_list.sort_values("arrival_fixed").itertuples():

                if day != 0 and row.trip_id.endswith(".trimmed"):
                    # only add trimmed trips the first day
                    continue

                if day == 0 and row.arrival_fixed < start_time:
                    # avoid writing first day trips that not applied
                    continue

                main_shape = shapes_dict.get(row.shape_id)
                if main_shape not in map_routes:
                    # if route not mapped
                    continue
                pt_color = map_routes[main_shape][2]
                if pt_color is None:
                    pt_color = ""
                else:
                    pt_color = ' color="%s"' % pt_color
                pt_type = GTFS2OSM_MODES[row.route_type]
                edges_list = map_routes[main_shape][1]
                stop_list = gtfs_data[gtfs_data["trip_id"] == row.trip_id].sort_values("stop_sequence")
                # positions of this trip's mapped stops along the route's edges
                stop_index = [edges_list.index(stop.edge_id)
                              for stop in stop_list.itertuples()
                              if stop.edge_id in edges_list]

                if len(stop_index) < options.min_stops:
                    # Not enough stops mapped
                    continue

                # write each route definition only once, on first use
                if main_shape not in shapes_written:
                    output_file.write(u' <route id="%s" edges="%s"/>\n' % (main_shape, " ".join(edges_list)))
                    shapes_written.add(main_shape)

                stopSeq = tuple([stop.stop_item_id for stop in stop_list.itertuples()])
                if stopSeq not in seqs:
                    seqs[stopSeq] = row.trip_id

                # determine departure from first valid stop
                depart = None
                for stop in stop_list.itertuples():
                    if stop.stop_item_id:
                        depart = ft(parseTime(str(stop.arrival_fixed.days + day) +
                                              ":" + str(stop.arrival_fixed).split(' ')[2]))
                        break

                veh_attr = (row.trip_id, day,
                            main_shape, row.route_id, seqs[stopSeq], depart,
                            min(stop_index), max(stop_index), pt_type, pt_color)
                output_file.write(u' <vehicle id="%s.%s" route="%s" line="%s_%s" depart="%s" departEdge="%s" arrivalEdge="%s" type="%s"%s>\n' % veh_attr)  # noqa
                params = [("gtfs.route_name", row.route_short_name)]
                if row.trip_headsign:
                    params.append(("gtfs.trip_headsign", row.trip_headsign))
                if options.writeTerminals:
                    firstStop = stop_list.iloc[0]
                    lastStop = stop_list.iloc[-1]
                    firstDepart = parseTime(str(firstStop.departure_fixed.days + day) +
                                            ":" + str(firstStop.departure_fixed).split(' ')[2])
                    lastArrival = parseTime(str(lastStop.arrival_fixed.days + day) +
                                            ":" + str(lastStop.arrival_fixed).split(' ')[2])
                    # NOTE: the 'arrrival' spelling is a long-standing typo in
                    # the emitted param key; kept for output compatibility
                    params += [("gtfs.origin_stop", firstStop.stop_name),
                               ("gtfs.origin_depart", ft(firstDepart)),
                               ("gtfs.destination_stop", lastStop.stop_name),
                               ("gtfs.destination_arrrival", ft(lastArrival))]
                for k, v in params:
                    output_file.write(u' <param key="%s" value=%s/>\n' % (
                        k, sumolib.xml.quoteattr(str(v), True)))

                # write stops in route order; stops that would go backwards
                # along the route are reported as sequence errors instead
                check_seq = -1
                for stop in stop_list.itertuples():
                    if not stop.stop_item_id:
                        # if stop not mapped
                        continue
                    stop_index = edges_list.index(stop.edge_id)
                    if stop_index >= check_seq:
                        check_seq = stop_index
                        # TODO check stop position if we are on the same edge as before
                        stop_attr = (stop.stop_item_id,
                                     ft(parseTime(str(stop.arrival_fixed.days + day) +
                                                  ":" + str(stop.arrival_fixed).split(' ')[2])),
                                     ft(options.duration) if options.duration > 60 else options.duration,
                                     ft(parseTime(str(stop.departure_fixed.days + day) +
                                                  ":" + str(stop.departure_fixed).split(' ')[2])),
                                     stop.stop_sequence, stop_list.stop_sequence.max(),
                                     sumolib.xml.quoteattr(stop.stop_name, True))
                        output_file.write(u' <stop busStop="%s" arrival="%s" duration="%s" until="%s"/><!--stopSequence="%s/%s" %s-->\n' % stop_attr)  # noqa
                    elif stop_index < check_seq:
                        # stop not downstream
                        sequence_errors.append((stop.stop_item_id, sumolib.xml.quoteattr(stop.stop_name, True),
                                                row.route_short_name,
                                                sumolib.xml.quoteattr(str(row.trip_headsign), True), row.direction_id,
                                                stop.trip_id))

                output_file.write(u' </vehicle>\n')
        output_file.write(u'</routes>\n')

    # ----------------------- Save missing data ------------------
    if any([missing_stops, missing_lines, sequence_errors]):
        print("Not all given gtfs elements have been mapped, see %s for more information" % options.warning_output)
        with io.open(options.warning_output, 'w', encoding="utf8") as output_file:
            sumolib.xml.writeHeader(output_file, root="missingElements", rootAttrs=None, options=options)
            for stop in sorted(set(missing_stops)):
                output_file.write(u' <stop id="%s" name=%s ptLine="%s" direction_id="%s"/>\n' % stop)
            for line in sorted(set(missing_lines)):
                output_file.write(u' <ptLine id="%s" name="%s" trip_headsign=%s direction_id="%s"/>\n' % line)
            for stop in sorted(set(sequence_errors)):
                output_file.write(u' <stopSequence stop_id="%s" stop_name=%s ptLine="%s" trip_headsign=%s direction_id="%s" trip_id="%s"/>\n' % stop)  # noqa
            output_file.write(u'</missingElements>\n')
821
822