Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
eclipse
GitHub Repository: eclipse/sumo
Path: blob/main/tools/purgatory/osmPopulationExtractor.py
169673 views
1
#!/usr/bin/env python
2
# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3
# Copyright (C) 2013-2025 German Aerospace Center (DLR) and others.
4
# This program and the accompanying materials are made available under the
5
# terms of the Eclipse Public License 2.0 which is available at
6
# https://www.eclipse.org/legal/epl-2.0/
7
# This Source Code may also be made available under the following Secondary
8
# Licenses when the conditions for such availability set forth in the Eclipse
9
# Public License 2.0 are satisfied: GNU General Public License, version 2
10
# or later which is available at
11
# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12
# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13
14
# @file osmPopulationExtractor.py
15
# @author Yun-Pang Floetteroed
16
# @author Melanie Knocke
17
# @author Michael Behrisch
18
# @date 2013-02-08
19
20
"""
21
This script is to
22
- extract the population data from a given Open Street Map (OSM).
23
- match the population data from OSM against the BSA data (given in CSV format)
24
The redundant information is removed and saved in the output file *_redundantOSMData.txt.
25
If there are data entries without names, they will be saved in *_nameNone.txt.
26
"""
27
from __future__ import absolute_import
28
from __future__ import print_function
29
30
import os
31
import sys
32
from xml.sax import make_parser, handler
33
from optparse import OptionParser
34
35
36
class Net():

    """
    This class is to collect the nodes and relations read from the OSM data.
    """

    def __init__(self):
        # elements in the order in which they were added
        self._nodes = []
        self._relations = []
        # element id -> element
        self._nodeMap = {}
        self._relationMap = {}
        # uid -> elements; not filled by this class itself
        self._uidNodeMap = {}
        self._uidRelationMap = {}

    def addNode(self, id, lat, lon, population):
        """Return the node registered under id, creating it on first use."""
        try:
            return self._nodeMap[id]
        except KeyError:
            pass
        created = Node(id, lat, lon, population)
        self._nodes.append(created)
        self._nodeMap[id] = created
        return created

    def addRelation(self, id, uid, population):
        """Return the relation registered under id, creating it on first use."""
        try:
            return self._relationMap[id]
        except KeyError:
            pass
        created = Relation(id, uid, population)
        self._relations.append(created)
        self._relationMap[id] = created
        return created
60
61
62
class Node:

    """
    This class is to store the data of a single OSM node.
    """

    def __init__(self, id, lat, lon, population):
        # values handed over by the caller
        self.id, self.lat, self.lon = id, lat, lon
        self.population = population
        # fixed marker telling nodes apart from relations
        self.attribute = "node"
        # optional data, left empty until somebody assigns it
        self.name = None
        self.place = None
        self.uid = None

    def __repr__(self):
        parts = (self.id, self.name, self.uid, self.population)
        return "<%s|%s_%s_%s>" % parts
80
81
82
class Relation:

    """
    This class is to store the data of a single OSM relation.
    """

    def __init__(self, id, uid, population):
        # values handed over by the caller
        self.id, self.uid = id, uid
        self.population = population
        # fixed marker telling relations apart from nodes
        self.attribute = "relation"
        # optional data, left empty until somebody assigns it
        self.type = None
        self.name = None

    def __repr__(self):
        parts = (self.id, self.name, self.uid, self.population)
        return "%s|%s_%s_%s" % parts
98
99
100
class PopulationReader(handler.ContentHandler):

    """The class is for parsing the OSM XML file.

    Every 'node' or 'relation' element that carries a population tag
    ('openGeoDB:population' or 'population') is written into the net.
    An element whose name and population both equal those of an element
    already stored in the net is considered redundant: it is not added
    but logged to the redundant-data stream instead.
    """

    def __init__(self, net, foutredundant, encoding):
        """Set up the handler.

        net: receives the elements through addNode() / addRelation(); its
            _nodes, _relations, _uidNodeMap and _uidRelationMap attributes
            are read and updated directly.
        foutredundant: writable stream for the redundant entries.
        encoding: encoding used for the redundant entries.
        """
        handler.ContentHandler.__init__(self)
        self._net = net
        self._nodeId = None
        self._nodeObj = None
        self._nodeLat = None
        self._nodeLon = None
        self._nodeuid = None
        self._place = None
        self._relationId = None
        self._relationObj = None
        self._relationuid = None
        self._type = None
        self._name = None
        self._population = None
        self._nodeNamesList = []
        self._fout = foutredundant
        self._encoding = encoding

    def startElement(self, name, attrs):
        """Remember the attributes and tags of the node/relation being read."""
        if name == 'node':
            self._nodeId = attrs['id']
            self._nodeLat = attrs['lat']
            self._nodeLon = attrs['lon']
            # the uid is optional
            self._nodeuid = attrs.get('uid')
        elif name == 'relation':
            self._relationId = attrs['id']
            # the uid is optional as well, so it must not be indexed directly
            self._relationuid = attrs.get('uid')
        elif name == 'tag':
            if self._nodeId:
                self._readNodeTag(attrs)
            if self._relationId:
                self._readRelationTag(attrs)

    def endElement(self, name):
        """Store the finished node/relation if it has a population.

        The per-element state is cleared at the end of every node and
        relation, also of those without population. Otherwise their tags
        (and the tags of elements following them, e.g. ways) would be
        attributed to a later element.
        """
        if name == 'node':
            if self._population:
                self._storeNode()
            self._resetNodeState()
        elif name == 'relation':
            if self._population:
                self._storeRelation()
            self._resetRelationState()

    def _readNodeTag(self, attrs):
        """Evaluate a tag found inside a node element."""
        key = attrs['k']
        if key == 'name':
            # the plain name always wins over the alternative names
            self._name = attrs['v']
        elif key in ('name:de', 'openGeoDB:name'):
            if not self._name:
                self._name = attrs['v']
        elif key == 'place':
            self._place = attrs['v']
        elif key in ('openGeoDB:population', 'population'):
            # the first population value found is kept
            if not self._population:
                self._population = attrs['v']

    def _readRelationTag(self, attrs):
        """Evaluate a tag found inside a relation element."""
        key = attrs['k']
        if key == 'name':
            self._name = attrs['v']
        elif key == 'type':
            self._type = attrs['v']
        elif key in ('openGeoDB:population', 'population'):
            # the first population value found is kept
            if not self._population:
                self._population = attrs['v']

    def _isRedundant(self, elements):
        """Tell whether an element with the same name and population is known."""
        if not self._name:
            return False
        return any(self._name == e.name and self._population == e.population
                   for e in elements)

    def _writeRedundant(self, text):
        """Write an entry to the redundant-data stream.

        The encoded form is tried first; a text-mode stream under Python 3
        rejects bytes with a TypeError and gets the text itself instead.
        """
        try:
            self._fout.write(text.encode(self._encoding))
        except TypeError:
            self._fout.write(text)

    def _storeNode(self):
        """Add the current node to the net or log it as redundant."""
        if self._isRedundant(self._net._nodes):
            self._writeRedundant('node\t%s\t%s\t%s\t%s\t%s\n' % (
                self._name, self._nodeId, self._nodeLat, self._nodeLon,
                self._population))
            return
        node = self._net.addNode(
            self._nodeId, self._nodeLat, self._nodeLon, self._population)
        if self._nodeuid:
            node.uid = self._nodeuid
            self._net._uidNodeMap.setdefault(self._nodeuid, []).append(node)
        if self._name:
            node.name = self._name
        if self._place:
            node.place = self._place

    def _storeRelation(self):
        """Add the current relation to the net or log it as redundant."""
        if self._isRedundant(self._net._relations):
            self._writeRedundant('relation\t%s\t%s\t%s\t%s\n' % (
                self._name, self._relationId, self._relationuid,
                self._population))
            return
        relation = self._net.addRelation(
            self._relationId, self._relationuid, self._population)
        relation.population = self._population
        # relations are registered even without uid (under the key None)
        self._net._uidRelationMap.setdefault(
            self._relationuid, []).append(relation)
        if self._name:
            relation.name = self._name
        if self._type:
            relation.type = self._type
            # earlier versions stored the type under this name only
            relation.place = self._type

    def _resetNodeState(self):
        """Forget everything collected for the current node."""
        self._nodeId = None
        self._nodeObj = None
        self._nodeLat = None
        self._nodeLon = None
        self._nodeuid = None
        self._place = None
        self._name = None
        self._population = None

    def _resetRelationState(self):
        """Forget everything collected for the current relation."""
        self._relationId = None
        self._relationObj = None
        self._relationuid = None
        self._type = None
        self._name = None
        self._population = None
221
222
223
def _writeEncoded(fout, text, encoding):
    """Write text to the binary stream fout using the given encoding."""
    fout.write(text.encode(encoding))


def _writeUidMap(fileName, header, uidMap, encoding):
    """Write one line per uid, followed by id and name of each of its elements."""
    with open(fileName, 'wb') as fout:
        _writeEncoded(fout, header, encoding)
        for uid in uidMap:
            _writeEncoded(fout, '%s' % uid, encoding)
            for element in uidMap[uid]:
                _writeEncoded(fout, '\t%s\t%s' % (element.id, element.name), encoding)
            _writeEncoded(fout, '\n', encoding)


def main():
    """Extract the population data from the OSM file and write the results.

    The settings are taken from the module level 'options'. The following
    files are written, all named after the output prefix:
    - *_redundantOSMData.txt: always
    - *_populations.txt, *_nodesWithSameUid.txt, *_uidRelations.txt: if
      the generation of outputs is requested
    - *_matchedAreas.txt and, if entries without name exist,
      *_nameNone.txt: if a BSA file is given

    All output files are opened in binary mode and receive explicitly
    encoded data, which behaves the same under Python 2 and Python 3.
    """
    import io  # needed here only, for reading the BSA file as text

    encoding = options.encoding
    osmFile = options.osmfile
    print('osmFile:', osmFile)
    if options.bsafile:
        print('bsaFile:', options.bsafile)
    if options.outputfile:
        prefix = options.outputfile
    else:
        # cut at the first dot of the file name only, a dot in the
        # directory part (e.g. "./map.osm") must not truncate the path
        directory, baseName = os.path.split(osmFile)
        prefix = os.path.join(directory, baseName.split('.')[0])

    net = Net()
    parser = make_parser()
    with open('%s_redundantOSMData.txt' % prefix, 'wb') as foutredundant:
        parser.setContentHandler(
            PopulationReader(net, foutredundant, encoding))
        parser.parse(osmFile)
    print('finish with data parsing')

    if options.generateoutputs:
        print('write the population to the output file')
        with open('%s_populations.txt' % prefix, 'wb') as fout:
            _writeEncoded(
                fout, "attribute\tid\tname\tuid\tpopulation\tlat\tlon\n", encoding)
            for n in net._nodes:
                _writeEncoded(fout, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
                    n.attribute, n.id, n.name, n.uid, n.population, n.lat, n.lon), encoding)
            for r in net._relations:
                _writeEncoded(fout, "%s\t%s\t%s\t%s\t%s\tNone\tNone\n" % (
                    r.attribute, r.id, r.name, r.uid, r.population), encoding)

        _writeUidMap('%s_nodesWithSameUid.txt' % prefix,
                     'nodeUid\tnodeId\tname\n', net._uidNodeMap, encoding)
        _writeUidMap('%s_uidRelations.txt' % prefix,
                     'relationUid\trelationId\tname\n', net._uidRelationMap, encoding)

    if options.bsafile:
        print('compare the data with the data from BSA')
        bsaTotalCount = 0
        matchedCount = 0
        # the entries without name do not depend on the BSA data, so they
        # are collected once instead of once per BSA line
        noneList = [e for e in net._nodes + net._relations if e.name is None]

        with io.open(options.bsafile, encoding="latin1") as bsaIn, \
                open("%s_matchedAreas.txt" % prefix, 'wb') as fout:
            _writeEncoded(
                fout,
                "#bsaName\tbsaArea\tbsaPop\tbsaLat\tbsaLon\tosmName\tosmAtt\tosmPop\tosmLat\tosmLon\n",
                encoding)
            for line in bsaIn:
                # lines containing '#' are comments, blank lines carry no data
                if '#' in line or not line.strip():
                    continue
                fields = line.rstrip('\n').split(';')
                name = fields[0]
                area = float(fields[1])
                pop = int(fields[2])
                lon = fields[3]
                lat = fields[4]
                bsaTotalCount += 1

                for n in net._nodes:
                    if n.name is not None and name == n.name:
                        matchedCount += 1
                        _writeEncoded(fout, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
                            name, area, pop, lat, lon, n.name, n.attribute, n.population,
                            n.lat, n.lon), encoding)

                for r in net._relations:
                    if r.name is not None and name == r.name:
                        matchedCount += 1
                        _writeEncoded(fout, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tNone\tNone\n" % (
                            name, area, pop, lat, lon, r.name, r.attribute, r.population),
                            encoding)

        if noneList:
            with open("%s_nameNone.txt" % prefix, 'wb') as foutnone:
                _writeEncoded(
                    foutnone, "nodeId\tnodeName\tPopulation\tLat\tLon\n", encoding)
                for e in noneList:
                    # relations have no coordinates
                    _writeEncoded(foutnone, "%s\t%s\t%s\t%s\t%s\n" % (
                        e.id, e.name, e.population,
                        getattr(e, 'lat', None), getattr(e, 'lon', None)), encoding)

        # Duplicated data does not exist.
        osmTotalCount = len(net._nodes) + len(net._relations)
        print('matched count in OSM and BSA data:', matchedCount)
        print('Number of entries in the BSA data:', bsaTotalCount)
        print('Number of entries in the OSM data:', osmTotalCount)
335
336
337
# command line interface: define the options, read them and start the work
optParser = OptionParser()
optParser.add_option(
    "-s", "--osm-file",
    dest="osmfile",
    metavar="FILE",
    help="read OSM file from FILE (mandatory)")
optParser.add_option(
    "-b", "--bsa-file",
    dest="bsafile",
    metavar="FILE",
    help=("read population (in csv form) provided by German federal statistic authority "
          "(Bundesstatistikamt) from FILE"))
optParser.add_option(
    "-o", "--output-file",
    dest="outputfile",
    help="define the prefix name of the output file")
optParser.add_option(
    "-e", "--encoding",
    default="utf8",
    help="output file encoding (default: %default)")
optParser.add_option(
    "-g", "--generate-outputs",
    dest="generateoutputs",
    action="store_true",
    default=False,
    help="generate output files")
options, args = optParser.parse_args()

# without the mandatory OSM file there is nothing to do but to show the usage
if options.osmfile:
    main()
else:
    optParser.print_help()
    sys.exit()
355
356