Path: blob/main/tools/purgatory/osmPopulationExtractor.py
169673 views
#!/usr/bin/env python1# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo2# Copyright (C) 2013-2025 German Aerospace Center (DLR) and others.3# This program and the accompanying materials are made available under the4# terms of the Eclipse Public License 2.0 which is available at5# https://www.eclipse.org/legal/epl-2.0/6# This Source Code may also be made available under the following Secondary7# Licenses when the conditions for such availability set forth in the Eclipse8# Public License 2.0 are satisfied: GNU General Public License, version 29# or later which is available at10# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html11# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later1213# @file osmPopulationExtractor.py14# @author Yun-Pang Floetteroed15# @author Melanie Knocke16# @author Michael Behrisch17# @date 2013-02-081819"""20This script is to21- extract the population data from a given Open Street Map (OSM).22- match the population data from OSM and BSA (with csv format)23The redundant information is removed and saved in the output file *_redundantOSMData.txt.24If there are data entries without names, they will be saved in *_nameNone.txt.25"""26from __future__ import absolute_import27from __future__ import print_function2829import os30import sys31from xml.sax import make_parser, handler32from optparse import OptionParser333435class Net():3637def __init__(self):38self._relations = []39self._nodes = []40self._nodeMap = {}41self._relationMap = {}42self._uidNodeMap = {}43self._uidRelationMap = {}4445def addNode(self, id, lat, lon, population):46if id not in self._nodeMap:47node = Node(id, lat, lon, population)48self._nodes.append(node)49self._nodeMap[id] = node50return self._nodeMap[id]5152def addRelation(self, id, uid, population):53if id not in self._relationMap:54relation = Relation(id, uid, population)55self._relations.append(relation)56self._relationMap[id] = relation5758return self._relationMap[id]596061class Node:6263"""64This class is to store node information.65"""6667def __init__(self, id, lat, lon, population):68self.id = id69self.lat = lat70self.lon = lon71self.attribute = "node"72self.uid = None73self.place = None74self.name = None75self.population = population7677def __repr__(self):78return "<%s|%s_%s_%s>" % (self.id, self.name, self.uid, self.population)798081class Relation:8283"""84This class is to store relation information.85"""8687def __init__(self, id, uid, population):88self.id = id89self.uid = uid90self.attribute = "relation"91self.name = None92self.type = None93self.population = population9495def __repr__(self):96return "%s|%s_%s_%s" % (self.id, self.name, self.uid, self.population)979899class PopulationReader(handler.ContentHandler):100101"""The class is for parsing the OSM XML file.102The data parsed is written into the net.103"""104105def __init__(self, net, foutredundant, encoding):106self._net = net107self._nodeId = None108self._nodeObj = None109self._nodeLat = None110self._nodeLon = None111self._nodeuid = None112self._place = None113self._relationId = None114self._relationObj = None115self._relationuid = None116self._type = None117self._name = None118self._population = None119self._nodeNamesList = []120self._fout = foutredundant121self._encoding = encoding122123def startElement(self, name, attrs):124if name == 'node':125self._nodeId = attrs['id']126self._nodeLat = attrs['lat']127self._nodeLon = attrs['lon']128if 'uid' in attrs:129self._nodeuid = attrs['uid']130if self._nodeId and name == 'tag':131if attrs['k'] == 'name':132self._name = attrs['v']133if not self._name and attrs['k'] == 'name:de':134self._name = attrs['v']135if not self._name and attrs['k'] == 'openGeoDB:name':136self._name = attrs['v']137if attrs['k'] == 'place':138self._place = attrs['v']139if not self._population and attrs['k'] == 'openGeoDB:population':140self._population = attrs['v']141if not self._population and attrs['k'] == 'population':142self._population = attrs['v']143if name == 'relation':144self._relationId = attrs['id']145self._uid = attrs['uid']146if 'uid' in attrs:147self._relationuid = attrs['uid']148if self._relationId and name == 'tag':149if attrs['k'] == 'name':150self._name = attrs['v']151if attrs['k'] == 'type':152self._type = attrs['v']153if not self._population and attrs['k'] == 'openGeoDB:population':154self._population = attrs['v']155if not self._population and attrs['k'] == 'population':156self._population = attrs['v']157158def endElement(self, name):159if name == 'node' and self._population:160newInput = True161for n in self._net._nodes:162# diffLat = abs(float(self._nodeLat) - float(n.lat))163# diffLon = abs(float(self._nodeLon) - float(n.lon))164# and diffLat < 0.003 and diffLon < 0.003 and165# int(self._population) == int(n.population):166if self._name and self._name == n.name and self._population == n.population:167newInput = False168self._fout.write(('node\t%s\t%s\t%s\t%s\t%s\n' % (169self._name, self._nodeId, self._nodeLat, self._nodeLon,170self._population)).encode(self._encoding))171break172if newInput:173self._nodeObj = self._net.addNode(174self._nodeId, self._nodeLat, self._nodeLon, self._population)175if self._nodeuid:176self._nodeObj.uid = self._nodeuid177if self._nodeuid not in self._net._uidNodeMap:178self._net._uidNodeMap[self._nodeuid] = []179self._net._uidNodeMap[self._nodeuid].append(self._nodeObj)180if self._name:181self._nodeObj.name = self._name182if self._place:183self._nodeObj.place = self._place184self._nodeId = None185self._nodeObj = None186self._nodeLat = None187self._nodeLon = None188self._nodeuid = None189self._place = None190self._name = None191self._population = None192193if name == 'relation' and self._population:194newInput = True195for r in self._net._relations:196if self._name and self._name == r.name and self._population == r.population:197newInput = False198self._fout.write(('relation\t%s\t%s\t%s\t%s\n' % (199self._name, self._relationId, self._relationuid, self._population)).encode(options.encoding))200break201if newInput:202self._relationObj = self._net.addRelation(203self._relationId, self._relationuid, self._population)204self._relationObj.population = self._population205if self._relationuid not in self._net._uidRelationMap:206self._net._uidRelationMap[self._relationuid] = []207self._net._uidRelationMap[208self._relationuid].append(self._relationObj)209210if self._name:211self._relationObj.name = self._name212if self._type:213self._relationObj.place = self._type214self._relationId = None215self._relationObj = None216self._relationuid = None217self._type = None218self._name = None219self._population = None220221222def main():223parser = make_parser()224osmFile = options.osmfile225print('osmFile:', osmFile)226if options.bsafile:227bsaFile = options.bsafile228print('bsaFile:', bsaFile)229if options.outputfile:230prefix = options.outputfile231else:232prefix = osmFile.split('.')[0]233redundantDataFile = '%s_redundantOSMData.txt' % prefix234foutredundant = open(redundantDataFile, 'w')235net = Net()236parser.setContentHandler(237PopulationReader(net, foutredundant, options.encoding))238parser.parse(osmFile)239foutredundant.close()240print('finish with data parsing')241if options.generateoutputs:242print('write the population to the output file')243outputfile = '%s_populations.txt' % prefix244fout = open(outputfile, 'w')245fout.write("attribute\tid\tname\tuid\tpopulation\tlat\tlon\n")246for n in net._nodes:247fout.write(("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (248n.attribute, n.id, n.name, n.uid, n.population, n.lat, n.lon)).encode(options.encoding))249fout.close()250251if os.path.exists(outputfile):252fout = open(outputfile, 'a')253else:254print("there is no file named %s", outputfile)255print("A new file will be open.")256fout = open(outputfile, 'w')257258for r in net._relations:259fout.write(("%s\t%s\t%s\t%s\t%s\tNone\tNone\n" % (260r.attribute, r.id, r.name, r.uid, r.population)).encode(options.encoding))261fout.close()262263fout = open('%s_nodesWithSameUid.txt' % prefix, 'w')264fout.write('nodeUid\tnodeId\tname\n')265for r in net._uidNodeMap:266fout.write('%s' % r)267for n in net._uidNodeMap[r]:268fout.write(269('\t%s\t%s' % (n.id, n.name)).encode(options.encoding))270fout.write('\n')271fout.close()272273fout = open('%s_uidRelations.txt' % prefix, 'w')274fout.write('relationUid\trelationId\tname\n')275for r in net._uidRelationMap:276fout.write('%s' % r)277for n in net._uidRelationMap[r]:278fout.write(279('\t%s\t%s' % (n.id, n.name)).encode(options.encoding))280fout.write('\n')281fout.close()282283if options.bsafile:284print('compare the data with the data from BSA')285bsaTotalCount = 0286matchedCount = 0287288fout = open("%s_matchedAreas.txt" % prefix, 'w')289fout.write(290"#bsaName\tbsaArea\tbsaPop\tbsaLat\tbsaLon\tosmName\tosmAtt\tosmPop\tosmLat\tosmLon\n")291noneList = []292for line in open(options.bsafile):293if '#' not in line:294line = line.split('\n')[0]295line = line.split(';')296name = line[0].decode("latin1")297area = float(line[1])298pop = int(line[2])299lon = line[3]300lat = line[4]301bsaTotalCount += 1302303for n in net._nodes:304if n.name is None and n not in noneList:305noneList.append(n)306# and n.name not in areasList:307elif n.name is not None and name == n.name:308matchedCount += 1309fout.write(("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (310name, area, pop, lat, lon, n.name, n.attribute, n.population,311n.lat, n.lon)).encode(options.encoding))312313for r in net._relations:314if r.name is None and r not in noneList:315noneList.append(r)316# and r.name not in areasList:317elif r.name is not None and name == r.name:318matchedCount += 1319fout.write(("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tNone\tNone\n" % (320name, area, pop, lat, lon, r.name, r.attribute, r.population)).encode(options.encoding))321fout.close()322if len(noneList) > 0:323foutnone = open("%s_nameNone.txt" % prefix, 'w')324foutnone.write("nodeId\tnodeName\tPopulation\tLat\tLon\n")325for n in noneList:326foutnone.write(("%s\t%s\t%s\t%s\t%s\n" % (327n.id, n.name, n.population, n.lat, n.lon)).encode(options.encoding))328foutnone.close()329# Duplicated data does not exist.330osmTotalCount = len(net._nodes) + len(net._relations)331print('matched count in OSM and BSA data:', matchedCount)332print('Number of entries in the BSA data:', bsaTotalCount)333print('Number of entries in the OSM data:', osmTotalCount)334335336optParser = OptionParser()337optParser.add_option("-s", "--osm-file", dest="osmfile",338help="read OSM file from FILE (mandatory)", metavar="FILE")339optParser.add_option("-b", "--bsa-file", dest="bsafile",340help="read population (in csv form) provided by German federal statistic authority " +341"(Bundesstatistikamt) from FILE", metavar="FILE")342optParser.add_option("-o", "--output-file", dest="outputfile",343help="define the prefix name of the output file")344optParser.add_option(345"-e", "--encoding", help="output file encoding (default: %default)", default="utf8")346optParser.add_option("-g", "--generate-outputs", dest="generateoutputs", action="store_true",347default=False, help="generate output files")348(options, args) = optParser.parse_args()349350if not options.osmfile:351optParser.print_help()352sys.exit()353main()354355356