CoCalc -- attributeStats.py

GitHub Repository: eclipse/sumo
Path: blob/main/tools/output/attributeStats.py
193735 views
1
#!/usr/bin/env python
2
# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3
# Copyright (C) 2014-2026 German Aerospace Center (DLR) and others.
4
# This program and the accompanying materials are made available under the
5
# terms of the Eclipse Public License 2.0 which is available at
6
# https://www.eclipse.org/legal/epl-2.0/
7
# This Source Code may also be made available under the following Secondary
8
# Licenses when the conditions for such availability set forth in the Eclipse
9
# Public License 2.0 are satisfied: GNU General Public License, version 2
10
# or later which is available at
11
# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12
# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13

14
# @file    attributeStats.py
15
# @author  Jakob Erdmann
16
# @date    2019-04-11
17

18
"""
Compute statistics for a specific XML attribute (e.g. timeLoss in tripinfo-output).
"""
21
from __future__ import absolute_import
22
from __future__ import print_function
23

24
import os
25
import sys
26
from collections import defaultdict
27
from lxml import etree as ET
28

29
if 'SUMO_HOME' in os.environ:
30
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
31
    sys.path.append(os.path.join(tools))
32
    import sumolib
33
    from sumolib.xml import parse_fast
34
    from sumolib.miscutils import Statistics, humanReadableTime
35
    from sumolib.statistics import setPrecision, identity
36
else:
37
    sys.exit("please declare environment variable 'SUMO_HOME'")
38

39

40
def get_options():
    """Define and parse the command line options of this script.

    The comma separated values given for --attribute and --element are split
    into lists; both stay None when the option was not given.

    Returns:
        The parsed options object produced by the sumolib argument parser.

    Raises:
        SystemExit: if --fast is combined with an unsupported selection
            (no attribute, no element, or more than one element). The message
            is written to stderr and the exit status is non-zero.
    """
    op = sumolib.options.ArgumentParser()
    op.add_argument("datafiles", nargs="+", category="input", type=op.file,
                    help="XML files to analyze")
    op.add_argument("-v", "--verbose", action="store_true", default=False,
                    help="Give more output")
    op.add_argument("-e", "--element", category="input",
                    help="element to analyze")
    op.add_argument("-a", "--attribute", category="input",
                    help="attribute to analyze")
    op.add_argument("-i", "--id-attribute", dest="idAttr", default="id", category="input",
                    help="attribute to identify data elements")
    op.add_argument("-b", "--binwidth", type=float, default=50, category="processing",
                    help="binning width of result histogram")
    op.add_argument("--hist-output", category="output",
                    help="output file for histogram (gnuplot compatible)")
    op.add_argument("-o", "--full-output", category="output",
                    help="output file for full data dump")
    op.add_argument("-x", "--xml-output", category="output", type=op.file,
                    help="output statistic to xml file")
    op.add_argument("--xml-output.flat", action="store_true", dest="xmlFlat", default=False, category="output",
                    help="legacy xml output")
    op.add_argument("-q", "--fast", action="store_true", default=False, category="processing",
                    help="use fast parser (does not track missing data)")
    op.add_argument("-p", "--precision", type=int, default=2, category="output",
                    help="Set output precision")
    op.add_argument("--abs", default=False, action="store_true",
                    help="include statistics on absolute values")
    op.add_argument("--sum", default=False, action="store_true",
                    help="include sum of values")
    op.add_argument("-H", "--human-readable-time", dest="hrTime", default=False, action="store_true", category="output",
                    help="interpret values as times and write them as h:m:s")
    options = op.parse_args()

    # both options accept a comma separated list of names
    if options.attribute:
        options.attribute = options.attribute.split(',')
    if options.element:
        options.element = options.element.split(',')

    if options.fast:
        # The fast code path in main() passes exactly one element name and an
        # explicit attribute list to parse_fast, so reject everything else.
        # sys.exit(<str>) writes the message to stderr and exits with status 1,
        # which lets calling scripts detect the invalid invocation.
        if options.attribute is None:
            sys.exit("Parsing all attributes is not supported when using option --fast")
        if options.element is None:
            sys.exit("Parsing all elements is not supported when using option --fast")
        if len(options.element) > 1:
            sys.exit("Parsing multiple elements is not supported when using option --fast")

    return options
91

92

93
def main():
    """Gather attribute values from the input files and report statistics.

    Every (element tag, attribute) pair with at least one parseable value gets
    its own Statistics object which is printed to stdout. Afterwards the
    function reports attributes that were missing on identified elements and
    (only when attributes were selected explicitly) values that could not be
    parsed. Depending on the options it additionally writes a histogram file
    per statistic, a full dump of all values and an XML summary.
    """
    options = get_options()

    # elementID -> attribute -> list of parsed values (used for --full-output)
    vals = defaultdict(lambda: defaultdict(list))
    # (tag, attribute) -> Statistics
    allStats = dict()
    # attribute -> ids of elements that lack the attribute
    missingAttr = defaultdict(set)
    # attribute -> distinct raw values that could not be parsed
    invalidType = defaultdict(set)
    formatter = humanReadableTime if options.hrTime else identity

    if options.fast:
        assert len(options.element) == 1
        elem = options.element[0]

        def elements():
            # yields (tag, attribute, raw value, element id) using parse_fast
            for datafile in options.datafiles:
                for element in parse_fast(datafile, elem, [options.idAttr] + options.attribute):
                    for attr in options.attribute:
                        yield elem, attr, getattr(element, attr), getattr(element, options.idAttr)
    else:
        def elements():
            # yields (tag, attribute, raw value, element id) using the lxml parser
            for datafile in options.datafiles:
                # with several input files, elements without id are attributed to their file
                defaultID = None if len(options.datafiles) == 1 else datafile

                with sumolib.openz(datafile, 'rb') as f:
                    for _, node in ET.iterparse(f):
                        if options.element is not None and node.tag not in options.element:
                            continue
                        elementID = node.get(options.idAttr, defaultID)
                        if options.attribute is None:
                            # no selection: analyze every attribute except the id itself
                            for k, v in node.items():
                                if k != options.idAttr:
                                    yield node.tag, k, v, elementID
                        else:
                            for attr in options.attribute:
                                # node.get returns None for a missing attribute
                                yield node.tag, attr, node.get(attr), elementID

    for tag, attr, stringVal, elementID in elements():
        if stringVal is not None:
            try:
                if '_' in stringVal:
                    # float() accepts '_' but this doesn't play nice with lane ids
                    raise Exception
                val = sumolib.miscutils.parseTime(stringVal)
                vals[elementID][attr].append(val)
                key = (tag, attr)
                if key not in allStats:
                    allStats[key] = Statistics("%s %s" % (tag, attr),
                                               histogram=options.binwidth > 0, scale=options.binwidth,
                                               printDev=True, abs=options.abs, printSum=options.sum)

                stats = allStats[key]
                stats.add(val, elementID)
            except Exception:
                # anything that cannot be processed as a number is only recorded
                invalidType[attr].add(stringVal)
        else:
            if elementID is not None:
                missingAttr[attr].add(elementID)

    histStyle = 1 if len(allStats) == 1 else 0
    for key in sorted(allStats.keys()):
        print(allStats[key].toString(options.precision, histStyle=histStyle, fmt=formatter))

    if missingAttr:
        for attr in sorted(missingAttr.keys()):
            print("%s elements did not provide attribute '%s' Example ids: '%s'" %
                  (len(missingAttr[attr]), attr, "', '".join(sorted(missingAttr[attr])[:10])))

    if invalidType and options.attribute is not None:
        for attr in sorted(invalidType.keys()):
            print(("%s distinct values of attribute '%s' could not be interpreted " +
                   "as numerical value or time. Example values: '%s'") %
                  (len(invalidType[attr]), attr, "', '".join(sorted(invalidType[attr])[:10])))

    if options.hist_output is not None:
        for key in sorted(allStats.keys()):
            # a single statistic uses the given file name, otherwise tag and attribute are appended
            fname = options.hist_output if len(allStats) == 1 else options.hist_output + ".%s.%s" % key
            with open(fname, 'w') as f:
                for histBin, count in allStats[key].histogram():
                    f.write("%s %s\n" % (histBin, count))

    if options.full_output is not None:
        with open(options.full_output, 'w') as f:
            # Elements without id are stored under the key None. None and str
            # cannot be compared in Python 3, so sort with a key that puts
            # None first and keeps the plain string order for everything else.
            for elementID in sorted(vals.keys(), key=lambda k: (k is not None, "" if k is None else k)):
                for attr, data in vals[elementID].items():
                    if len(vals[elementID]) > 1:
                        f.write("# %s\n" % attr)
                    for x in data:
                        f.write(setPrecision("%.2f %s\n", options.precision) % (x, elementID))

    if options.xml_output is not None:
        with open(options.xml_output, 'w') as f:
            sumolib.writeXMLHeader(f, "$Id$", "attributeStats")  # noqa
            if options.xmlFlat:
                for key in sorted(allStats.keys()):
                    f.write(allStats[key].toXML(options.precision, fmt=formatter))

            else:
                # group the statistics by element tag: one child per attribute
                elemKeys = defaultdict(list)
                for key in allStats.keys():
                    elemKeys[key[0]].append(key)
                for elem in sorted(elemKeys.keys()):
                    f.write('    <%s>\n' % elem)
                    for key in sorted(elemKeys[elem]):
                        attr = key[1]
                        stats = allStats[key]
                        f.write(stats.toXML(options.precision, tag=attr, indent=8, label='', fmt=formatter))
                    f.write('    </%s>\n' % elem)
            f.write('</attributeStats>\n')
201

202

203
# run the analysis only when executed as a script, not when imported
if __name__ == "__main__":
    main()
205

206
Product

Resources

Company