Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/ts/misc/trace_profiler.py
16339 views
1
from __future__ import print_function
2
3
import os
4
import sys
5
import csv
6
from pprint import pprint
7
from collections import deque
8
9
try:
10
long # Python 2
11
except NameError:
12
long = int # Python 3
13
14
# trace.hpp
15
REGION_FLAG_IMPL_MASK = 15 << 16
16
REGION_FLAG_IMPL_IPP = 1 << 16
17
REGION_FLAG_IMPL_OPENCL = 2 << 16
18
19
DEBUG = False
20
21
if DEBUG:
22
dprint = print
23
dpprint = pprint
24
else:
25
def dprint(args, **kwargs):
26
pass
27
def dpprint(args, **kwargs):
28
pass
29
30
def tryNum(s):
31
if s.startswith('0x'):
32
try:
33
return int(s, 16)
34
except ValueError:
35
pass
36
try:
37
return int(s)
38
except ValueError:
39
pass
40
if sys.version_info[0] < 3:
41
try:
42
return long(s)
43
except ValueError:
44
pass
45
return s
46
47
def formatTimestamp(t):
48
return "%.3f" % (t * 1e-6)
49
50
try:
51
from statistics import median
52
except ImportError:
53
def median(lst):
54
sortedLst = sorted(lst)
55
lstLen = len(lst)
56
index = (lstLen - 1) // 2
57
if (lstLen % 2):
58
return sortedLst[index]
59
else:
60
return (sortedLst[index] + sortedLst[index + 1]) * 0.5
61
62
def getCXXFunctionName(spec):
63
def dropParams(spec):
64
pos = len(spec) - 1
65
depth = 0
66
while pos >= 0:
67
if spec[pos] == ')':
68
depth = depth + 1
69
elif spec[pos] == '(':
70
depth = depth - 1
71
if depth == 0:
72
if pos == 0 or spec[pos - 1] in ['#', ':']:
73
res = dropParams(spec[pos+1:-1])
74
return (spec[:pos] + res[0], res[1])
75
return (spec[:pos], spec[pos:])
76
pos = pos - 1
77
return (spec, '')
78
79
def extractName(spec):
80
pos = len(spec) - 1
81
inName = False
82
while pos >= 0:
83
if spec[pos] == ' ':
84
if inName:
85
return spec[pos+1:]
86
elif spec[pos].isalnum():
87
inName = True
88
pos = pos - 1
89
return spec
90
91
if spec.startswith('IPP') or spec.startswith('OpenCL'):
92
prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
93
prefix = spec[:prefix_size]
94
if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
95
prefix = prefix + spec[prefix_size]
96
prefix_size = prefix_size + 1
97
begin = prefix_size
98
while begin < len(spec):
99
if spec[begin].isalnum() or spec[begin] in ['_', ':']:
100
break
101
begin = begin + 1
102
if begin == len(spec):
103
return spec
104
end = begin
105
while end < len(spec):
106
if not (spec[end].isalnum() or spec[end] in ['_', ':']):
107
break
108
end = end + 1
109
return prefix + spec[begin:end]
110
111
spec = spec.replace(') const', ')') # const methods
112
(ret_type_name, params) = dropParams(spec)
113
name = extractName(ret_type_name)
114
if 'operator' in name:
115
return name + params
116
if name.startswith('&'):
117
return name[1:]
118
return name
119
120
stack_size = 10
121
122
class Trace:
123
def __init__(self, filename=None):
124
self.tasks = {}
125
self.tasks_list = []
126
self.locations = {}
127
self.threads_stack = {}
128
self.pending_files = deque()
129
if filename:
130
self.load(filename)
131
132
class TraceTask:
133
def __init__(self, threadID, taskID, locationID, beginTimestamp):
134
self.threadID = threadID
135
self.taskID = taskID
136
self.locationID = locationID
137
self.beginTimestamp = beginTimestamp
138
self.endTimestamp = None
139
self.parentTaskID = None
140
self.parentThreadID = None
141
self.childTask = []
142
self.selfTimeIPP = 0
143
self.selfTimeOpenCL = 0
144
self.totalTimeIPP = 0
145
self.totalTimeOpenCL = 0
146
147
def __repr__(self):
148
return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
149
self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
150
self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
151
152
153
class TraceLocation:
154
def __init__(self, locationID, filename, line, name, flags):
155
self.locationID = locationID
156
self.filename = os.path.split(filename)[1]
157
self.line = line
158
self.name = getCXXFunctionName(name)
159
self.flags = flags
160
161
def __str__(self):
162
return "{}#{}:{}".format(self.name, self.filename, self.line)
163
164
def __repr__(self):
165
return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
166
167
def parse_file(self, filename):
168
dprint("Process file: '{}'".format(filename))
169
with open(filename) as infile:
170
for line in infile:
171
line = str(line).strip()
172
if line[0] == "#":
173
if line.startswith("#thread file:"):
174
name = str(line.split(':', 1)[1]).strip()
175
self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
176
continue
177
self.parse_line(line)
178
179
def parse_line(self, line):
180
opts = line.split(',')
181
dpprint(opts)
182
if opts[0] == 'l':
183
opts = list(csv.reader([line]))[0] # process quote more
184
locationID = int(opts[1])
185
filename = str(opts[2])
186
line = int(opts[3])
187
name = opts[4]
188
flags = tryNum(opts[5])
189
self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
190
return
191
extra_opts = {}
192
for e in opts[5:]:
193
if not '=' in e:
194
continue
195
(k, v) = e.split('=')
196
extra_opts[k] = tryNum(v)
197
if extra_opts:
198
dpprint(extra_opts)
199
threadID = None
200
taskID = None
201
locationID = None
202
ts = None
203
if opts[0] in ['b', 'e']:
204
threadID = int(opts[1])
205
taskID = int(opts[4])
206
locationID = int(opts[3])
207
ts = tryNum(opts[2])
208
thread_stack = None
209
currentTask = (None, None)
210
if threadID is not None:
211
if not threadID in self.threads_stack:
212
thread_stack = deque()
213
self.threads_stack[threadID] = thread_stack
214
else:
215
thread_stack = self.threads_stack[threadID]
216
currentTask = None if not thread_stack else thread_stack[-1]
217
t = (threadID, taskID)
218
if opts[0] == 'b':
219
assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
220
task = self.TraceTask(threadID, taskID, locationID, ts)
221
self.tasks[t] = task
222
self.tasks_list.append(task)
223
thread_stack.append((threadID, taskID))
224
if currentTask:
225
task.parentThreadID = currentTask[0]
226
task.parentTaskID = currentTask[1]
227
if 'parentThread' in extra_opts:
228
task.parentThreadID = extra_opts['parentThread']
229
if 'parent' in extra_opts:
230
task.parentTaskID = extra_opts['parent']
231
if opts[0] == 'e':
232
task = self.tasks[t]
233
task.endTimestamp = ts
234
if 'tIPP' in extra_opts:
235
task.selfTimeIPP = extra_opts['tIPP']
236
if 'tOCL' in extra_opts:
237
task.selfTimeOpenCL = extra_opts['tOCL']
238
thread_stack.pop()
239
240
def load(self, filename):
241
self.pending_files.append(filename)
242
if DEBUG:
243
with open(filename, 'r') as f:
244
print(f.read(), end='')
245
while self.pending_files:
246
self.parse_file(self.pending_files.pop())
247
248
def getParentTask(self, task):
249
return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
250
251
def process(self):
252
self.tasks_list.sort(key=lambda x: x.beginTimestamp)
253
254
parallel_for_location = None
255
for (id, l) in self.locations.items():
256
if l.name == 'parallel_for':
257
parallel_for_location = l.locationID
258
break
259
260
for task in self.tasks_list:
261
try:
262
task.duration = task.endTimestamp - task.beginTimestamp
263
task.selfDuration = task.duration
264
except:
265
task.duration = None
266
task.selfDuration = None
267
task.totalTimeIPP = task.selfTimeIPP
268
task.totalTimeOpenCL = task.selfTimeOpenCL
269
270
dpprint(self.tasks)
271
dprint("Calculate total times")
272
273
for task in self.tasks_list:
274
parentTask = self.getParentTask(task)
275
if parentTask:
276
parentTask.selfDuration = parentTask.selfDuration - task.duration
277
parentTask.childTask.append(task)
278
timeIPP = task.selfTimeIPP
279
timeOpenCL = task.selfTimeOpenCL
280
while parentTask:
281
if parentTask.locationID == parallel_for_location: # TODO parallel_for
282
break
283
parentLocation = self.locations[parentTask.locationID]
284
if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
285
parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
286
timeIPP = 0
287
else:
288
parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
289
if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
290
parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
291
timeOpenCL = 0
292
else:
293
parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
294
parentTask = self.getParentTask(parentTask)
295
296
dpprint(self.tasks)
297
dprint("Calculate total times (parallel_for)")
298
299
for task in self.tasks_list:
300
if task.locationID == parallel_for_location:
301
task.selfDuration = 0
302
childDuration = sum([t.duration for t in task.childTask])
303
if task.duration == 0 or childDuration == 0:
304
continue
305
timeCoef = task.duration / float(childDuration)
306
childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
307
childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
308
if childTimeIPP == 0 and childTimeOpenCL == 0:
309
continue
310
timeIPP = childTimeIPP * timeCoef
311
timeOpenCL = childTimeOpenCL * timeCoef
312
parentTask = task
313
while parentTask:
314
parentLocation = self.locations[parentTask.locationID]
315
if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
316
parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
317
timeIPP = 0
318
else:
319
parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
320
if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
321
parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
322
timeOpenCL = 0
323
else:
324
parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
325
parentTask = self.getParentTask(parentTask)
326
327
dpprint(self.tasks)
328
dprint("Done")
329
330
def dump(self, max_entries):
331
assert isinstance(max_entries, int)
332
333
class CallInfo():
334
def __init__(self, callID):
335
self.callID = callID
336
self.totalTimes = []
337
self.selfTimes = []
338
self.threads = set()
339
self.selfTimesIPP = []
340
self.selfTimesOpenCL = []
341
self.totalTimesIPP = []
342
self.totalTimesOpenCL = []
343
344
calls = {}
345
346
for currentTask in self.tasks_list:
347
task = currentTask
348
callID = []
349
for i in range(stack_size):
350
callID.append(task.locationID)
351
task = self.getParentTask(task)
352
if not task:
353
break
354
callID = tuple(callID)
355
if not callID in calls:
356
call = CallInfo(callID)
357
calls[callID] = call
358
else:
359
call = calls[callID]
360
call.totalTimes.append(currentTask.duration)
361
call.selfTimes.append(currentTask.selfDuration)
362
call.threads.add(currentTask.threadID)
363
call.selfTimesIPP.append(currentTask.selfTimeIPP)
364
call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
365
call.totalTimesIPP.append(currentTask.totalTimeIPP)
366
call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
367
368
dpprint(self.tasks)
369
dpprint(self.locations)
370
dpprint(calls)
371
372
calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
373
calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
374
calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
375
calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
376
377
calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
378
calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
379
380
calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
381
calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
382
383
if max_entries > 0 and len(calls_sorted) > max_entries:
384
calls_sorted = calls_sorted[:max_entries]
385
386
def formatPercents(p):
387
if p is not None:
388
return "{:>3d}".format(int(p*100))
389
return ''
390
391
name_width = 70
392
timestamp_width = 12
393
def fmtTS():
394
return '{:>' + str(timestamp_width) + '}'
395
fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
396
fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
397
print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
398
print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
399
for (index, callID) in enumerate(calls_sorted):
400
call_self_times = calls[callID].selfTimes
401
loc0 = self.locations[callID[0]]
402
loc_array = [] # [str(callID)]
403
for (i, l) in enumerate(callID):
404
loc = self.locations[l]
405
loc_array.append(loc.name if i > 0 else str(loc))
406
loc_str = '|'.join(loc_array)
407
if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
408
print(fmt.format(index + 1, loc_str, len(call_self_times),
409
len(calls[callID].threads),
410
formatTimestamp(min(call_self_times)),
411
formatTimestamp(max(call_self_times)),
412
formatTimestamp(calls_median[callID]),
413
formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
414
formatTimestamp(sum(call_self_times)),
415
formatTimestamp(calls_self_sum_IPP[callID]),
416
formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
417
formatTimestamp(calls_self_sum_OpenCL[callID]),
418
formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
419
))
420
call_total_times = calls[callID].totalTimes
421
print(fmt2.format("", "", "", "",
422
formatTimestamp(min(call_total_times)),
423
formatTimestamp(max(call_total_times)),
424
formatTimestamp(median(call_total_times)),
425
formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
426
formatTimestamp(sum(call_total_times)),
427
formatTimestamp(calls_total_sum_IPP[callID]),
428
formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
429
formatTimestamp(calls_total_sum_OpenCL[callID]),
430
formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
431
))
432
print()
433
434
if __name__ == "__main__":
435
tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
436
count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
437
trace = Trace(tracefile)
438
trace.process()
439
trace.dump(max_entries = count)
440
print("OK")
441
442