Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/bench/bench.py
2723 views
1
#!/usr/bin/python3
2
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
3
import argparse
4
import os
5
import subprocess
6
import math
7
import sys
8
import re
9
import json
10
11
# Taken from rotest
12
from color import colored, Color
13
from tabulate import TablePrinter, Alignment
14
15
# matplotlib is optional: without it, all graphing options are disabled (see run())
try:
    import matplotlib
    import matplotlib.pyplot as plt
except ModuleNotFoundError:
    matplotlib = None

# scipy is optional: without it, confidence intervals fall back to the standard deviation
try:
    import scipy
    from scipy import stats
except ModuleNotFoundError:
    print("Warning: scipy package is not installed, confidence values will not be available")
    stats = None
27
28
# Directory this script lives in; tests and VM working directories are resolved relative to it
scriptdir = os.path.dirname(os.path.realpath(__file__))
defaultVm = 'luau.exe' if os.name == "nt" else './luau'

argumentParser = argparse.ArgumentParser(description='Benchmark Lua script execution with an option to compare different VMs')

argumentParser.add_argument('--vm', dest='vm',default=defaultVm,help='Lua executable to test (' + defaultVm + ' by default)')
argumentParser.add_argument('--folder', dest='folder',default=os.path.join(scriptdir, 'tests'),help='Folder with tests (tests by default)')
argumentParser.add_argument('--compare', dest='vmNext',type=str,nargs='*',help='List of Lua executables to compare against')
argumentParser.add_argument('--results', dest='results',type=str,nargs='*',help='List of json result files to compare and graph')
argumentParser.add_argument('--run-test', action='store', default=None, help='Regex test filter')
argumentParser.add_argument('--extra-loops', action='store',type=int,default=0, help='Amount of times to loop over one test (one test already performs multiple runs)')
argumentParser.add_argument('--filename', action='store',type=str,default='bench', help='File name for graph and results file')
argumentParser.add_argument('--callgrind', dest='callgrind',action='store_const',const=1,default=0,help='Use callgrind to run benchmarks')
argumentParser.add_argument('--show-commands', dest='show_commands',action='store_const',const=1,default=0,help='Show the command line used to launch the VM and tests')

# Graphing options are only registered when matplotlib is importable
if matplotlib != None:
    argumentParser.add_argument('--absolute', dest='absolute',action='store_const',const=1,default=0,help='Display absolute values instead of relative (enabled by default when benchmarking a single VM)')
    argumentParser.add_argument('--speedup', dest='speedup',action='store_const',const=1,default=0,help='Draw a speedup graph')
    argumentParser.add_argument('--sort', dest='sort',action='store_const',const=1,default=0,help='Sort values from worst to best improvements, ignoring conf. int. (disabled by default)')
    argumentParser.add_argument('--window', dest='window',action='store_const',const=1,default=0,help='Display window with resulting plot (disabled by default)')
    argumentParser.add_argument('--graph-vertical', action='store_true',dest='graph_vertical', help="Draw graph with vertical bars instead of horizontal")

argumentParser.add_argument('--report-metrics', dest='report_metrics', help="Send metrics about this session to InfluxDB URL upon completion.")

argumentParser.add_argument('--print-influx-debugging', action='store_true', dest='print_influx_debugging', help="Print output to aid in debugging of influx metrics reporting.")
argumentParser.add_argument('--no-print-influx-debugging', action='store_false', dest='print_influx_debugging', help="Don't print output to aid in debugging of influx metrics reporting.")

argumentParser.add_argument('--no-print-final-summary', action='store_false', dest='print_final_summary', help="Don't print a table summarizing the results after all tests are run")

# Assume 2.5 IPC on a 4 GHz CPU; this is obviously incorrect but it allows us to display simulated instruction counts using regular time units
CALLGRIND_INSN_PER_SEC = 2.5 * 4e9
59
60
def arrayRange(count):
    """Return [0, 1, ..., count - 1] as a list (used for bar/tick positions)."""
    # Idiomatic replacement for the manual append loop
    return list(range(count))
67
68
def arrayRangeOffset(count, offset):
    """Return [offset, offset + 1, ..., offset + count - 1] as a list.

    offset may be a float (the graph code passes fractional bar offsets).
    """
    return [i + offset for i in range(count)]
75
76
def getCallgrindOutput(stdout, lines):
    """Convert a callgrind output dump into the '|><|name|><|time||_||' record format.

    stdout: raw benchmark process output (bytes), used as a fallback source for
            the benchmark name when the dump has no per-benchmark sections.
    lines: lines of the callgrind.out file.

    Instruction counts are converted to simulated milliseconds via
    CALLGRIND_INSN_PER_SEC.

    Fix: the original did `result += <str>` on a list, which extended it one
    character at a time; appending whole chunks produces the same joined string
    with the intended idiom.
    """
    result = []
    name = None

    for l in lines:
        if l.startswith("desc: Trigger: Client Request: "):
            name = l[31:].strip()
        elif l.startswith("summary: ") and name != None:
            insn = int(l[9:])
            # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
            result.append("|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||")
            name = None

    # If no results were found above, this may indicate the native executable running
    # the benchmark doesn't have support for callgrind builtin. In that case just
    # report the "totals" from the output file.
    if len(result) == 0:
        elements = stdout.decode('utf8').split("|><|")
        if len(elements) >= 2:
            name = elements[1]

            for l in lines:
                if l.startswith("totals: "):
                    insn = int(l[8:])
                    # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
                    result.append("|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||")

    return "".join(result)
104
105
def conditionallyShowCommand(cmd):
    """Print the command line about to be executed when --show-commands is set."""
    # 'arguments' is the module-level parsed-args object assigned in run()
    if arguments.show_commands:
        print(f'{colored(Color.BLUE, "EXECUTING")}: {cmd}')
108
109
def checkValgrindExecutable():
    """Return true if valgrind can be successfully spawned"""
    try:
        subprocess.check_call("valgrind --version", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (subprocess.SubprocessError, OSError):
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit still propagate
        print(f"{colored(Color.YELLOW, 'WARNING')}: Unable to spawn 'valgrind'. Please ensure valgrind is installed when using '--callgrind'.")
        return False

    return True
118
119
def getVmOutput(cmd):
    """Run one benchmark command line and return its stdout as a string.

    Three execution strategies:
    - Windows: launch via 'start /realtime /affinity 1' to pin the process and raise priority.
    - --callgrind: run under valgrind's callgrind tool and synthesize timings from
      instruction counts (see getCallgrindOutput).
    - otherwise: run directly, pinning the child to CPU 0 where supported.
    """
    if os.name == "nt":
        try:
            fullCmd = "start /realtime /affinity 1 /b /wait cmd /C \"" + cmd + "\""
            conditionallyShowCommand(fullCmd)
            # check_output returns bytes here, hence the explicit decode
            return subprocess.check_output(fullCmd, shell=True, cwd=scriptdir).decode()
        except KeyboardInterrupt:
            exit(1)
        except:
            # Best effort: any failure surfaces to the caller as an empty result set
            return ""
    elif arguments.callgrind:
        if not checkValgrindExecutable():
            return ""
        output_path = os.path.join(scriptdir, "callgrind.out")
        try:
            os.unlink(output_path) # Remove stale output
        except:
            pass
        fullCmd = "valgrind --tool=callgrind --callgrind-out-file=callgrind.out --combine-dumps=yes --dump-line=no " + cmd
        conditionallyShowCommand(fullCmd)
        try:
            output = subprocess.check_output(fullCmd, shell=True, stderr=subprocess.DEVNULL, cwd=scriptdir)
        except subprocess.CalledProcessError as e:
            # Keep partial output: the benchmark may have produced results before failing
            print(f"{colored(Color.YELLOW, 'WARNING')}: Valgrind returned error code {e.returncode}")
            output = e.output
        with open(output_path, "r") as file:
            lines = file.readlines()
        os.unlink(output_path)
        return getCallgrindOutput(output, lines)
    else:
        conditionallyShowCommand(cmd)
        with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=scriptdir) as p:
            # Try to lock to a single processor
            if sys.platform != "darwin":
                os.sched_setaffinity(p.pid, { 0 })

            # Try to set high priority (requires sudo)
            # NOTE(review): os.nice(-10) renices the current (driver) process, not the
            # already-spawned child -- confirm this is the intended effect
            try:
                os.nice(-10)
            except:
                pass

            return p.communicate()[0]
162
163
def getShortVmName(name):
    """Strip directory components from the executable part of a VM command line.

    Everything after the first space is treated as arguments and preserved as-is.
    """
    # Hope that the path to executable doesn't contain spaces
    def stripPath(path):
        # Prefer the last backslash; fall back to the last forward slash
        cut = path.rfind("\\")
        if cut == -1:
            cut = path.rfind("/")
        return path[cut + 1:] if cut != -1 else path

    space = name.find(" ")

    if space == -1:
        return stripPath(name)

    return stripPath(name[:space]) + " " + name[space + 1:]
190
191
class TestResult:
    """Timing results for a single benchmark on a single VM.

    Fix: these fields used to be mutable *class* attributes, so every
    default-constructed TestResult shared one 'values' list; merging samples
    into such an instance (mergeResult) would silently corrupt all others.
    Instance attributes set in __init__ remove the shared state.
    """

    def __init__(self):
        self.filename = ""  # benchmark script file name
        self.vm = ""        # full VM command line
        self.shortVm = ""   # VM command line with directories stripped
        self.name = ""      # benchmark name reported by the test itself

        self.values = []    # individual timings, in milliseconds
        self.count = 0      # len(values)
        self.min = None     # None marks a result with no samples (i.e. a failure)
        self.avg = 0
        self.max = None

        self.sampleStdDev = 0
        self.unbiasedEst = 0
        self.sampleConfidenceInterval = 0
206
207
def extractResult(filename, vm, output):
    """Parse one '|><|'-delimited benchmark record into a TestResult.

    Record layout: <test output> |><| <benchmark name> |><| <time> |><| <time> ...
    """
    elements = output.split("|><|")

    # Drop the test's own output preceding the first separator
    elements.pop(0)

    result = TestResult()

    result.filename = filename
    result.vm = vm
    result.shortVm = getShortVmName(vm)

    result.name = elements.pop(0)

    # Remaining elements are individual timings
    result.values = [float(el) for el in elements]
    result.count = len(result.values)

    return result
231
232
def mergeResult(lhs, rhs):
    """Fold all timing samples from rhs into lhs and refresh lhs.count."""
    lhs.values.extend(rhs.values)
    lhs.count = len(lhs.values)
237
238
def mergeResults(lhs, rhs):
    """Merge each result of rhs into the positionally matching result of lhs."""
    for target, extra in zip(lhs, rhs):
        mergeResult(target, extra)
241
242
def finalizeResult(result):
    """Compute aggregate statistics for a result in place and return it.

    Fills min/max/avg plus the sample standard deviation, the unbiased variance
    estimate and a 95% confidence interval (when scipy's stats is available;
    otherwise the interval falls back to one standard deviation).
    """
    # Basic aggregates; an existing, more extreme min/max is kept
    for v in result.values:
        if result.min is None or v < result.min:
            result.min = v

        if result.max is None or v > result.max:
            result.max = v

    total = sum(result.values)

    result.avg = total / result.count if result.count > 0 else 0

    if result.count > 1:
        # Sample (n - 1) standard deviation and unbiased variance estimate
        sumOfSquares = sum((v - result.avg) ** 2 for v in result.values)

        result.sampleStdDev = math.sqrt(sumOfSquares / (result.count - 1))
        result.unbiasedEst = result.sampleStdDev * result.sampleStdDev

        if stats:
            # Two-tailed distribution with 95% conf.
            tValue = stats.t.ppf(1 - 0.05 / 2, result.count - 1)

            result.sampleConfidenceInterval = tValue * result.sampleStdDev / math.sqrt(result.count)
        else:
            result.sampleConfidenceInterval = result.sampleStdDev
    else:
        result.sampleStdDev = 0
        result.unbiasedEst = 0
        result.sampleConfidenceInterval = 0

    return result
284
285
# Full result set
286
allResults = []
287
288
289
# Data for the graph
290
plotLegend = []
291
292
plotLabels = []
293
294
plotValueLists = []
295
plotConfIntLists = []
296
297
# Totals
298
vmTotalMin = []
299
vmTotalAverage = []
300
vmTotalImprovement = []
301
vmTotalResults = []
302
303
# Data for Telegraf report
304
mainTotalMin = 0
305
mainTotalAverage = 0
306
mainTotalMax = 0
307
308
def getExtraArguments(filepath):
    """Scan a benchmark script for a '--bench-args:' marker and return the text
    after it, or "" when the marker is absent or the file is unreadable."""
    try:
        with open(filepath) as f:
            for line in f:
                pos = line.find("--bench-args:")
                if pos != -1:
                    return line[pos + 13:].strip()
    except (OSError, UnicodeDecodeError):
        # Best effort by design; narrowed from a bare 'except:' so unrelated
        # errors (e.g. KeyboardInterrupt) still propagate
        pass

    return ""
319
320
def substituteArguments(cmd, extra):
    """Apply the user-supplied substitution callback (if any), then splice the
    extra arguments in at the @EXTRA marker or append them at the end."""
    if argumentSubstituionCallback is not None:
        cmd = argumentSubstituionCallback(cmd)

    if "@EXTRA" in cmd:
        return cmd.replace("@EXTRA", extra)

    return cmd + " " + extra
330
331
def extractResults(filename, vm, output, allowFailure):
    """Split VM output into '||_||'-delimited records and parse each into a TestResult.

    Returns an empty list when the output contains no records, unless
    allowFailure is set, in which case a single empty placeholder result
    (min == None) is returned so comparison rows can report the failure.
    """
    results = []

    splitOutput = output.split("||_||")

    if len(splitOutput) <= 1:
        if allowFailure:
            result = TestResult()

            result.filename = filename
            result.vm = vm
            result.shortVm = getShortVmName(vm)

            results.append(result)

        return results

    # Drop the trailing chunk after the final separator.
    # Fix: the original used remove(splitOutput[-1]), which deletes the first
    # element *equal* to the last one and could drop the wrong record when
    # duplicate (e.g. empty) chunks exist; pop() always removes the last.
    splitOutput.pop()

    for el in splitOutput:
        results.append(extractResult(filename, vm, el))

    return results
354
355
def analyzeResult(subdir, main, comparisons):
    """Record, print and tabulate one benchmark result and its comparison runs.

    main: the main VM's finalized TestResult.
    comparisons: one finalized TestResult per --compare VM (min == None marks a
    failed run). Updates module-level aggregates (mainTotal*, vmTotal*, plot
    data) and reports rows to resultPrinter and optionally influxReporter.
    """
    # Aggregate statistics
    global mainTotalMin, mainTotalAverage, mainTotalMax

    mainTotalMin = mainTotalMin + main.min
    mainTotalAverage = mainTotalAverage + main.avg
    mainTotalMax = mainTotalMax + main.max

    # The comparison table has extra columns; emit the matching row shape
    if arguments.vmNext != None:
        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(main.min),
            'Average': '{:8.3f}ms'.format(main.avg),
            'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
            'Driver': main.shortVm,
            'Speedup': "",
            'Significance': "",
            'P(T<=t)': ""
        })
    else:
        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(main.min),
            'Average': '{:8.3f}ms'.format(main.avg),
            'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
            'Driver': main.shortVm
        })

    if influxReporter != None:
        influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", main.min, main.avg, main.max, main.sampleConfidenceInterval, main.shortVm, main.vm)

    print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(main.avg) + "ms +/- " +
        '{:6.3f}'.format(main.sampleConfidenceInterval / main.avg * 100) + "% on " + main.shortVm)

    plotLabels.append(main.name)

    # index 0 is the main VM; comparison VMs get 1..N below
    index = 0

    # Lazily grow the per-VM lists on the first benchmark
    if len(plotValueLists) < index + 1:
        plotValueLists.append([])
        plotConfIntLists.append([])

        vmTotalMin.append(0.0)
        vmTotalAverage.append(0.0)
        vmTotalImprovement.append(0.0)
        vmTotalResults.append(0)

    # Relative mode scales every VM's time so the main VM reads as 100%
    if arguments.absolute or arguments.speedup:
        scale = 1
    else:
        scale = 100 / main.avg

    plotValueLists[index].append(main.avg * scale)
    plotConfIntLists[index].append(main.sampleConfidenceInterval * scale)

    vmTotalMin[index] += main.min
    vmTotalAverage[index] += main.avg

    for compare in comparisons:
        index = index + 1

        # In speedup mode all values live in list 0, so no extra lists are added
        if len(plotValueLists) < index + 1 and not arguments.speedup:
            plotValueLists.append([])
            plotConfIntLists.append([])

            vmTotalMin.append(0.0)
            vmTotalAverage.append(0.0)
            vmTotalImprovement.append(0.0)
            vmTotalResults.append(0)

        # min == None marks a comparison VM that failed to produce samples
        if compare.min == None:
            print(colored(Color.RED, 'FAILED') + ": '" + main.name + "' on '" + compare.vm + "'")

            resultPrinter.add_row({ 'Test': main.name, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': compare.shortVm, 'Speedup': "", 'Significance': "", 'P(T<=t)': "" })

            # NOTE(review): this passes main.filename where the success path passes
            # main.name as the test name -- looks like a copy-paste slip; confirm
            if influxReporter != None:
                influxReporter.report_result(subdir, main.filename, main.filename, "FAILED", 0.0, 0.0, 0.0, 0.0, compare.shortVm, compare.vm)

            # Replace the pending value with 0 so list lengths stay in sync
            if arguments.speedup:
                plotValueLists[0].pop()
                plotValueLists[0].append(0)

                plotConfIntLists[0].pop()
                plotConfIntLists[0].append(0)
            else:
                plotValueLists[index].append(0)
                plotConfIntLists[index].append(0)

            continue

        # Welch-style significance check via a pooled two-sample t-test
        if main.count > 1 and stats:
            pooledStdDev = math.sqrt((main.unbiasedEst + compare.unbiasedEst) / 2)

            tStat = abs(main.avg - compare.avg) / (pooledStdDev * math.sqrt(2 / main.count))
            degreesOfFreedom = 2 * main.count - 2

            # Two-tailed distribution with 95% conf.
            tCritical = stats.t.ppf(1 - 0.05 / 2, degreesOfFreedom)

            noSignificantDifference = tStat < tCritical
            pValue = 2 * (1 - stats.t.cdf(tStat, df = degreesOfFreedom))
        else:
            noSignificantDifference = None
            pValue = -1

        if noSignificantDifference is None:
            verdict = ""
        elif noSignificantDifference:
            verdict = "likely same"
        elif main.avg < compare.avg:
            verdict = "likely worse"
        else:
            verdict = "likely better"

        speedup = (plotValueLists[0][-1] / (compare.avg * scale) - 1)
        speedupColor = Color.YELLOW if speedup < 0 and noSignificantDifference else Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW

        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(compare.min),
            'Average': '{:8.3f}ms'.format(compare.avg),
            'StdDev%': '{:8.3f}%'.format(compare.sampleConfidenceInterval / compare.avg * 100),
            'Driver': compare.shortVm,
            'Speedup': colored(speedupColor, '{:8.3f}%'.format(speedup * 100)),
            'Significance': verdict,
            'P(T<=t)': '---' if pValue < 0 else '{:.0f}%'.format(pValue * 100)
        })

        print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(compare.avg) + "ms +/- " +
            '{:6.3f}'.format(compare.sampleConfidenceInterval / compare.avg * 100) + "% on " + compare.shortVm +
            ' ({:+7.3f}%, '.format(speedup * 100) + verdict + ")")

        if influxReporter != None:
            influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", compare.min, compare.avg, compare.max, compare.sampleConfidenceInterval, compare.shortVm, compare.vm)

        # In speedup mode the main VM's stored value is replaced by the speedup %
        if arguments.speedup:
            oldValue = plotValueLists[0].pop()
            newValue = compare.avg

            plotValueLists[0].append((oldValue / newValue - 1) * 100)

            plotConfIntLists[0].pop()
            plotConfIntLists[0].append(0)
        else:
            plotValueLists[index].append(compare.avg * scale)
            plotConfIntLists[index].append(compare.sampleConfidenceInterval * scale)

        vmTotalMin[index] += compare.min
        vmTotalAverage[index] += compare.avg
        vmTotalImprovement[index] += math.log(main.avg / compare.avg)
        vmTotalResults[index] += 1
506
507
def runTest(subdir, filename, filepath):
    """Run one benchmark file on the main VM (and each --compare VM), then
    finalize and analyze the collected results, appending them to allResults."""
    filepath = os.path.abspath(filepath)

    mainVm = os.path.abspath(arguments.vm)

    # Process output will contain the test name and execution times
    mainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
    mainResultSet = extractResults(filename, mainVm, mainOutput, False)

    # A main VM failure aborts this test entirely (comparisons are pointless without a baseline)
    if len(mainResultSet) == 0:
        print(colored(Color.RED, 'FAILED') + ": '" + filepath + "' on '" + mainVm + "'")

        if arguments.vmNext != None:
            resultPrinter.add_row({ 'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm), 'Speedup': "", 'Significance': "", 'P(T<=t)': "" })
        else:
            resultPrinter.add_row({ 'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm) })

        if influxReporter != None:
            influxReporter.report_result(subdir, filename, filename, "FAILED", 0.0, 0.0, 0.0, 0.0, getShortVmName(mainVm), mainVm)
        return

    compareResultSets = []

    if arguments.vmNext != None:
        for compareVm in arguments.vmNext:
            compareVm = os.path.abspath(compareVm)

            compareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
            compareResultSet = extractResults(filename, compareVm, compareOutput, True)

            compareResultSets.append(compareResultSet)

    if arguments.extra_loops > 0:
        # get more results
        for i in range(arguments.extra_loops):
            extraMainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
            extraMainResultSet = extractResults(filename, mainVm, extraMainOutput, False)

            mergeResults(mainResultSet, extraMainResultSet)

            if arguments.vmNext != None:
                # NOTE(review): this reuses and clobbers the outer loop variable 'i'
                # as the compare-set index; harmless (the for rebinds it) but fragile
                i = 0
                for compareVm in arguments.vmNext:
                    compareVm = os.path.abspath(compareVm)

                    extraCompareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
                    extraCompareResultSet = extractResults(filename, compareVm, extraCompareOutput, True)

                    mergeResults(compareResultSets[i], extraCompareResultSet)
                    i += 1

    # finalize results
    for result in mainResultSet:
        finalizeResult(result)

    for compareResultSet in compareResultSets:
        for result in compareResultSet:
            finalizeResult(result)

    # analyze results
    for i in range(len(mainResultSet)):
        mainResult = mainResultSet[i]
        compareResults = []

        for el in compareResultSets:
            if i < len(el):
                compareResults.append(el[i])
            else:
                # Pad missing comparison entries with an empty placeholder
                # (min stays None, which analyzeResult reports as FAILED)
                noResult = TestResult()

                noResult.filename = el[0].filename
                noResult.vm = el[0].vm
                noResult.shortVm = el[0].shortVm

                compareResults.append(noResult)

        analyzeResult(subdir, mainResult, compareResults)

        mergedResults = []
        mergedResults.append(mainResult)

        for el in compareResults:
            mergedResults.append(el)

        allResults.append(mergedResults)
592
593
def rearrangeSortKeyForComparison(e):
    """Sort key for comparison mode: ratio of the main VM value to the first
    comparison VM value at index e; failed entries (0) sort as 1 (neutral)."""
    baseline = plotValueLists[1][e]

    if baseline == 0:
        return 1

    return plotValueLists[0][e] / baseline
598
599
def rearrangeSortKeyForSpeedup(e):
    """Sort key for --speedup mode: the computed speedup at index e."""
    speedups = plotValueLists[0]
    return speedups[e]
601
602
def rearrangeSortKeyDescending(e):
    """Sort key producing descending order of the main VM's values."""
    values = plotValueLists[0]
    return -values[e]
604
605
# Re-arrange results from worst to best
606
def rearrange(key):
    """Reorder plotLabels, plotValueLists and plotConfIntLists by sorting the
    benchmark indices with the given key function."""
    global plotLabels

    # Compute the permutation first, while the key can still see the old lists
    order = sorted(range(len(plotLabels)), key=key)

    plotLabels = [plotLabels[i] for i in order]

    # Rebuild every per-VM value and confidence list in the sorted order
    for group in range(len(plotValueLists)):
        plotValueLists[group] = [plotValueLists[group][i] for i in order]
        plotConfIntLists[group] = [plotConfIntLists[group][i] for i in order]
629
630
# Graph
631
def graph():
    """Render the benchmark bar chart to '<filename>.png', and show a window
    when --window is set. Bars are grouped per benchmark, one bar per VM."""
    if len(plotValueLists) == 0:
        print("No results")
        return

    ind = arrayRange(len(plotLabels))
    # Total group width of 0.8, split evenly between the VMs
    width = 0.8 / len(plotValueLists)

    if arguments.graph_vertical:
        # Extend graph width when we have a lot of tests to draw
        barcount = len(plotValueLists[0])
        plt.figure(figsize=(max(8, barcount * 0.3), 8))
    else:
        # Extend graph height when we have a lot of tests to draw
        barcount = len(plotValueLists[0])
        plt.figure(figsize=(8, max(8, barcount * 0.3)))

    plotBars = []

    matplotlib.rc('xtick', labelsize=10)
    matplotlib.rc('ytick', labelsize=10)

    if arguments.graph_vertical:
        # Draw Y grid behind the bars
        plt.rc('axes', axisbelow=True)
        plt.grid(True, 'major', 'y')

        for i in range(len(plotValueLists)):
            bar = plt.bar(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, yerr=plotConfIntLists[i])
            plotBars.append(bar[0])

        if arguments.absolute:
            plt.ylabel('Time (ms)')
        elif arguments.speedup:
            plt.ylabel('Speedup (%)')
        else:
            plt.ylabel('Relative time (%)')

        plt.title('Benchmark')
        plt.xticks(ind, plotLabels, rotation='vertical')
    else:
        # Draw X grid behind the bars
        plt.rc('axes', axisbelow=True)
        plt.grid(True, 'major', 'x')

        for i in range(len(plotValueLists)):
            bar = plt.barh(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, xerr=plotConfIntLists[i])
            plotBars.append(bar[0])

        if arguments.absolute:
            plt.xlabel('Time (ms)')
        elif arguments.speedup:
            plt.xlabel('Speedup (%)')
        else:
            plt.xlabel('Relative time (%)')

        plt.title('Benchmark')
        plt.yticks(ind, plotLabels)

        # Keep the first benchmark at the top for horizontal bars
        plt.gca().invert_yaxis()

    plt.legend(plotBars, plotLegend)

    plt.tight_layout()

    plt.savefig(arguments.filename + ".png", dpi=200)

    if arguments.window:
        plt.show()
700
701
def addTotalsToTable():
    """Append per-VM 'Total' rows (summed min/average across all benchmarks)
    to the results table, including an overall speedup for each --compare VM."""
    if len(vmTotalMin) == 0:
        return

    if arguments.vmNext != None:
        # index 0 is the main VM; comparison VMs follow in --compare order
        index = 0

        resultPrinter.add_row({
            'Test': 'Total',
            'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
            'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
            'StdDev%': "---",
            'Driver': getShortVmName(os.path.abspath(arguments.vm)),
            'Speedup': "",
            'Significance': "",
            'P(T<=t)': ""
        })

        for compareVm in arguments.vmNext:
            index = index + 1

            # Overall speedup of the main VM's total time relative to this VM's total
            speedup = vmTotalAverage[0] / vmTotalAverage[index] * 100 - 100

            resultPrinter.add_row({
                'Test': 'Total',
                'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
                'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
                'StdDev%': "---",
                'Driver': getShortVmName(os.path.abspath(compareVm)),
                'Speedup': colored(Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW, '{:8.3f}%'.format(speedup)),
                'Significance': "",
                'P(T<=t)': ""
            })
    else:
        resultPrinter.add_row({
            'Test': 'Total',
            'Min': '{:8.3f}ms'.format(vmTotalMin[0]),
            'Average': '{:8.3f}ms'.format(vmTotalAverage[0]),
            'StdDev%': "---",
            'Driver': getShortVmName(os.path.abspath(arguments.vm))
        })
742
743
def writeResultsToFile():
    """Serialize allResults (lists of TestResult) to '<filename>.json' so runs
    can later be reloaded and compared via --results."""
    class TestResultEncoder(json.JSONEncoder):
        def default(self, obj):
            # Flat array form matching the loader in run(): [filename, vm, shortVm, name, values, count]
            if isinstance(obj, TestResult):
                return [obj.filename, obj.vm, obj.shortVm, obj.name, obj.values, obj.count]
            return json.JSONEncoder.default(self, obj)

    try:
        with open(arguments.filename + ".json", "w") as allResultsFile:
            allResultsFile.write(json.dumps(allResults, cls=TestResultEncoder))
    except (OSError, TypeError) as e:
        # Don't fail the whole benchmark run over a results-file problem,
        # but say why (the original bare except hid the reason entirely)
        print("Failed to write results to a file: {}".format(e))
755
756
def run(args, argsubcb):
    """Main driver: configure globals, load prior results or run the benchmark
    folder, then sort, summarize, graph and persist the results.

    args: parsed argparse namespace (stored in the module-level 'arguments').
    argsubcb: optional callback that rewrites VM command lines (see substituteArguments).
    """
    global arguments, resultPrinter, influxReporter, argumentSubstituionCallback, allResults
    arguments = args
    argumentSubstituionCallback = argsubcb

    if os.name == "nt" and arguments.callgrind:
        print(f"{colored(Color.RED, 'ERROR')}: --callgrind is not supported on Windows. Please consider using this option on another OS, or Linux using WSL.")
        sys.exit(1)

    # influxbench is imported lazily so metric reporting stays optional
    if arguments.report_metrics or arguments.print_influx_debugging:
        import influxbench
        influxReporter = influxbench.InfluxReporter(arguments)
    else:
        influxReporter = None

    # Without matplotlib the graph-related options were never registered; give them defaults
    if matplotlib == None:
        arguments.absolute = 0
        arguments.speedup = 0
        arguments.sort = 0
        arguments.window = 0

    # Load results from files
    if arguments.results != None:
        vmList = []

        for result in arguments.results:
            with open(result) as resultsFile:
                resultArray = json.load(resultsFile)

            # Rehydrate the flat JSON arrays back into TestResult objects
            # (layout matches TestResultEncoder in writeResultsToFile)
            for test in resultArray:
                for i in range(len(test)):
                    arr = test[i]

                    tr = TestResult()

                    tr.filename = arr[0]
                    tr.vm = arr[1]
                    tr.shortVm = arr[2]
                    tr.name = arr[3]
                    tr.values = arr[4]
                    tr.count = arr[5]

                    test[i] = tr

            # Build the VM list, disambiguating duplicate VM names with the file name
            for test in resultArray[0]:
                if vmList.count(test.vm) > 0:
                    pointPos = result.rfind(".")

                    if pointPos != -1:
                        vmList.append(test.vm + " [" + result[0:pointPos] + "]")
                    else:
                        vmList.append(test.vm + " [" + result + "]")
                else:
                    vmList.append(test.vm)

            if len(allResults) == 0:
                allResults = resultArray
            else:
                # Merge by matching (filename, name); pad unmatched entries with placeholders
                for prevEl in allResults:
                    found = False

                    # NOTE(review): the loop variable 'run' shadows this function's own
                    # name inside this scope -- confirm before refactoring
                    for nextEl in resultArray:
                        if nextEl[0].filename == prevEl[0].filename and nextEl[0].name == prevEl[0].name:
                            for run in nextEl:
                                prevEl.append(run)
                            found = True

                    if not found:
                        el = resultArray[0]

                        for run in el:
                            result = TestResult()

                            result.filename = run.filename
                            result.vm = run.vm
                            result.shortVm = run.shortVm
                            result.name = run.name

                            prevEl.append(result)

        # First VM becomes the main VM, the rest act as --compare VMs
        arguments.vmNext = []

        for i in range(len(vmList)):
            if i == 0:
                arguments.vm = vmList[i]
            else:
                arguments.vmNext.append(vmList[i])

    plotLegend.append(getShortVmName(arguments.vm))

    if arguments.vmNext != None:
        for compareVm in arguments.vmNext:
            plotLegend.append(getShortVmName(compareVm))
    else:
        arguments.absolute = 1 # When looking at one VM, I feel that relative graph doesn't make a lot of sense

    # Results table formatting
    if arguments.vmNext != None:
        resultPrinter = TablePrinter([
            {'label': 'Test', 'align': Alignment.LEFT},
            {'label': 'Min', 'align': Alignment.RIGHT},
            {'label': 'Average', 'align': Alignment.RIGHT},
            {'label': 'StdDev%', 'align': Alignment.RIGHT},
            {'label': 'Driver', 'align': Alignment.LEFT},
            {'label': 'Speedup', 'align': Alignment.RIGHT},
            {'label': 'Significance', 'align': Alignment.LEFT},
            {'label': 'P(T<=t)', 'align': Alignment.RIGHT}
        ])
    else:
        resultPrinter = TablePrinter([
            {'label': 'Test', 'align': Alignment.LEFT},
            {'label': 'Min', 'align': Alignment.RIGHT},
            {'label': 'Average', 'align': Alignment.RIGHT},
            {'label': 'StdDev%', 'align': Alignment.RIGHT},
            {'label': 'Driver', 'align': Alignment.LEFT}
        ])

    if arguments.results != None:
        # Analyze the pre-loaded results instead of executing benchmarks
        for resultSet in allResults:
            # finalize results
            for result in resultSet:
                finalizeResult(result)

            # analyze results
            mainResult = resultSet[0]
            compareResults = []

            for i in range(len(resultSet)):
                if i != 0:
                    compareResults.append(resultSet[i])

            analyzeResult('', mainResult, compareResults)
    else:
        # Execute every .lua file under the test folder, honoring the --run-test filter
        all_files = [subdir + os.sep + filename for subdir, dirs, files in os.walk(arguments.folder) for filename in files]
        for filepath in sorted(all_files):
            subdir, filename = os.path.split(filepath)
            if filename.endswith(".lua"):
                if arguments.run_test == None or re.match(arguments.run_test, filename[:-4]):
                    runTest(subdir, filename, filepath)

    if arguments.sort and len(plotValueLists) > 1:
        rearrange(rearrangeSortKeyForComparison)
    elif arguments.sort and len(plotValueLists) == 1:
        rearrange(rearrangeSortKeyDescending)
    elif arguments.speedup:
        rearrange(rearrangeSortKeyForSpeedup)

        plotLegend[0] = arguments.vm + " vs " + arguments.vmNext[0]

    if arguments.print_final_summary:
        addTotalsToTable()

        print()
        print(colored(Color.YELLOW, '==================================================RESULTS=================================================='))
        resultPrinter.print(summary=False)
        print(colored(Color.YELLOW, '---'))

        # Geometric-mean summary of improvements per comparison VM
        if len(vmTotalMin) != 0 and arguments.vmNext != None:
            index = 0

            for compareVm in arguments.vmNext:
                index = index + 1

                name = getShortVmName(os.path.abspath(compareVm))
                deltaGeoMean = math.exp(vmTotalImprovement[index] / vmTotalResults[index]) * 100 - 100

                if deltaGeoMean > 0:
                    print("'{}' change is {:.3f}% positive on average".format(name, deltaGeoMean))
                else:
                    print("'{}' change is {:.3f}% negative on average".format(name, deltaGeoMean))

    if matplotlib != None:
        graph()

    writeResultsToFile()

    if influxReporter != None:
        influxReporter.report_result(arguments.folder, "Total", "all", "SUCCESS", mainTotalMin, mainTotalAverage, mainTotalMax, 0.0, getShortVmName(arguments.vm), os.path.abspath(arguments.vm))
        influxReporter.flush(0)
935
936
937
if __name__ == "__main__":
    # Standalone entry point: parse the command line and run with no argument substitution callback
    arguments = argumentParser.parse_args()
    run(arguments, None)
940
941