Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/bench/bench.py
2723 views
1
#!/usr/bin/python3
2
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
3
import argparse
4
import os
5
import subprocess
6
import math
7
import sys
8
import re
9
import json
10
11
# Taken from rotest
12
from color import colored, Color
13
from tabulate import TablePrinter, Alignment
14
15
# matplotlib is optional: without it, all graphing options are disabled (see run())
try:
    import matplotlib
    import matplotlib.pyplot as plt
except ModuleNotFoundError:
    matplotlib = None

# scipy is optional: without it, confidence intervals fall back to the standard deviation
try:
    import scipy
    from scipy import stats
except ModuleNotFoundError:
    print("Warning: scipy package is not installed, confidence values will not be available")
    stats = None
27
28
# Directory this script lives in; tests and VM working directories are resolved relative to it
scriptdir = os.path.dirname(os.path.realpath(__file__))
defaultVm = 'luau.exe' if os.name == "nt" else './luau'

argumentParser = argparse.ArgumentParser(description='Benchmark Lua script execution with an option to compare different VMs')

argumentParser.add_argument('--vm', dest='vm',default=defaultVm,help='Lua executable to test (' + defaultVm + ' by default)')
argumentParser.add_argument('--folder', dest='folder',default=os.path.join(scriptdir, 'tests'),help='Folder with tests (tests by default)')
argumentParser.add_argument('--compare', dest='vmNext',type=str,nargs='*',help='List of Lua executables to compare against')
argumentParser.add_argument('--results', dest='results',type=str,nargs='*',help='List of json result files to compare and graph')
argumentParser.add_argument('--run-test', action='store', default=None, help='Regex test filter')
argumentParser.add_argument('--extra-loops', action='store',type=int,default=0, help='Amount of times to loop over one test (one test already performs multiple runs)')
argumentParser.add_argument('--filename', action='store',type=str,default='bench', help='File name for graph and results file')
argumentParser.add_argument('--callgrind', dest='callgrind',action='store_const',const=1,default=0,help='Use callgrind to run benchmarks')
argumentParser.add_argument('--show-commands', dest='show_commands',action='store_const',const=1,default=0,help='Show the command line used to launch the VM and tests')

# Graphing options are only registered when matplotlib is importable
if matplotlib != None:
    argumentParser.add_argument('--absolute', dest='absolute',action='store_const',const=1,default=0,help='Display absolute values instead of relative (enabled by default when benchmarking a single VM)')
    argumentParser.add_argument('--speedup', dest='speedup',action='store_const',const=1,default=0,help='Draw a speedup graph')
    argumentParser.add_argument('--sort', dest='sort',action='store_const',const=1,default=0,help='Sort values from worst to best improvements, ignoring conf. int. (disabled by default)')
    argumentParser.add_argument('--window', dest='window',action='store_const',const=1,default=0,help='Display window with resulting plot (disabled by default)')
    argumentParser.add_argument('--graph-vertical', action='store_true',dest='graph_vertical', help="Draw graph with vertical bars instead of horizontal")

argumentParser.add_argument('--report-metrics', dest='report_metrics', help="Send metrics about this session to InfluxDB URL upon completion.")

argumentParser.add_argument('--print-influx-debugging', action='store_true', dest='print_influx_debugging', help="Print output to aid in debugging of influx metrics reporting.")
argumentParser.add_argument('--no-print-influx-debugging', action='store_false', dest='print_influx_debugging', help="Don't print output to aid in debugging of influx metrics reporting.")

argumentParser.add_argument('--no-print-final-summary', action='store_false', dest='print_final_summary', help="Don't print a table summarizing the results after all tests are run")

# Assume 2.5 IPC on a 4 GHz CPU; this is obviously incorrect but it allows us to display simulated instruction counts using regular time units
CALLGRIND_INSN_PER_SEC = 2.5 * 4e9
59
60
def arrayRange(count):
    """Return [0, 1, ..., count - 1] as a list (used for bar/tick positions)."""
    # Idiomatic replacement for the manual append loop
    return list(range(count))
67
68
def arrayRangeOffset(count, offset):
    """Return [offset, offset + 1, ..., offset + count - 1] as a list.

    offset may be a float (the graph code passes fractional bar offsets).
    """
    return [i + offset for i in range(count)]
75
76
def getCallgrindOutput(stdout, lines):
    """Convert a callgrind output dump into the '|><|name|><|time||_||' record format.

    stdout: raw benchmark process output (bytes), used as a fallback source for
            the benchmark name when the dump has no per-benchmark sections.
    lines: lines of the callgrind.out file.

    Instruction counts are converted to simulated milliseconds via
    CALLGRIND_INSN_PER_SEC.

    Fix: the original did `result += <str>` on a list, which extended it one
    character at a time; appending whole chunks produces the same joined string
    with the intended idiom.
    """
    result = []
    name = None

    for l in lines:
        if l.startswith("desc: Trigger: Client Request: "):
            name = l[31:].strip()
        elif l.startswith("summary: ") and name != None:
            insn = int(l[9:])
            # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
            result.append("|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||")
            name = None

    # If no results were found above, this may indicate the native executable running
    # the benchmark doesn't have support for callgrind builtin. In that case just
    # report the "totals" from the output file.
    if len(result) == 0:
        elements = stdout.decode('utf8').split("|><|")
        if len(elements) >= 2:
            name = elements[1]

            for l in lines:
                if l.startswith("totals: "):
                    insn = int(l[8:])
                    # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
                    result.append("|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||")

    return "".join(result)
104
105
def conditionallyShowCommand(cmd):
    """Print the command line about to be executed when --show-commands is set."""
    # 'arguments' is the module-level parsed-args object assigned in run()
    if arguments.show_commands:
        print(f'{colored(Color.BLUE, "EXECUTING")}: {cmd}')
108
109
def checkValgrindExecutable():
    """Return true if valgrind can be successfully spawned"""
    try:
        subprocess.check_call("valgrind --version", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (subprocess.SubprocessError, OSError):
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit still propagate
        print(f"{colored(Color.YELLOW, 'WARNING')}: Unable to spawn 'valgrind'. Please ensure valgrind is installed when using '--callgrind'.")
        return False

    return True
118
119
def getVmOutput(cmd):
    """Run one benchmark command line and return its stdout as a string.

    Three execution strategies:
    - Windows: launch via 'start /realtime /affinity 1' to pin the process and raise priority.
    - --callgrind: run under valgrind's callgrind tool and synthesize timings from
      instruction counts (see getCallgrindOutput).
    - otherwise: run directly, pinning the child to CPU 0 where supported.
    """
    if os.name == "nt":
        try:
            fullCmd = "start /realtime /affinity 1 /b /wait cmd /C \"" + cmd + "\""
            conditionallyShowCommand(fullCmd)
            # check_output returns bytes here, hence the explicit decode
            return subprocess.check_output(fullCmd, shell=True, cwd=scriptdir).decode()
        except KeyboardInterrupt:
            exit(1)
        except:
            # Best effort: any failure surfaces to the caller as an empty result set
            return ""
    elif arguments.callgrind:
        if not checkValgrindExecutable():
            return ""
        output_path = os.path.join(scriptdir, "callgrind.out")
        try:
            os.unlink(output_path) # Remove stale output
        except:
            pass
        fullCmd = "valgrind --tool=callgrind --callgrind-out-file=callgrind.out --combine-dumps=yes --dump-line=no " + cmd
        conditionallyShowCommand(fullCmd)
        try:
            output = subprocess.check_output(fullCmd, shell=True, stderr=subprocess.DEVNULL, cwd=scriptdir)
        except subprocess.CalledProcessError as e:
            # Keep partial output: the benchmark may have produced results before failing
            print(f"{colored(Color.YELLOW, 'WARNING')}: Valgrind returned error code {e.returncode}")
            output = e.output
        with open(output_path, "r") as file:
            lines = file.readlines()
        os.unlink(output_path)
        return getCallgrindOutput(output, lines)
    else:
        conditionallyShowCommand(cmd)
        with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=scriptdir) as p:
            # Try to lock to a single processor
            if sys.platform != "darwin":
                os.sched_setaffinity(p.pid, { 0 })

            # Try to set high priority (requires sudo)
            # NOTE(review): os.nice(-10) renices the current (driver) process, not the
            # already-spawned child -- confirm this is the intended effect
            try:
                os.nice(-10)
            except:
                pass

            return p.communicate()[0]
162
163
def getShortVmName(name):
    """Strip directory components from the executable part of a VM command line.

    Everything after the first space is treated as arguments and preserved as-is.
    """
    # Hope that the path to executable doesn't contain spaces
    def stripPath(path):
        # Prefer the last backslash; fall back to the last forward slash
        cut = path.rfind("\\")
        if cut == -1:
            cut = path.rfind("/")
        return path[cut + 1:] if cut != -1 else path

    space = name.find(" ")

    if space == -1:
        return stripPath(name)

    return stripPath(name[:space]) + " " + name[space + 1:]
190
191
class TestResult:
    """Timing results for a single benchmark on a single VM.

    Fix: these fields used to be mutable *class* attributes, so every
    default-constructed TestResult shared one 'values' list; merging samples
    into such an instance (mergeResult) would silently corrupt all others.
    Instance attributes set in __init__ remove the shared state.
    """

    def __init__(self):
        self.filename = ""  # benchmark script file name
        self.vm = ""        # full VM command line
        self.shortVm = ""   # VM command line with directories stripped
        self.name = ""      # benchmark name reported by the test itself

        self.values = []    # individual timings, in milliseconds
        self.count = 0      # len(values)
        self.min = None     # None marks a result with no samples (i.e. a failure)
        self.avg = 0
        self.max = None

        self.sampleStdDev = 0
        self.unbiasedEst = 0
        self.sampleConfidenceInterval = 0
206
207
def extractResult(filename, vm, output):
    """Parse one '|><|'-delimited benchmark record into a TestResult.

    Record layout: <test output> |><| <benchmark name> |><| <time> |><| <time> ...
    """
    elements = output.split("|><|")

    # Drop the test's own output preceding the first separator
    elements.pop(0)

    result = TestResult()

    result.filename = filename
    result.vm = vm
    result.shortVm = getShortVmName(vm)

    result.name = elements.pop(0)

    # Remaining elements are individual timings
    result.values = [float(el) for el in elements]
    result.count = len(result.values)

    return result
231
232
def mergeResult(lhs, rhs):
    """Fold all timing samples from rhs into lhs and refresh lhs.count."""
    lhs.values.extend(rhs.values)
    lhs.count = len(lhs.values)
237
238
def mergeResults(lhs, rhs):
    """Merge each result of rhs into the positionally matching result of lhs."""
    for target, extra in zip(lhs, rhs):
        mergeResult(target, extra)
241
242
def finalizeResult(result):
    """Compute aggregate statistics for a result in place and return it.

    Fills min/max/avg plus the sample standard deviation, the unbiased variance
    estimate and a 95% confidence interval (when scipy's stats is available;
    otherwise the interval falls back to one standard deviation).
    """
    # Basic aggregates; an existing, more extreme min/max is kept
    for v in result.values:
        if result.min is None or v < result.min:
            result.min = v

        if result.max is None or v > result.max:
            result.max = v

    total = sum(result.values)

    result.avg = total / result.count if result.count > 0 else 0

    if result.count > 1:
        # Sample (n - 1) standard deviation and unbiased variance estimate
        sumOfSquares = sum((v - result.avg) ** 2 for v in result.values)

        result.sampleStdDev = math.sqrt(sumOfSquares / (result.count - 1))
        result.unbiasedEst = result.sampleStdDev * result.sampleStdDev

        if stats:
            # Two-tailed distribution with 95% conf.
            tValue = stats.t.ppf(1 - 0.05 / 2, result.count - 1)

            result.sampleConfidenceInterval = tValue * result.sampleStdDev / math.sqrt(result.count)
        else:
            result.sampleConfidenceInterval = result.sampleStdDev
    else:
        result.sampleStdDev = 0
        result.unbiasedEst = 0
        result.sampleConfidenceInterval = 0

    return result
284
285
# Full result set
286
allResults = []
287
288
289
# Data for the graph
290
plotLegend = []
291
292
plotLabels = []
293
294
plotValueLists = []
295
plotConfIntLists = []
296
297
# Totals
298
vmTotalMin = []
299
vmTotalAverage = []
300
vmTotalImprovement = []
301
vmTotalResults = []
302
303
# Data for Telegraf report
304
mainTotalMin = 0
305
mainTotalAverage = 0
306
mainTotalMax = 0
307
308
def getExtraArguments(filepath):
    """Scan a benchmark script for a '--bench-args:' marker and return the text
    after it, or "" when the marker is absent or the file is unreadable."""
    try:
        with open(filepath) as f:
            for line in f:
                pos = line.find("--bench-args:")
                if pos != -1:
                    return line[pos + 13:].strip()
    except (OSError, UnicodeDecodeError):
        # Best effort by design; narrowed from a bare 'except:' so unrelated
        # errors (e.g. KeyboardInterrupt) still propagate
        pass

    return ""
319
320
def substituteArguments(cmd, extra):
    """Apply the user-supplied substitution callback (if any), then splice the
    extra arguments in at the @EXTRA marker or append them at the end."""
    if argumentSubstituionCallback is not None:
        cmd = argumentSubstituionCallback(cmd)

    if "@EXTRA" in cmd:
        return cmd.replace("@EXTRA", extra)

    return cmd + " " + extra
330
331
def extractResults(filename, vm, output, allowFailure):
    """Split VM output into '||_||'-delimited records and parse each into a TestResult.

    Returns an empty list when the output contains no records, unless
    allowFailure is set, in which case a single empty placeholder result
    (min == None) is returned so comparison rows can report the failure.
    """
    results = []

    splitOutput = output.split("||_||")

    if len(splitOutput) <= 1:
        if allowFailure:
            result = TestResult()

            result.filename = filename
            result.vm = vm
            result.shortVm = getShortVmName(vm)

            results.append(result)

        return results

    # Drop the trailing chunk after the final separator.
    # Fix: the original used remove(splitOutput[-1]), which deletes the first
    # element *equal* to the last one and could drop the wrong record when
    # duplicate (e.g. empty) chunks exist; pop() always removes the last.
    splitOutput.pop()

    for el in splitOutput:
        results.append(extractResult(filename, vm, el))

    return results
354
355
def analyzeResult(subdir, main, comparisons):
    """Record, print and tabulate one benchmark result and its comparison runs.

    main: the main VM's finalized TestResult.
    comparisons: one finalized TestResult per --compare VM (min == None marks a
    failed run). Updates module-level aggregates (mainTotal*, vmTotal*, plot
    data) and reports rows to resultPrinter and optionally influxReporter.
    """
    # Aggregate statistics
    global mainTotalMin, mainTotalAverage, mainTotalMax

    mainTotalMin = mainTotalMin + main.min
    mainTotalAverage = mainTotalAverage + main.avg
    mainTotalMax = mainTotalMax + main.max

    # The comparison table has extra columns; emit the matching row shape
    if arguments.vmNext != None:
        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(main.min),
            'Average': '{:8.3f}ms'.format(main.avg),
            'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
            'Driver': main.shortVm,
            'Speedup': "",
            'Significance': "",
            'P(T<=t)': ""
        })
    else:
        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(main.min),
            'Average': '{:8.3f}ms'.format(main.avg),
            'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
            'Driver': main.shortVm
        })

    if influxReporter != None:
        influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", main.min, main.avg, main.max, main.sampleConfidenceInterval, main.shortVm, main.vm)

    print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(main.avg) + "ms +/- " +
        '{:6.3f}'.format(main.sampleConfidenceInterval / main.avg * 100) + "% on " + main.shortVm)

    plotLabels.append(main.name)

    # index 0 is the main VM; comparison VMs get 1..N below
    index = 0

    # Lazily grow the per-VM lists on the first benchmark
    if len(plotValueLists) < index + 1:
        plotValueLists.append([])
        plotConfIntLists.append([])

        vmTotalMin.append(0.0)
        vmTotalAverage.append(0.0)
        vmTotalImprovement.append(0.0)
        vmTotalResults.append(0)

    # Relative mode scales every VM's time so the main VM reads as 100%
    if arguments.absolute or arguments.speedup:
        scale = 1
    else:
        scale = 100 / main.avg

    plotValueLists[index].append(main.avg * scale)
    plotConfIntLists[index].append(main.sampleConfidenceInterval * scale)

    vmTotalMin[index] += main.min
    vmTotalAverage[index] += main.avg

    for compare in comparisons:
        index = index + 1

        # In speedup mode all values live in list 0, so no extra lists are added
        if len(plotValueLists) < index + 1 and not arguments.speedup:
            plotValueLists.append([])
            plotConfIntLists.append([])

            vmTotalMin.append(0.0)
            vmTotalAverage.append(0.0)
            vmTotalImprovement.append(0.0)
            vmTotalResults.append(0)

        # min == None marks a comparison VM that failed to produce samples
        if compare.min == None:
            print(colored(Color.RED, 'FAILED') + ": '" + main.name + "' on '" + compare.vm + "'")

            resultPrinter.add_row({ 'Test': main.name, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': compare.shortVm, 'Speedup': "", 'Significance': "", 'P(T<=t)': "" })

            # NOTE(review): this passes main.filename where the success path passes
            # main.name as the test name -- looks like a copy-paste slip; confirm
            if influxReporter != None:
                influxReporter.report_result(subdir, main.filename, main.filename, "FAILED", 0.0, 0.0, 0.0, 0.0, compare.shortVm, compare.vm)

            # Replace the pending value with 0 so list lengths stay in sync
            if arguments.speedup:
                plotValueLists[0].pop()
                plotValueLists[0].append(0)

                plotConfIntLists[0].pop()
                plotConfIntLists[0].append(0)
            else:
                plotValueLists[index].append(0)
                plotConfIntLists[index].append(0)

            continue

        # Welch-style significance check via a pooled two-sample t-test
        if main.count > 1 and stats:
            pooledStdDev = math.sqrt((main.unbiasedEst + compare.unbiasedEst) / 2)

            tStat = abs(main.avg - compare.avg) / (pooledStdDev * math.sqrt(2 / main.count))
            degreesOfFreedom = 2 * main.count - 2

            # Two-tailed distribution with 95% conf.
            tCritical = stats.t.ppf(1 - 0.05 / 2, degreesOfFreedom)

            noSignificantDifference = tStat < tCritical
            pValue = 2 * (1 - stats.t.cdf(tStat, df = degreesOfFreedom))
        else:
            noSignificantDifference = None
            pValue = -1

        if noSignificantDifference is None:
            verdict = ""
        elif noSignificantDifference:
            verdict = "likely same"
        elif main.avg < compare.avg:
            verdict = "likely worse"
        else:
            verdict = "likely better"

        speedup = (plotValueLists[0][-1] / (compare.avg * scale) - 1)
        speedupColor = Color.YELLOW if speedup < 0 and noSignificantDifference else Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW

        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(compare.min),
            'Average': '{:8.3f}ms'.format(compare.avg),
            'StdDev%': '{:8.3f}%'.format(compare.sampleConfidenceInterval / compare.avg * 100),
            'Driver': compare.shortVm,
            'Speedup': colored(speedupColor, '{:8.3f}%'.format(speedup * 100)),
            'Significance': verdict,
            'P(T<=t)': '---' if pValue < 0 else '{:.0f}%'.format(pValue * 100)
        })

        print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(compare.avg) + "ms +/- " +
            '{:6.3f}'.format(compare.sampleConfidenceInterval / compare.avg * 100) + "% on " + compare.shortVm +
            ' ({:+7.3f}%, '.format(speedup * 100) + verdict + ")")

        if influxReporter != None:
            influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", compare.min, compare.avg, compare.max, compare.sampleConfidenceInterval, compare.shortVm, compare.vm)

        # In speedup mode the main VM's stored value is replaced by the speedup %
        if arguments.speedup:
            oldValue = plotValueLists[0].pop()
            newValue = compare.avg

            plotValueLists[0].append((oldValue / newValue - 1) * 100)

            plotConfIntLists[0].pop()
            plotConfIntLists[0].append(0)
        else:
            plotValueLists[index].append(compare.avg * scale)
            plotConfIntLists[index].append(compare.sampleConfidenceInterval * scale)

        vmTotalMin[index] += compare.min
        vmTotalAverage[index] += compare.avg
        vmTotalImprovement[index] += math.log(main.avg / compare.avg)
        vmTotalResults[index] += 1
506
507
def runTest(subdir, filename, filepath):
    """Run one benchmark file on the main VM (and each --compare VM), then
    finalize and analyze the collected results, appending them to allResults."""
    filepath = os.path.abspath(filepath)

    mainVm = os.path.abspath(arguments.vm)

    # Process output will contain the test name and execution times
    mainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
    mainResultSet = extractResults(filename, mainVm, mainOutput, False)

    # A main VM failure aborts this test entirely (comparisons are pointless without a baseline)
    if len(mainResultSet) == 0:
        print(colored(Color.RED, 'FAILED') + ": '" + filepath + "' on '" + mainVm + "'")

        if arguments.vmNext != None:
            resultPrinter.add_row({ 'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm), 'Speedup': "", 'Significance': "", 'P(T<=t)': "" })
        else:
            resultPrinter.add_row({ 'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm) })

        if influxReporter != None:
            influxReporter.report_result(subdir, filename, filename, "FAILED", 0.0, 0.0, 0.0, 0.0, getShortVmName(mainVm), mainVm)
        return

    compareResultSets = []

    if arguments.vmNext != None:
        for compareVm in arguments.vmNext:
            compareVm = os.path.abspath(compareVm)

            compareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
            compareResultSet = extractResults(filename, compareVm, compareOutput, True)

            compareResultSets.append(compareResultSet)

    if arguments.extra_loops > 0:
        # get more results
        for i in range(arguments.extra_loops):
            extraMainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
            extraMainResultSet = extractResults(filename, mainVm, extraMainOutput, False)

            mergeResults(mainResultSet, extraMainResultSet)

            if arguments.vmNext != None:
                # NOTE(review): this reuses and clobbers the outer loop variable 'i'
                # as the compare-set index; harmless (the for rebinds it) but fragile
                i = 0
                for compareVm in arguments.vmNext:
                    compareVm = os.path.abspath(compareVm)

                    extraCompareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
                    extraCompareResultSet = extractResults(filename, compareVm, extraCompareOutput, True)

                    mergeResults(compareResultSets[i], extraCompareResultSet)
                    i += 1

    # finalize results
    for result in mainResultSet:
        finalizeResult(result)

    for compareResultSet in compareResultSets:
        for result in compareResultSet:
            finalizeResult(result)

    # analyze results
    for i in range(len(mainResultSet)):
        mainResult = mainResultSet[i]
        compareResults = []

        for el in compareResultSets:
            if i < len(el):
                compareResults.append(el[i])
            else:
                # Pad missing comparison entries with an empty placeholder
                # (min stays None, which analyzeResult reports as FAILED)
                noResult = TestResult()

                noResult.filename = el[0].filename
                noResult.vm = el[0].vm
                noResult.shortVm = el[0].shortVm

                compareResults.append(noResult)

        analyzeResult(subdir, mainResult, compareResults)

        mergedResults = []
        mergedResults.append(mainResult)

        for el in compareResults:
            mergedResults.append(el)

        allResults.append(mergedResults)
592
593
def rearrangeSortKeyForComparison(e):
    """Sort key for comparison mode: ratio of the main VM value to the first
    comparison VM value at index e; failed entries (0) sort as 1 (neutral)."""
    baseline = plotValueLists[1][e]

    if baseline == 0:
        return 1

    return plotValueLists[0][e] / baseline
598
599
def rearrangeSortKeyForSpeedup(e):
    """Sort key for --speedup mode: the computed speedup at index e."""
    speedups = plotValueLists[0]
    return speedups[e]
601
602
def rearrangeSortKeyDescending(e):
    """Sort key producing descending order of the main VM's values."""
    values = plotValueLists[0]
    return -values[e]
604
605
# Re-arrange results from worst to best
606
def rearrange(key):
    """Reorder plotLabels, plotValueLists and plotConfIntLists by sorting the
    benchmark indices with the given key function."""
    global plotLabels

    # Compute the permutation first, while the key can still see the old lists
    order = sorted(range(len(plotLabels)), key=key)

    plotLabels = [plotLabels[i] for i in order]

    # Rebuild every per-VM value and confidence list in the sorted order
    for group in range(len(plotValueLists)):
        plotValueLists[group] = [plotValueLists[group][i] for i in order]
        plotConfIntLists[group] = [plotConfIntLists[group][i] for i in order]
629
630
# Graph
631
def graph():
    """Render the benchmark bar chart to '<filename>.png', and show a window
    when --window is set. Bars are grouped per benchmark, one bar per VM."""
    if len(plotValueLists) == 0:
        print("No results")
        return

    ind = arrayRange(len(plotLabels))
    # Total group width of 0.8, split evenly between the VMs
    width = 0.8 / len(plotValueLists)

    if arguments.graph_vertical:
        # Extend graph width when we have a lot of tests to draw
        barcount = len(plotValueLists[0])
        plt.figure(figsize=(max(8, barcount * 0.3), 8))
    else:
        # Extend graph height when we have a lot of tests to draw
        barcount = len(plotValueLists[0])
        plt.figure(figsize=(8, max(8, barcount * 0.3)))

    plotBars = []

    matplotlib.rc('xtick', labelsize=10)
    matplotlib.rc('ytick', labelsize=10)

    if arguments.graph_vertical:
        # Draw Y grid behind the bars
        plt.rc('axes', axisbelow=True)
        plt.grid(True, 'major', 'y')

        for i in range(len(plotValueLists)):
            bar = plt.bar(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, yerr=plotConfIntLists[i])
            plotBars.append(bar[0])

        if arguments.absolute:
            plt.ylabel('Time (ms)')
        elif arguments.speedup:
            plt.ylabel('Speedup (%)')
        else:
            plt.ylabel('Relative time (%)')

        plt.title('Benchmark')
        plt.xticks(ind, plotLabels, rotation='vertical')
    else:
        # Draw X grid behind the bars
        plt.rc('axes', axisbelow=True)
        plt.grid(True, 'major', 'x')

        for i in range(len(plotValueLists)):
            bar = plt.barh(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, xerr=plotConfIntLists[i])
            plotBars.append(bar[0])

        if arguments.absolute:
            plt.xlabel('Time (ms)')
        elif arguments.speedup:
            plt.xlabel('Speedup (%)')
        else:
            plt.xlabel('Relative time (%)')

        plt.title('Benchmark')
        plt.yticks(ind, plotLabels)

        # Keep the first benchmark at the top for horizontal bars
        plt.gca().invert_yaxis()

    plt.legend(plotBars, plotLegend)

    plt.tight_layout()

    plt.savefig(arguments.filename + ".png", dpi=200)

    if arguments.window:
        plt.show()
700
701
def addTotalsToTable():
    """Append per-VM 'Total' rows (summed min/average across all benchmarks)
    to the results table, including an overall speedup for each --compare VM."""
    if len(vmTotalMin) == 0:
        return

    if arguments.vmNext != None:
        # index 0 is the main VM; comparison VMs follow in --compare order
        index = 0

        resultPrinter.add_row({
            'Test': 'Total',
            'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
            'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
            'StdDev%': "---",
            'Driver': getShortVmName(os.path.abspath(arguments.vm)),
            'Speedup': "",
            'Significance': "",
            'P(T<=t)': ""
        })

        for compareVm in arguments.vmNext:
            index = index + 1

            # Overall speedup of the main VM's total time relative to this VM's total
            speedup = vmTotalAverage[0] / vmTotalAverage[index] * 100 - 100

            resultPrinter.add_row({
                'Test': 'Total',
                'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
                'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
                'StdDev%': "---",
                'Driver': getShortVmName(os.path.abspath(compareVm)),
                'Speedup': colored(Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW, '{:8.3f}%'.format(speedup)),
                'Significance': "",
                'P(T<=t)': ""
            })
    else:
        resultPrinter.add_row({
            'Test': 'Total',
            'Min': '{:8.3f}ms'.format(vmTotalMin[0]),
            'Average': '{:8.3f}ms'.format(vmTotalAverage[0]),
            'StdDev%': "---",
            'Driver': getShortVmName(os.path.abspath(arguments.vm))
        })
742
743
def writeResultsToFile():
    """Serialize allResults (lists of TestResult) to '<filename>.json' so runs
    can later be reloaded and compared via --results."""
    class TestResultEncoder(json.JSONEncoder):
        def default(self, obj):
            # Flat array form matching the loader in run(): [filename, vm, shortVm, name, values, count]
            if isinstance(obj, TestResult):
                return [obj.filename, obj.vm, obj.shortVm, obj.name, obj.values, obj.count]
            return json.JSONEncoder.default(self, obj)

    try:
        with open(arguments.filename + ".json", "w") as allResultsFile:
            allResultsFile.write(json.dumps(allResults, cls=TestResultEncoder))
    except (OSError, TypeError) as e:
        # Don't fail the whole benchmark run over a results-file problem,
        # but say why (the original bare except hid the reason entirely)
        print("Failed to write results to a file: {}".format(e))
755
756
def run(args, argsubcb):
    """Main driver: configure globals, load prior results or run the benchmark
    folder, then sort, summarize, graph and persist the results.

    args: parsed argparse namespace (stored in the module-level 'arguments').
    argsubcb: optional callback that rewrites VM command lines (see substituteArguments).
    """
    global arguments, resultPrinter, influxReporter, argumentSubstituionCallback, allResults
    arguments = args
    argumentSubstituionCallback = argsubcb

    if os.name == "nt" and arguments.callgrind:
        print(f"{colored(Color.RED, 'ERROR')}: --callgrind is not supported on Windows. Please consider using this option on another OS, or Linux using WSL.")
        sys.exit(1)

    # influxbench is imported lazily so metric reporting stays optional
    if arguments.report_metrics or arguments.print_influx_debugging:
        import influxbench
        influxReporter = influxbench.InfluxReporter(arguments)
    else:
        influxReporter = None

    # Without matplotlib the graph-related options were never registered; give them defaults
    if matplotlib == None:
        arguments.absolute = 0
        arguments.speedup = 0
        arguments.sort = 0
        arguments.window = 0

    # Load results from files
    if arguments.results != None:
        vmList = []

        for result in arguments.results:
            with open(result) as resultsFile:
                resultArray = json.load(resultsFile)

            # Rehydrate the flat JSON arrays back into TestResult objects
            # (layout matches TestResultEncoder in writeResultsToFile)
            for test in resultArray:
                for i in range(len(test)):
                    arr = test[i]

                    tr = TestResult()

                    tr.filename = arr[0]
                    tr.vm = arr[1]
                    tr.shortVm = arr[2]
                    tr.name = arr[3]
                    tr.values = arr[4]
                    tr.count = arr[5]

                    test[i] = tr

            # Build the VM list, disambiguating duplicate VM names with the file name
            for test in resultArray[0]:
                if vmList.count(test.vm) > 0:
                    pointPos = result.rfind(".")

                    if pointPos != -1:
                        vmList.append(test.vm + " [" + result[0:pointPos] + "]")
                    else:
                        vmList.append(test.vm + " [" + result + "]")
                else:
                    vmList.append(test.vm)

            if len(allResults) == 0:
                allResults = resultArray
            else:
                # Merge by matching (filename, name); pad unmatched entries with placeholders
                for prevEl in allResults:
                    found = False

                    # NOTE(review): the loop variable 'run' shadows this function's own
                    # name inside this scope -- confirm before refactoring
                    for nextEl in resultArray:
                        if nextEl[0].filename == prevEl[0].filename and nextEl[0].name == prevEl[0].name:
                            for run in nextEl:
                                prevEl.append(run)
                            found = True

                    if not found:
                        el = resultArray[0]

                        for run in el:
                            result = TestResult()

                            result.filename = run.filename
                            result.vm = run.vm
                            result.shortVm = run.shortVm
                            result.name = run.name

                            prevEl.append(result)

        # First VM becomes the main VM, the rest act as --compare VMs
        arguments.vmNext = []

        for i in range(len(vmList)):
            if i == 0:
                arguments.vm = vmList[i]
            else:
                arguments.vmNext.append(vmList[i])

    plotLegend.append(getShortVmName(arguments.vm))

    if arguments.vmNext != None:
        for compareVm in arguments.vmNext:
            plotLegend.append(getShortVmName(compareVm))
    else:
        arguments.absolute = 1 # When looking at one VM, I feel that relative graph doesn't make a lot of sense

    # Results table formatting
    if arguments.vmNext != None:
        resultPrinter = TablePrinter([
            {'label': 'Test', 'align': Alignment.LEFT},
            {'label': 'Min', 'align': Alignment.RIGHT},
            {'label': 'Average', 'align': Alignment.RIGHT},
            {'label': 'StdDev%', 'align': Alignment.RIGHT},
            {'label': 'Driver', 'align': Alignment.LEFT},
            {'label': 'Speedup', 'align': Alignment.RIGHT},
            {'label': 'Significance', 'align': Alignment.LEFT},
            {'label': 'P(T<=t)', 'align': Alignment.RIGHT}
        ])
    else:
        resultPrinter = TablePrinter([
            {'label': 'Test', 'align': Alignment.LEFT},
            {'label': 'Min', 'align': Alignment.RIGHT},
            {'label': 'Average', 'align': Alignment.RIGHT},
            {'label': 'StdDev%', 'align': Alignment.RIGHT},
            {'label': 'Driver', 'align': Alignment.LEFT}
        ])

    if arguments.results != None:
        # Analyze the pre-loaded results instead of executing benchmarks
        for resultSet in allResults:
            # finalize results
            for result in resultSet:
                finalizeResult(result)

            # analyze results
            mainResult = resultSet[0]
            compareResults = []

            for i in range(len(resultSet)):
                if i != 0:
                    compareResults.append(resultSet[i])

            analyzeResult('', mainResult, compareResults)
    else:
        # Execute every .lua file under the test folder, honoring the --run-test filter
        all_files = [subdir + os.sep + filename for subdir, dirs, files in os.walk(arguments.folder) for filename in files]
        for filepath in sorted(all_files):
            subdir, filename = os.path.split(filepath)
            if filename.endswith(".lua"):
                if arguments.run_test == None or re.match(arguments.run_test, filename[:-4]):
                    runTest(subdir, filename, filepath)

    if arguments.sort and len(plotValueLists) > 1:
        rearrange(rearrangeSortKeyForComparison)
    elif arguments.sort and len(plotValueLists) == 1:
        rearrange(rearrangeSortKeyDescending)
    elif arguments.speedup:
        rearrange(rearrangeSortKeyForSpeedup)

        plotLegend[0] = arguments.vm + " vs " + arguments.vmNext[0]

    if arguments.print_final_summary:
        addTotalsToTable()

        print()
        print(colored(Color.YELLOW, '==================================================RESULTS=================================================='))
        resultPrinter.print(summary=False)
        print(colored(Color.YELLOW, '---'))

        # Geometric-mean summary of improvements per comparison VM
        if len(vmTotalMin) != 0 and arguments.vmNext != None:
            index = 0

            for compareVm in arguments.vmNext:
                index = index + 1

                name = getShortVmName(os.path.abspath(compareVm))
                deltaGeoMean = math.exp(vmTotalImprovement[index] / vmTotalResults[index]) * 100 - 100

                if deltaGeoMean > 0:
                    print("'{}' change is {:.3f}% positive on average".format(name, deltaGeoMean))
                else:
                    print("'{}' change is {:.3f}% negative on average".format(name, deltaGeoMean))

    if matplotlib != None:
        graph()

    writeResultsToFile()

    if influxReporter != None:
        influxReporter.report_result(arguments.folder, "Total", "all", "SUCCESS", mainTotalMin, mainTotalAverage, mainTotalMax, 0.0, getShortVmName(arguments.vm), os.path.abspath(arguments.vm))
        influxReporter.flush(0)
935
936
937
if __name__ == "__main__":
    # Standalone entry point: parse the command line and run with no argument substitution callback
    arguments = argumentParser.parse_args()
    run(arguments, None)
940
941