Path: blob/main_old/src/tests/run_perf_tests.py
#! /usr/bin/env vpython
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_tests.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import fnmatch
import json
import logging
import time
import os
import re
import sys

# Add //src/testing into sys.path for importing xvfb and test_env, and
# //src/testing/scripts for importing common.
d = os.path.dirname
THIS_DIR = d(os.path.abspath(__file__))
ANGLE_DIR = d(d(THIS_DIR))
sys.path.append(os.path.join(ANGLE_DIR, 'testing'))
sys.path.append(os.path.join(ANGLE_DIR, 'testing', 'scripts'))

import common
import test_env
import xvfb

sys.path.append(os.path.join(ANGLE_DIR, 'third_party', 'catapult', 'tracing'))
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 5
DEFAULT_TRIALS = 3
DEFAULT_MAX_ERRORS = 3
DEFAULT_WARMUP_LOOPS = 3
DEFAULT_CALIBRATION_TIME = 3

# Filters out stuff like: " I   72.572s run_tests_on_device(96071FFAZ00096)  "
ANDROID_LOGGING_PREFIX = r'I +\d+\.\d+s \w+\(\w+\) '

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'


def is_windows():
    return sys.platform == 'cygwin' or sys.platform.startswith('win')


def get_binary_name(binary):
    if is_windows():
        return '.\\%s.exe' % binary
    else:
        return './%s' % binary


def _run_and_get_output(args, cmd, env):
    lines = []
    with common.temporary_file() as tempfile_path:
        if args.xvfb:
            ret = xvfb.run_executable(cmd, env, stdoutfile=tempfile_path)
        else:
            ret = test_env.run_command_with_output(cmd, env=env, stdoutfile=tempfile_path)
        if ret:
            logging.error('Error running test suite.')
            return None
        with open(tempfile_path) as f:
            for line in f:
                lines.append(line.strip())
    return lines


def _filter_tests(tests, pattern):
    return [test for test in tests if fnmatch.fnmatch(test, pattern)]


def _shard_tests(tests, shard_count, shard_index):
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
    output = '\n'.join(output)
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output.' % result)
        return None

    return [float(value) for value in m]


def _get_tests_from_output(lines):
    seen_start_of_tests = False
    tests = []
    android_prefix = re.compile(ANDROID_LOGGING_PREFIX)
    logging.debug('Read %d lines from test output.' % len(lines))
    for line in lines:
        line = android_prefix.sub('', line.strip())
        if line == 'Tests list:':
            seen_start_of_tests = True
        elif line == 'End tests list.':
            break
        elif seen_start_of_tests:
            tests.append(line)
    if not seen_start_of_tests:
        raise Exception('Did not find test list in test output!')
    logging.debug('Found %d tests from test output.' % len(tests))
    return tests

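# Illustrative only: the '--list-tests' output parsed by _get_tests_from_output
# is delimited by the 'Tests list:' / 'End tests list.' markers handled above;
# the test names below are hypothetical.
#
#   Tests list:
#   DrawCallPerfBenchmark.Run/gl
#   DrawCallPerfBenchmark.Run/vulkan
#   End tests list.
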
def _truncated_list(data, n):
    """Compute a truncated list, where n is the truncation size."""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return the sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c

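# Worked example (hypothetical numbers): for wall_times = [10.0, 12.0, 14.0],
# _mean is 12.0, the population variance is (4 + 0 + 4) / 3 ~= 2.67, the
# standard deviation is ~1.63, and _coefficient_of_variation returns ~0.136
# (logged below as 13.6%). With 15 samples, truncation_n = 15 >> 3 = 1, so
# _truncated_list drops the single lowest and single highest value first.
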
def _save_extra_output_files(args, test_results, histograms):
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    logging.info('Saving test results to %s.' % test_output_path)
    with open(test_output_path, 'w') as out_file:
        out_file.write(json.dumps(test_results, indent=2))
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    parser.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-loops',
        help='Number of warmup loops to run in the perf test. Default is %d.' %
        DEFAULT_WARMUP_LOOPS,
        type=int,
        default=DEFAULT_WARMUP_LOOPS)
    parser.add_argument(
        '--calibration-time',
        help='Amount of time to spend each loop in calibration and warmup. Default is %d seconds.'
        % DEFAULT_CALIBRATION_TIME,
        type=int,
        default=DEFAULT_CALIBRATION_TIME)

    args, extra_flags = parser.parse_known_args()
    logging.basicConfig(level=args.log.upper(), stream=sys.stdout)

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    # Get sharding args
    if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1:
        if 'GTEST_SHARD_INDEX' not in env:
            logging.error('Sharding params must be specified together.')
            sys.exit(1)
        args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS'))
        args.shard_index = int(env.pop('GTEST_SHARD_INDEX'))
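    # Example (hypothetical bot environment): GTEST_TOTAL_SHARDS=4 with
    # GTEST_SHARD_INDEX=1 makes _shard_tests below keep tests[1], tests[5],
    # tests[9], and so on.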
    # Get test list
    cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose']
    lines = _run_and_get_output(args, cmd, env)
    if not lines:
        raise Exception('Could not find test list from test output.')
    tests = _get_tests_from_output(lines)

    if args.filter:
        tests = _filter_tests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    tests = _shard_tests(tests, args.shard_count, args.shard_index)

    # Run tests
    results = {
        'tests': {},
        'interrupted': False,
        'seconds_since_epoch': time.time(),
        'path_delimiter': '.',
        'version': 3,
        'num_failures_by_type': {
            FAIL: 0,
            PASS: 0,
            SKIP: 0,
        },
    }

    test_results = {}
    histograms = histogram_set.HistogramSet()
    total_errors = 0

    for test in tests:
        cmd = [
            get_binary_name(args.test_suite),
            '--gtest_filter=%s' % test,
            '--extract-test-list-from-filter',
            '--enable-device-cache',
            '--skip-clear-data',
            '--use-existing-test-data',
            '--verbose',
            '--calibration-time',
            str(args.calibration_time),
        ]
        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
        else:
            cmd_calibrate = cmd + [
                '--calibration',
                '--warmup-loops',
                str(args.warmup_loops),
            ]
            calibrate_output = _run_and_get_output(args, cmd_calibrate, env)
            if not calibrate_output:
                logging.error('Failed to get calibration output')
                test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
                results['num_failures_by_type'][FAIL] += 1
                total_errors += 1
                continue
            steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
            if not steps_per_trial:
                logging.warning('Skipping test %s' % test)
                continue
            assert (len(steps_per_trial) == 1)
            steps_per_trial = int(steps_per_trial[0])
        logging.info('Running %s %d times with %d trials and %d steps per trial.' %
                     (test, args.samples_per_test, args.trials_per_sample, steps_per_trial))
        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            if total_errors >= args.max_errors:
                logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
                return 1

            cmd_run = cmd + [
                '--steps-per-trial',
                str(steps_per_trial),
                '--trials',
                str(args.trials_per_sample),
            ]
            if args.smoke_test_mode:
                cmd_run += ['--no-warmup']
            else:
                cmd_run += ['--warmup-loops', str(args.warmup_loops)]
            with common.temporary_file() as histogram_file_path:
                cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
                output = _run_and_get_output(args, cmd_run, env)
                if output:
                    sample_wall_times = _get_results_from_output(output, 'wall_time')
                    if not sample_wall_times:
                        logging.warning('Test %s failed to produce a sample output' % test)
                        break
                    logging.info('Sample %d wall_time results: %s' %
                                 (sample, str(sample_wall_times)))
                    wall_times += sample_wall_times
                    with open(histogram_file_path) as histogram_file:
                        sample_json = json.load(histogram_file)
                        sample_histogram = histogram_set.HistogramSet()
                        sample_histogram.ImportDicts(sample_json)
                        test_histogram_set.Merge(sample_histogram)
                else:
                    logging.error('Failed to get sample for test %s' % test)
                    total_errors += 1

        if not wall_times:
            logging.warning('Skipping test %s. Assuming this is intentional.' % test)
            test_results[test] = {'expected': SKIP, 'actual': SKIP}
            results['num_failures_by_type'][SKIP] += 1
        elif len(wall_times) == (args.samples_per_test * args.trials_per_sample):
            if len(wall_times) > 7:
                truncation_n = len(wall_times) >> 3
                logging.info(
                    'Truncation: Removing the %d highest and lowest times from wall_times.' %
                    truncation_n)
                wall_times = _truncated_list(wall_times, truncation_n)

            if len(wall_times) > 1:
                logging.info(
                    'Mean wall_time for %s is %.2f, with coefficient of variation %.2f%%' %
                    (test, _mean(wall_times), (_coefficient_of_variation(wall_times) * 100.0)))
            test_results[test] = {'expected': PASS, 'actual': PASS}
            results['num_failures_by_type'][PASS] += 1

            # Merge the histogram set into one histogram
            with common.temporary_file() as merge_histogram_path:
                logging.info('Writing merged histograms to %s.' % merge_histogram_path)
                with open(merge_histogram_path, 'w') as merge_histogram_file:
                    json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
                    merge_histogram_file.close()
                merged_dicts = merge_histograms.MergeHistograms(
                    merge_histogram_path, groupby=['name'])
                merged_histogram = histogram_set.HistogramSet()
                merged_histogram.ImportDicts(merged_dicts)
                histograms.Merge(merged_histogram)
        else:
            logging.error('Test %s failed to record some samples' % test)
            test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
            results['num_failures_by_type'][FAIL] += 1

    if test_results:
        results['tests'][args.test_suite] = test_results

    if args.isolated_script_test_output:
        with open(args.isolated_script_test_output, 'w') as out_file:
            out_file.write(json.dumps(results, indent=2))

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2f seconds.' % (end_time - start_time))

    return 0

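# Illustrative shape of the JSON written to --isolated-script-test-output
# (mirrors the 'results' dict built in main; the test name is hypothetical):
#
#   {
#     "tests": {"angle_perftests": {"DrawCallPerf.run": {"expected": "PASS",
#                                                        "actual": "PASS"}}},
#     "interrupted": false,
#     "seconds_since_epoch": 1234567890.0,
#     "path_delimiter": ".",
#     "version": 3,
#     "num_failures_by_type": {"FAIL": 0, "PASS": 1, "SKIP": 0}
#   }
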
# This is not really a "script test" so does not need to manually add
# any additional compile targets.
def main_compile_targets(args):
    json.dump([], args.output)


if __name__ == '__main__':
    # Conform minimally to the protocol defined by ScriptTest.
    if 'compile_targets' in sys.argv:
        funcs = {
            'run': None,
            'compile_targets': main_compile_targets,
        }
        sys.exit(common.run_script(sys.argv[1:], funcs))
    sys.exit(main())
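# Example invocations (assumes the suite binary is built in the current working
# directory; the filter pattern is hypothetical):
#
#   vpython run_perf_tests.py --test-suite angle_perftests -f 'DrawCallPerf*'
#   vpython run_perf_tests.py --smoke-test-mode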