Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/tools/flag-bisect.py
2723 views
1
#!/usr/bin/python3
2
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
3
4
import argparse
5
import asyncio
6
import copy
7
import json
8
import math
9
import os
10
import platform
11
import re
12
import subprocess
13
import sys
14
import textwrap
15
from enum import Enum
16
17
def add_parser(subparsers):
18
flag_bisect_command = subparsers.add_parser('flag-bisect',
19
help=help(),
20
description=help(),
21
epilog=epilog(),
22
formatter_class=argparse.RawDescriptionHelpFormatter,
23
)
24
25
add_argument_parsers(flag_bisect_command)
26
flag_bisect_command.set_defaults(func=flag_bisect_main)
27
return flag_bisect_command
28
29
def help():
30
return 'Search for a set of flags triggering the faulty behavior in unit tests'
31
32
def get_terminal_width():
33
try:
34
return os.get_terminal_size().columns
35
except:
36
# Return a reasonable default when a terminal is not available
37
return 80
38
def wrap_text(text, width):
39
leading_whitespace_re = re.compile('( *)')
40
41
def get_paragraphs_and_indent(string):
42
lines = string.split('\n')
43
result = ''
44
line_count = 0
45
initial_indent = ''
46
subsequent_indent = ''
47
for line in lines:
48
if len(line.strip()) == 0:
49
if line_count > 0:
50
yield result, initial_indent, subsequent_indent
51
result = ''
52
line_count = 0
53
else:
54
line_count += 1
55
if line_count == 1:
56
initial_indent = leading_whitespace_re.match(line).group(1)
57
subsequent_indent = initial_indent
58
elif line_count == 2:
59
subsequent_indent = leading_whitespace_re.match(line).group(1)
60
result += line.strip() + '\n'
61
62
result = ''
63
for paragraph, initial_indent, subsequent_indent in get_paragraphs_and_indent(text):
64
result += textwrap.fill(paragraph, width=width, initial_indent=initial_indent, subsequent_indent=subsequent_indent, break_on_hyphens=False) + '\n\n'
65
return result
66
67
def wrap_text_for_terminal(text):
68
right_margin = 2 # This margin matches what argparse uses when formatting argument documentation
69
min_width = 20
70
width = max(min_width, get_terminal_width() - right_margin)
71
return wrap_text(text, width)
72
73
def epilog():
74
return wrap_text_for_terminal('''
75
This tool uses the delta debugging algorithm to minimize the set of flags to the ones that are faulty in your unit tests,
76
and the usage is trivial. Just provide a path to the unit test and you're done, the tool will do the rest.
77
78
There are many use cases with flag-bisect. Included but not limited to:
79
80
1: If your test is failing when you omit `--fflags=true` but it works when passing `--fflags=true`, then you can
81
use this tool to find that set of flag requirements to see which flags are missing that will help to fix it. Ditto
82
for the opposite too, this tool is generalized for that case.
83
84
2: If you happen to run into a problem on production, and you're not sure which flags is the problem and you can easily
85
create a unit test, you can run flag-bisect on that unit test to rapidly find the set of flags.
86
87
3: If you have a flag that causes a performance regression, there's also the `--timeout=N` where `N` is in seconds.
88
89
4: If you have tests that are demonstrating flakiness behavior, you can also use `--tries=N` where `N` is the number of
90
attempts to run the same set of flags before moving on to the new set. This will eventually drill down to the flaky flag(s).
91
Generally 8 tries should be more than enough, but it depends on the rarity. The more rare it is, the higher the attempts count
92
needs to be. Note that this comes with a performance cost the higher you go, but certainly still faster than manual search.
93
This argument will disable parallel mode by default. If this is not desired, explicitly write `--parallel=on`.
94
95
5: By default flag-bisect runs in parallel mode which uses a slightly modified version of delta debugging algorithm to support
96
trying multiple sets of flags concurrently. This means that the number of sets the algorithm will try at once is equal to the
97
number of concurrent jobs. There is currently no upper bound to that, so heed this warning that your machine may slow down
98
significantly. In this mode, we display the number of jobs it is running in parallel. Use `--parallel=off` to disable parallel
99
mode.
100
101
Be aware that this introduces some level of *non-determinism*, and it is fundamental due to the interaction with flag dependencies
102
and the fact one job may finish faster than another job that got ran in the same cycle. However, it generally shouldn't matter
103
if your test is deterministic and has no implicit flag dependencies in the codebase.
104
105
The tool will try to automatically figure out which of `--pass` or `--fail` to use if you omit them or use `--auto` by applying
106
heuristics. For example, if the tests works using `--fflags=true` and crashes if omitting `--fflags=true`, then it knows
107
to use `--pass` to give you set of flags that will cause that crash. As usual, vice versa is also true. Since this is a
108
heuristic, if it gets that guess wrong, you can override with `--pass` or `--fail`.
109
110
You can speed this process up by scoping it to as few tests as possible, for example if you're using doctest then you'd
111
pass `--tc=my_test` as an argument after `--`, so `flag-bisect ./path/to/binary -- --tc=my_test`.
112
''')
113
114
class InterestnessMode(Enum):
115
AUTO = 0,
116
FAIL = 1,
117
PASS = 2,
118
119
def add_argument_parsers(parser):
120
parser.add_argument('binary_path', help='Path to the unit test binary that will be bisected for a set of flags')
121
122
parser.add_argument('--tries', dest='attempts', type=int, default=1, metavar='N',
123
help='If the tests are flaky, flag-bisect will try again with the same set by N amount of times before moving on')
124
125
parser.add_argument('--parallel', dest='parallel', choices=['on', 'off'], default='default',
126
help='Test multiple sets of flags in parallel, useful when the test takes a while to run.')
127
128
parser.add_argument('--explicit', dest='explicit', action='store_true', default=False, help='Explicitly set flags to false')
129
130
parser.add_argument('--filter', dest='filter', default=None, help='Regular expression to filter for a subset of flags to test')
131
132
parser.add_argument('--verbose', dest='verbose', action='store_true', default=False, help='Show stdout and stderr of the program being run')
133
134
interestness_parser = parser.add_mutually_exclusive_group()
135
interestness_parser.add_argument('--auto', dest='mode', action='store_const', const=InterestnessMode.AUTO,
136
default=InterestnessMode.AUTO, help='Automatically figure out which one of --pass or --fail should be used')
137
interestness_parser.add_argument('--fail', dest='mode', action='store_const', const=InterestnessMode.FAIL,
138
help='You want this if passing --fflags=true causes tests to fail')
139
interestness_parser.add_argument('--pass', dest='mode', action='store_const', const=InterestnessMode.PASS,
140
help='You want this if passing --fflags=true causes tests to pass')
141
interestness_parser.add_argument('--timeout', dest='timeout', type=int, default=0, metavar='SECONDS',
142
help='Find the flag(s) causing performance regression if time to run exceeds the timeout in seconds')
143
144
class Options:
145
def __init__(self, args, other_args, sense):
146
self.path = args.binary_path
147
self.explicit = args.explicit
148
self.sense = sense
149
self.timeout = args.timeout
150
self.interested_in_timeouts = args.timeout != 0
151
self.attempts = args.attempts
152
self.parallel = (args.parallel == 'on' or args.parallel == 'default') if args.attempts == 1 else args.parallel == 'on'
153
self.filter = re.compile(".*" + args.filter + ".*") if args.filter else None
154
self.verbose = args.verbose
155
self.other_args = [arg for arg in other_args if arg != '--'] # Useless to have -- here, discard.
156
157
def copy_with_sense(self, sense):
158
new_copy = copy.copy(self)
159
new_copy.sense = sense
160
return new_copy
161
162
class InterestnessResult(Enum):
163
FAIL = 0,
164
PASS = 1,
165
TIMED_OUT = 2,
166
167
class Progress:
168
def __init__(self, count, n_of_jobs=None):
169
self.count = count
170
self.steps = 0
171
self.n_of_jobs = n_of_jobs
172
self.buffer = None
173
174
def show(self):
175
# remaining is actually the height of the current search tree.
176
remain = int(math.log2(self.count))
177
flag_plural = 'flag' if self.count == 1 else 'flags'
178
node_plural = 'node' if remain == 1 else 'nodes'
179
jobs_info = f', running {self.n_of_jobs} jobs' if self.n_of_jobs is not None else ''
180
return f'flag bisection: testing {self.count} {flag_plural} (step {self.steps}, {remain} {node_plural} remain{jobs_info})'
181
182
def hide(self):
183
if self.buffer:
184
sys.stdout.write('\b \b' * len(self.buffer))
185
186
def update(self, len, n_of_jobs=None):
187
self.hide()
188
self.count = len
189
self.steps += 1
190
self.n_of_jobs = n_of_jobs
191
self.buffer = self.show()
192
sys.stdout.write(self.buffer)
193
sys.stdout.flush()
194
195
def list_fflags(options):
196
try:
197
out = subprocess.check_output([options.path, '--list-fflags'], encoding='UTF-8')
198
flag_names = []
199
200
# It's unlikely that a program we're going to test has no flags.
201
# So if the output doesn't start with FFlag, assume it doesn't support --list-fflags and therefore cannot be bisected.
202
if not out.startswith('FFlag') and not out.startswith('DFFlag') and not out.startswith('SFFlag'):
203
return None
204
205
flag_names = out.split('\n')[:-1]
206
207
subset = [flag for flag in flag_names if options.filter.match(flag) is not None] if options.filter else flag_names
208
return subset if subset else None
209
except:
210
return None
211
212
def mk_flags_argument(options, flags, initial_flags):
213
lst = [flag + '=true' for flag in flags]
214
215
# When --explicit is provided, we'd like to find the set of flags from initial_flags that's not in active flags.
216
# This is so that we can provide a =false value instead of leaving them out to be the default value.
217
if options.explicit:
218
for flag in initial_flags:
219
if flag not in flags:
220
lst.append(flag + '=false')
221
222
return '--fflags=' + ','.join(lst)
223
224
def mk_command_line(options, flags_argument):
225
arguments = [options.path, *options.other_args]
226
if flags_argument is not None:
227
arguments.append(flags_argument)
228
return arguments
229
230
async def get_interestness(options, flags_argument):
231
try:
232
timeout = options.timeout if options.interested_in_timeouts else None
233
cmd = mk_command_line(options, flags_argument)
234
stdout = subprocess.PIPE if not options.verbose else None
235
stderr = subprocess.PIPE if not options.verbose else None
236
process = subprocess.run(cmd, stdout=stdout, stderr=stderr, timeout=timeout)
237
return InterestnessResult.PASS if process.returncode == 0 else InterestnessResult.FAIL
238
except subprocess.TimeoutExpired:
239
return InterestnessResult.TIMED_OUT
240
241
async def is_hot(options, flags_argument, pred=any):
242
results = await asyncio.gather(*[get_interestness(options, flags_argument) for _ in range(options.attempts)])
243
244
if options.interested_in_timeouts:
245
return pred([InterestnessResult.TIMED_OUT == x for x in results])
246
else:
247
return pred([(InterestnessResult.PASS if options.sense else InterestnessResult.FAIL) == x for x in results])
248
249
def pairwise_disjoints(flags, granularity):
250
offset = 0
251
per_slice_len = len(flags) // granularity
252
while offset < len(flags):
253
yield flags[offset:offset + per_slice_len]
254
offset += per_slice_len
255
256
def subsets_and_complements(flags, granularity):
257
for disjoint_set in pairwise_disjoints(flags, granularity):
258
yield disjoint_set, [flag for flag in flags if flag not in disjoint_set]
259
260
# https://www.cs.purdue.edu/homes/xyzhang/fall07/Papers/delta-debugging.pdf
261
async def ddmin(options, initial_flags):
262
current = initial_flags
263
granularity = 2
264
265
progress = Progress(len(current))
266
progress.update(len(current))
267
268
while len(current) >= 2:
269
changed = False
270
271
for (subset, complement) in subsets_and_complements(current, granularity):
272
progress.update(len(current))
273
if await is_hot(options, mk_flags_argument(options, complement, initial_flags)):
274
current = complement
275
granularity = max(granularity - 1, 2)
276
changed = True
277
break
278
elif await is_hot(options, mk_flags_argument(options, subset, initial_flags)):
279
current = subset
280
granularity = 2
281
changed = True
282
break
283
284
if not changed:
285
if granularity == len(current):
286
break
287
granularity = min(granularity * 2, len(current))
288
289
progress.hide()
290
return current
291
292
async def ddmin_parallel(options, initial_flags):
293
current = initial_flags
294
granularity = 2
295
296
progress = Progress(len(current))
297
progress.update(len(current), granularity)
298
299
while len(current) >= 2:
300
changed = False
301
302
subset_jobs = []
303
complement_jobs = []
304
305
def advance(task):
306
nonlocal current
307
nonlocal granularity
308
nonlocal changed
309
# task.cancel() calls the callback passed to add_done_callback...
310
if task.cancelled():
311
return
312
hot, new_delta, new_granularity = task.result()
313
if hot and not changed:
314
current = new_delta
315
granularity = new_granularity
316
changed = True
317
for job in subset_jobs:
318
job.cancel()
319
for job in complement_jobs:
320
job.cancel()
321
322
for (subset, complement) in subsets_and_complements(current, granularity):
323
async def work(flags, new_granularity):
324
hot = await is_hot(options, mk_flags_argument(options, flags, initial_flags))
325
return (hot, flags, new_granularity)
326
327
# We want to run subset jobs in parallel first.
328
subset_job = asyncio.create_task(work(subset, 2))
329
subset_job.add_done_callback(advance)
330
subset_jobs.append(subset_job)
331
332
# Then the complements afterwards, but only if we didn't find a new subset.
333
complement_job = asyncio.create_task(work(complement, max(granularity - 1, 2)))
334
complement_job.add_done_callback(advance)
335
complement_jobs.append(complement_job)
336
337
# When we cancel jobs, the asyncio.gather will be waiting pointlessly.
338
# In that case, we'd like to return the control to this routine.
339
await asyncio.gather(*subset_jobs, return_exceptions=True)
340
if not changed:
341
await asyncio.gather(*complement_jobs, return_exceptions=True)
342
progress.update(len(current), granularity)
343
344
if not changed:
345
if granularity == len(current):
346
break
347
granularity = min(granularity * 2, len(current))
348
349
progress.hide()
350
return current
351
352
def search(options, initial_flags):
353
if options.parallel:
354
return ddmin_parallel(options, initial_flags)
355
else:
356
return ddmin(options, initial_flags)
357
358
async def do_work(args, other_args):
359
sense = None
360
361
# If --timeout isn't used, try to apply a heuristic to figure out which of --pass or --fail we want.
362
if args.timeout == 0 and args.mode == InterestnessMode.AUTO:
363
inner_options = Options(args, other_args, sense)
364
365
# We aren't interested in timeout for this heuristic. It just makes no sense to assume timeouts.
366
# This actually cannot happen by this point, but if we make timeout a non-exclusive switch to --auto, this will go wrong.
367
inner_options.timeout = 0
368
inner_options.interested_in_timeouts = False
369
370
all_tasks = asyncio.gather(
371
is_hot(inner_options.copy_with_sense(True), '--fflags=true', all),
372
is_hot(inner_options.copy_with_sense(False), '--fflags=false' if inner_options.explicit else None, all),
373
)
374
375
# If it times out, we can print a message saying that this is still working. We intentionally want to continue doing work.
376
done, pending = await asyncio.wait([all_tasks], timeout=1.5)
377
if all_tasks not in done:
378
print('Hang on! I\'m running your program to try and figure out which of --pass or --fail to use!')
379
print('Need to find out faster? Cancel the work and explicitly write --pass or --fail')
380
381
is_pass_hot, is_fail_hot = await all_tasks
382
383
# This is a bit counter-intuitive, but the following table tells us which of the sense we want.
384
# Because when you omit --fflags=true argument and it fails, then is_fail_hot is True.
385
# Consequently, you need to use --pass to find out what that set of flags is. And vice versa.
386
#
387
# Also, when is_pass_hot is True and is_fail_hot is False, then that program is working as expected.
388
# There should be no reason to run flag bisection.
389
# However, this can be ambiguous in the opposite of the aforementioned outcome!
390
#
391
# is_pass_hot | is_fail_hot | is ambiguous?
392
#-------------|-------------|---------------
393
# True | True | No! Pick --pass.
394
# False | False | No! Pick --fail.
395
# True | False | No! But this is the exact situation where you shouldn't need to flag-bisect. Raise an error.
396
# False | True | Yes! But we'll pragmatically pick --fail here in the hope it gives the correct set of flags.
397
398
if is_pass_hot and not is_fail_hot:
399
print('The tests seems to be working fine for me. If you really need to flag-bisect, please try again with an explicit --pass or --fail', file=sys.stderr)
400
return 1
401
402
if not is_pass_hot and is_fail_hot:
403
print('I couldn\'t quite figure out which of --pass or --fail to use, but I\'ll carry on anyway')
404
405
sense = is_pass_hot
406
argument = '--pass' if sense else '--fail'
407
print(f'I\'m bisecting flags as if {argument} was used')
408
else:
409
sense = True if args.mode == InterestnessMode.PASS else False
410
411
options = Options(args, other_args, sense)
412
413
initial_flags = list_fflags(options)
414
if initial_flags is None:
415
print('I cannot bisect flags with ' + options.path, file=sys.stderr)
416
print('These are required for me to be able to cooperate:', file=sys.stderr)
417
print('\t--list-fflags must print a list of flags separated by newlines, including FFlag prefix', file=sys.stderr)
418
print('\t--fflags=... to accept a comma-separated pair of flag names and their value in the form FFlagFoo=true', file=sys.stderr)
419
return 1
420
421
# On Windows, there is an upper bound on the numbers of characters for a command line incantation.
422
# If we don't handle this ourselves, the runtime error is going to look nothing like the actual problem.
423
# It'd say "file name way too long" or something to that effect. We can teed up a better error message and
424
# tell the user how to work around it by using --filter.
425
if platform.system() == 'Windows':
426
cmd_line = ' '.join(mk_command_line(options, mk_flags_argument(options, initial_flags, [])))
427
if len(cmd_line) >= 8191:
428
print(f'Never mind! The command line is too long because we have {len(initial_flags)} flags to test', file=sys.stderr)
429
print('Consider using `--filter=<regex>` to narrow it down upfront, or use any version of WSL instead', file=sys.stderr)
430
return 1
431
432
hot_flags = await search(options, initial_flags)
433
if hot_flags:
434
print('I narrowed down to these flags:')
435
print(textwrap.indent('\n'.join(hot_flags), prefix='\t'))
436
437
# If we showed the command line in explicit mode, all flags would be listed here.
438
# This would pollute the terminal with 3000 flags. We don't want that. Don't show it.
439
# Ditto for when the number flags we bisected are equal.
440
if not options.explicit and len(hot_flags) != len(initial_flags):
441
print('$ ' + ' '.join(mk_command_line(options, mk_flags_argument(options, hot_flags, initial_flags))))
442
443
return 0
444
445
print('I found nothing, sorry', file=sys.stderr)
446
return 1
447
448
def flag_bisect_main(args, other_args):
449
return asyncio.run(do_work(args, other_args))
450
451
def main():
452
parser = argparse.ArgumentParser(description=help(), epilog=epilog(), formatter_class=argparse.RawTextHelpFormatter)
453
add_argument_parsers(parser)
454
args, other_args = parser.parse_known_args()
455
return flag_bisect_main(args, other_args)
456
457
if __name__ == '__main__':
458
sys.exit(main())
459
460