Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/test/base/simulationOptions.ts
13388 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import minimist from 'minimist';
6
import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsComputer';
7
import { CacheMode } from './simulationContext';
8
9
/**
 * How many runs are persisted in baseline.json.
 */
export const BASELINE_RUN_COUNT = 10;
11
12
/**
 * Immutable settings for the `nes-datagen` subcommand, as collected from the
 * command line by `SimulationOptions`.
 */
export type NesDatagen = Readonly<{
	/** Value of `--input`. */
	input: string;
	/** Value of `--out`, or `undefined` when the flag was not given. */
	output: string | undefined;
	/** Value of `--row-offset`; `SimulationOptions` falls back to 0 when the flag is not numeric. */
	rowOffset: number;
	/** Whether `--worker` was passed. */
	workerMode: boolean;
}>;
18
19
/**
 * Parsed command-line options of the simulation runner.
 *
 * The raw argument vector is parsed once with `minimist` in the constructor and
 * every supported flag is exposed as a readonly property.
 */
export class SimulationOptions {
	/** Creates the options from the arguments of the current process (`process.argv`). */
	public static fromProcessArgs(): SimulationOptions {
		return new SimulationOptions(process.argv);
	}

	/**
	 * Creates the options from an explicit argument vector.
	 *
	 * @param argv Arguments in `process.argv` layout: the first two entries are
	 * skipped before parsing.
	 */
	public static fromArray(argv: readonly string[]): SimulationOptions {
		return new SimulationOptions(argv);
	}

	/** Raw result of parsing the command line with minimist. */
	private readonly argv: minimist.ParsedArgs;

	/** --help */
	public readonly help: boolean;
	/** --list-models */
	public readonly listModels: boolean;
	/** --list-tests */
	public readonly listTests: boolean;
	/** --list-suites */
	public readonly listSuites: boolean;
	/** --json */
	public readonly jsonOutput: boolean;
	/** --n when it is numeric; otherwise 10 (`BASELINE_RUN_COUNT` for --update-baseline / --ci). */
	public readonly nRuns: number;
	/** --model */
	public readonly chatModel: string | undefined;
	/** --smart-model */
	public readonly smartChatModel: string | undefined;
	/** --fast-model */
	public readonly fastChatModel: string | undefined;
	/** --fast-rewrite-model */
	public readonly fastRewriteModel: string | undefined;
	/** --summarize-history (defaults to true) */
	public readonly summarizeHistory: boolean;
	/** --swebench-prompt */
	public readonly swebenchPrompt: boolean;
	/** --embedding-model, mapped to a well-known embedding type */
	public readonly embeddingType: EmbeddingType | undefined;
	/** --boost */
	public readonly boost: boolean;
	/** --parallelism, -p (defaults to 20) */
	public readonly parallelism: number;
	/** Disabled by --skip-cache, required by --require-cache, default otherwise */
	public readonly lmCacheMode: CacheMode;
	/** Disabled by --skip-model-cache, default otherwise */
	public readonly modelCacheMode: CacheMode;
	/** Disabled by --skip-resources-cache, default otherwise */
	public readonly resourcesCacheMode: CacheMode;
	/** --cache-location */
	public readonly cachePath: string | undefined;
	/** --external-baseline (only valid together with --external-scenarios) */
	public readonly externalBaseline: string | undefined;
	/** --external-scenarios */
	public readonly externalScenarios: string | undefined;
	/** --output */
	public readonly output: string | undefined;
	/** --inline */
	public readonly inline: boolean;
	/** --sidebar */
	public readonly sidebar: boolean;
	/** --apply-chat-code-blocks */
	public readonly applyChatCodeBlocks: boolean;
	/** --stage-cache-entries */
	public readonly stageCacheEntries: boolean;
	/** --ci */
	public readonly ci: boolean;
	/** --gc */
	public readonly gc: boolean;
	/** --external-cache-layers-path */
	public readonly externalCacheLayersPath: string | undefined;
	/** --verbose, as parsed by minimist */
	public readonly verbose: number | boolean | undefined;
	/** --grep, as parsed by minimist (an array when the flag is repeated) */
	public readonly grep: string[] | string | undefined;
	/** --omit-grep */
	public readonly omitGrep: string | undefined;
	/** --heap-snapshots, as parsed by minimist */
	public readonly heapSnapshots: boolean | string | undefined;
	/** --scenario-test, --scenarioTest Run tests from provided scenario test file name */
	public readonly scenarioTest: string | undefined;
	/** --update-baseline, -u */
	public readonly isUpdateBaseline: boolean;
	/** True when --no-fetch was passed */
	public readonly noFetch: boolean;
	/** True when --no-cache-pointer was passed */
	public readonly noCachePointer: boolean;
	/**
	 * A label for the current simulation run, to be displayed in the UI for distinguishing between runs.
	 */
	public readonly label: string;
	/** --runServerPoweredNesProvider */
	public readonly runServerPoweredNesProvider: boolean;
	/** --nes; a bare `--nes` is treated as 'external' */
	public readonly nes: 'external' | 'coffe' | undefined;
	/** --nes-url (must be combined with --nes-api-key) */
	public readonly nesUrl: string | undefined;
	/** --nes-api-key (must be combined with --nes-url) */
	public readonly nesApiKey: string | undefined;

	/** Set only when the `nes-datagen` subcommand is used together with --input */
	public readonly nesDatagen: NesDatagen | undefined;

	/** The positional subcommand, if one was given */
	public readonly subcommand: 'nes-datagen' | undefined;

	/** --disable-tools, split on commas */
	public readonly disabledTools: Set<string>;

	/** If true, all tests are run in the extension host */
	public readonly inExtensionHost: boolean;
	/** Extensions to ensure are available in the extension host */
	public readonly installExtensions: string[];
	/** Whether to run headless (defaults to true) */
	public readonly headless: boolean;
	/** @internal Only run a single test number */
	public readonly runNumber: number;
	/** --scenario-workspace-folder: if true, runs the stest in the scenario's workspace folder */
	public readonly useScenarioWorkspace: boolean;

	/** If true, will try to use code search using our service. */
	public readonly useExperimentalCodeSearchService: boolean;

	/** --config-file */
	public readonly configFile: string | undefined;

	/** --model-config-file */
	public readonly modelConfigFile: string | undefined;

	/**
	 * Parses the argument vector and populates every option.
	 *
	 * @param processArgv Arguments in `process.argv` layout; the first two entries are ignored.
	 * @throws Error when --external-baseline is given without --external-scenarios, when --nes
	 * has an unsupported value, when only one of --nes-url / --nes-api-key is given, when
	 * --embedding-model is unknown, or when a list flag is passed without a value.
	 */
	protected constructor(processArgv: readonly string[]) {
		const argv = minimist(processArgv.slice(2));
		this.argv = argv;
		this.help = boolean(argv['help'], false);
		this.listModels = boolean(argv['list-models'], false);
		this.listTests = boolean(argv['list-tests'], false);
		this.listSuites = boolean(argv['list-suites'], false);
		this.jsonOutput = boolean(argv['json'], false);
		this.isUpdateBaseline = boolean(argv['update-baseline'] ?? argv['u'], false);
		this.boost = boolean(argv['boost'], false);
		const fetch = boolean(argv['fetch'], true);
		this.noFetch = !fetch; // `--no-fetch` becomes argv[`fetch`] because of how minimist works
		const cachePointer = boolean(argv['cache-pointer'], true);
		this.noCachePointer = !cachePointer; // `--no-cache-pointer` becomes argv[`cache-pointer`] because of how minimist works
		this.nRuns = typeof argv['n'] === 'number' ? argv['n'] : (this.isUpdateBaseline || argv['ci'] ? BASELINE_RUN_COUNT : 10);
		this.chatModel = this.argv['model'];
		this.smartChatModel = this.argv['smart-model'];
		this.fastChatModel = this.argv['fast-model'];
		this.fastRewriteModel = this.argv['fast-rewrite-model'];
		this.summarizeHistory = boolean(argv['summarize-history'], true);
		this.swebenchPrompt = boolean(argv['swebench-prompt'], false);
		this.embeddingType = cliOptionsToWellKnownEmbeddingsType(this.argv['embedding-model']);
		this.parallelism = this.argv['parallelism'] ?? this.argv['p'] ?? 20;
		this.modelCacheMode = this.argv['skip-model-cache'] ? CacheMode.Disable : CacheMode.Default;
		this.lmCacheMode = (
			this.argv['skip-cache'] ? CacheMode.Disable
				: (this.argv['require-cache'] ? CacheMode.Require : CacheMode.Default)
		);
		this.resourcesCacheMode = (
			this.argv['skip-resources-cache'] ? CacheMode.Disable : CacheMode.Default
		);
		this.externalScenarios = this.argv['external-scenarios'];
		this.externalBaseline = this.argv['external-baseline']; // must be set after `externalScenarios`
		this.validateExternalBaseline();
		this.output = this.argv['output'];
		this.cachePath = this.argv['cache-location'];
		this.inline = boolean(this.argv['inline'], false);
		this.sidebar = boolean(this.argv['sidebar'], false);
		this.applyChatCodeBlocks = boolean(this.argv['apply-chat-code-blocks'], false);
		this.stageCacheEntries = boolean(this.argv['stage-cache-entries'], false);
		this.ci = boolean(this.argv['ci'], false);
		this.gc = boolean(this.argv['gc'], false);
		this.externalCacheLayersPath = argv['external-cache-layers-path'];
		this.verbose = this.argv['verbose'];
		this.grep = argv['grep'];
		this.omitGrep = argv['omit-grep'];
		this.heapSnapshots = argv['heap-snapshots'];
		this.scenarioTest = argv['scenarioTest'] ?? argv['scenario-test'];
		this.label = argv['label'] ?? '';

		this.inExtensionHost = boolean(argv['in-extension-host'], false);
		this.installExtensions = SimulationOptions.parseCommaSeparatedList('install-extension', argv['install-extension']);
		this.headless = boolean(argv['headless'], true);
		this.runNumber = Number(argv['run-number']) || 0;

		this.runServerPoweredNesProvider = boolean(argv['runServerPoweredNesProvider'], false);

		this.nes = SimulationOptions.validateNesArgument(argv['nes']);

		this.nesUrl = argv['nes-url'];
		// [SuppressMessage("Microsoft.Security", "CS002:SecretInNextLine", Justification="used for local simulation tests")]
		this.nesApiKey = argv['nes-api-key'];
		SimulationOptions.validateNesUrlOverride(this.nesUrl, this.nesApiKey);

		this.disabledTools = new Set(SimulationOptions.parseCommaSeparatedList('disable-tools', argv['disable-tools']));
		this.useScenarioWorkspace = boolean(argv['scenario-workspace-folder'], false);

		this.useExperimentalCodeSearchService = boolean(argv['use-experimental-code-search-service'], false);

		// Subcommands are positional arguments, which minimist collects in `_`.
		const isNesDatagen = (argv._ as string[]).includes('nes-datagen');
		this.subcommand = isNesDatagen ? 'nes-datagen' : undefined;
		this.nesDatagen = isNesDatagen && argv['input']
			? {
				input: argv['input'],
				output: argv['out'],
				rowOffset: typeof argv['row-offset'] === 'number' ? argv['row-offset'] : 0,
				workerMode: boolean(argv['worker'], false),
			}
			: undefined;

		this.configFile = argv['config-file'];
		this.modelConfigFile = argv['model-config-file'];
	}

	/** Prints the general usage text to the console. */
	public printHelp(): void {
		console.log([
			`Example usages: `,
			` npm run simulate`,
			` npm run simulate -- --external-scenarios=<path> --inline --output=<path>`,
			` npm run simulate -- --external-scenarios=<path> --sidebar --output=<path>`,
			` npm run simulate -- --external-scenarios=<path> --nes --output=<path>`,
			` npm run simulate -- --update-baseline`,
			``,
			` -u, --update-baseline Updates scores in baseline.json if they change as a result of your changes to prompts sent to the model`,
			` --external-scenarios Path to a directory containing scenarios to run`,
			` --inline Run inline chat external scenarios`,
			` --sidebar Run sidebar chat external scenarios`,
			` --nes Run NES external scenarios`,
			` --output Path to a directory where to generate output`,
			` --n Run each scenario N times`,
			` --ci Equivalent to --n=${BASELINE_RUN_COUNT} but throws if the baseline is not up-to-date`,
			` --gc Used with --require-cache to compact cache layers into the baseline cache`,
			` --external-cache-layers-path Used to specify the path to the external cache layers`,
			` --grep Run a test which contains the passed-in string`,
			` --omit-grep Run a test which does not contain the passed-in string`,
			` --embedding-model Specify the model to use for the embedding endpoint`,
			` Values: text3small, metis`,
			` --list-models List available chat models`,
			` --model Specify the model to use for the chat endpoint (use --list-models to see valid options)`,
			` --smart-model Specify the model to use in place of the smarter slower model, i.e GPT 4o`,
			` --fast-model Specify the model to use in place of the faster / less smart model, i.e GPT 4o mini`,
			` --fast-rewrite-model [experimental] Specify the model to use for the fast rewrite endpoint`,
			` -p, --parallelism [experimental] Run tests in parallel (default: 20)`,
			` --skip-cache [experimental] Do not use the cache for language model requests`,
			` --require-cache [experimental] Require cache hits, fail on cache misses`,
			` --regenerate-cache [experimental] Fetch all responses and refresh the cache`,
			` --skip-resources-cache [experimental] Do not use the cache for computed resources`,
			` --skip-model-cache [experimental] Do not use the cache for model metadata`,
			` --stage-cache-entries [experimental] Stage cache files that were used in current simulation run`,
			` --list-tests List tests without running them`,
			` --json Print output in JSONL format`,
			` --verbose Print more information about test and assertion failures`,
			` --scenario-test Run tests from provided scenario test file name, e.g., 'docComment.stest' or 'docComment.stest.ts' (--scenarioTest is supported but will be deprecated in future)`,
			` --no-fetch Do not send requests to the model endpoint (uses cache but doesn't write to it) (useful to make sure prompts are unchanged by observing cache misses)`,
			` --no-cache-pointer [experimental] Do not write files to outcome/`,
			` --label A label for the current simulation run, to be displayed in the UI for distinguishing between runs`,
			` --nes-url To override endpoint URL for NES (must be used with --nes-api-key)`,
			` --nes-api-key API key for endpoint URL provided via NES (must be used with --nes-url)`,
			` --runServerPoweredNesProvider Run stests against the http server powered NES provider (server must be run at port 8001)`,
			` --disable-tools A comma-separated list of tools to disable`,
			` --swebench-prompt Use the headless swebench prompt for agent mode`,
			` --summarize-history Enable experimental conversation history summarization in agent mode`,
			` --scenario-workspace-folder If true, runs the stest inline in the scenario's workspace folder`,
			` --config-file Path to a JSON file containing configuration options`,
			` --model-config-file Path to a JSON file containing model configuration options`,
			``,
			`Subcommands:`,
			` nes-datagen Generate training data from alternative action recordings`,
			` Run 'npm run simulate -- nes-datagen --help' for options`,
			``,
		].join('\n'));
	}

	/** Prints the usage text of the `nes-datagen` subcommand to the console. */
	public printTrainHelp(): void {
		console.log([
			`Usage: npm run simulate -- --config-file=<path> [global options] nes-datagen --input=<path> [options]`,
			``,
			`Generate training data by replaying alternative action recordings through the NES prompt pipeline.`,
			`The prompting strategy is read from the model configuration in --config-file.`,
			``,
			`Options:`,
			` --input Path to a JSON file with training data recordings (required)`,
			` --out Output path for JSON file. Default: <input-path>_output.json`,
			``,
			`Global options (placed before 'nes-datagen'):`,
			` --config-file Path to a JSON config file (required for nes-datagen)`,
			` Must include "github.copilot.chat.inlineEdits.xtabProvider.modelConfiguration"`,
			` with at least { "modelName", "promptingStrategy", "includeTagsInCurrentFile" }`,
			` -p, --parallelism Number of parallel workers (default: 20)`,
			` --verbose Print detailed progress and error information`,
			` --help Show this help message`,
			``,
			`Examples:`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=data.json`,
			` npm run simulate -- --config-file=config.json --parallelism=10 --verbose nes-datagen --input=data.json`,
			``,
		].join('\n'));
	}

	/**
	 * Ensures that an external baseline is only used together with external scenarios.
	 *
	 * @throws Error when `externalBaseline` is set but `externalScenarios` is not.
	 */
	private validateExternalBaseline() {
		if (this.externalBaseline && !this.externalScenarios) {
			throw new Error('External scenarios must be provided for external baseline to work.');
		}
	}

	/**
	 * Normalizes the raw value of a list-valued flag into an array of strings.
	 *
	 * Falsy values yield an empty list and strings are split on commas, exactly as
	 * before. In addition, a repeated flag (minimist yields an array) and numeric
	 * values (minimist converts numeric-looking strings) are accepted instead of
	 * failing on a missing `split` method.
	 *
	 * @param flag Name of the flag without the leading dashes, used in the error message.
	 * @param value Raw value produced by minimist.
	 * @throws Error when the flag was given without a value.
	 */
	private static parseCommaSeparatedList(flag: string, value: unknown): string[] {
		if (!value) {
			return [];
		}
		const rawEntries: unknown[] = Array.isArray(value) ? value : [value];
		const items: string[] = [];
		for (const entry of rawEntries) {
			if (typeof entry !== 'string' && typeof entry !== 'number') {
				throw new Error(`--${flag} expects a comma-separated list of values, but got: ${typeof entry}`);
			}
			items.push(...String(entry).split(','));
		}
		return items;
	}

	/**
	 * Validates the raw value of `--nes`.
	 *
	 * @returns `undefined` when the flag is absent, `'external'` for a bare boolean flag,
	 * otherwise the given value if it is one of the supported strings.
	 * @throws Error when the value is neither a boolean nor a supported string.
	 */
	private static validateNesArgument(nes: unknown): 'external' | 'coffe' | undefined {
		if (nes === undefined || nes === null) {
			return undefined;
		}
		if (typeof nes === 'boolean') { // this's for backward compat because previously it was possible to just pass `--nes` to run external stests against NES
			return 'external';
		}
		if (typeof nes !== 'string') {
			throw new Error(`--nes must be a string, but got: ${typeof nes}`);
		}
		switch (nes) {
			case 'external':
			case 'coffe':
				return nes;
			default:
				throw new Error(`--nes can only be 'external' or 'coffe', but got: ${nes}`);
		}
	}

	/**
	 * Ensures that `--nes-url` and `--nes-api-key` are either both given or both omitted.
	 *
	 * @throws Error when exactly one of the two values is defined.
	 */
	private static validateNesUrlOverride(nesUrl: string | undefined, nesApiKey: string | undefined): void {
		if (nesUrl !== undefined && nesApiKey === undefined) {
			throw new Error(`--nes-api-key must be provided when --nes-url is set`);
		}
		if (nesUrl === undefined && nesApiKey !== undefined) {
			throw new Error(`--nes-url must be provided when --nes-api-key is set`);
		}
	}
}
304
305
/**
 * Maps the value of the `--embedding-model` CLI option to a well-known embedding type.
 *
 * Accepts the short aliases `text3small` and `metis` as well as the `id` of the
 * corresponding embedding type.
 *
 * @param model Raw option value, or `undefined` when the option was not given.
 * @returns The matching embedding type, or `undefined` when no model was given.
 * @throws Error when the model name is not recognized.
 */
function cliOptionsToWellKnownEmbeddingsType(model: string | undefined): EmbeddingType | undefined {
	if (model === 'text3small' || model === EmbeddingType.text3small_512.id) {
		return EmbeddingType.text3small_512;
	}

	if (model === 'metis' || model === EmbeddingType.metis_1024_I16_Binary.id) {
		return EmbeddingType.metis_1024_I16_Binary;
	}

	if (model === undefined) {
		return undefined;
	}

	throw new Error(`Unknown embedding model: ${model}`);
}
322
323
/**
 * Interprets a raw command-line value as a boolean.
 *
 * @param value Raw value as produced by the argument parser.
 * @param defaultValue Result to use when `value` is `undefined`.
 * @returns `defaultValue` for `undefined`, `false` for the literal string
 * `'false'`, and the truthiness of `value` in every other case.
 */
function boolean(value: any, defaultValue: boolean): boolean {
	switch (value) {
		case undefined:
			return defaultValue;
		case 'false':
			// the string 'false' counts as false even though it is truthy
			return false;
		default:
			return Boolean(value);
	}
}
333
334