Path: blob/main/extensions/copilot/test/base/simulationOptions.ts
13388 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import minimist from 'minimist';
import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsComputer';
import { CacheMode } from './simulationContext';

/** Number of runs that are stored in baseline.json */
export const BASELINE_RUN_COUNT = 10;

/** Options of the `nes-datagen` subcommand, populated from `--input`, `--out`, `--row-offset` and `--worker`. */
export type NesDatagen = {
	readonly input: string;
	readonly output: string | undefined;
	readonly rowOffset: number;
	readonly workerMode: boolean;
};

/**
 * Command-line options of a simulation run, parsed once with `minimist`.
 *
 * Instances are created through {@link SimulationOptions.fromProcessArgs} or
 * {@link SimulationOptions.fromArray}. In both cases the first two entries of the
 * argument array are dropped before parsing (the `process.argv` layout).
 *
 * Construction throws an `Error` when the arguments are inconsistent:
 * `--external-baseline` without `--external-scenarios`, an unknown `--embedding-model`,
 * an invalid `--nes` value, only one of `--nes-url` / `--nes-api-key`, or a
 * `--install-extension` / `--disable-tools` flag that carries no value.
 */
export class SimulationOptions {
	/** Parses `process.argv`. */
	public static fromProcessArgs(): SimulationOptions {
		return new SimulationOptions(process.argv);
	}

	/**
	 * Parses an explicit argument array.
	 *
	 * @param argv Arguments laid out like `process.argv`: the first two entries are skipped.
	 */
	public static fromArray(argv: readonly string[]): SimulationOptions {
		return new SimulationOptions(argv);
	}

	private readonly argv: minimist.ParsedArgs;

	public readonly help: boolean;
	public readonly listModels: boolean;
	public readonly listTests: boolean;
	public readonly listSuites: boolean;
	public readonly jsonOutput: boolean;
	public readonly nRuns: number;
	public readonly chatModel: string | undefined;
	public readonly smartChatModel: string | undefined;
	public readonly fastChatModel: string | undefined;
	public readonly fastRewriteModel: string | undefined;
	public readonly summarizeHistory: boolean;
	public readonly swebenchPrompt: boolean;
	public readonly embeddingType: EmbeddingType | undefined;
	public readonly boost: boolean;
	public readonly parallelism: number;
	public readonly lmCacheMode: CacheMode;
	public readonly modelCacheMode: CacheMode;
	public readonly resourcesCacheMode: CacheMode;
	public readonly cachePath: string | undefined;
	public readonly externalBaseline: string | undefined;
	public readonly externalScenarios: string | undefined;
	public readonly output: string | undefined;
	public readonly inline: boolean;
	public readonly sidebar: boolean;
	public readonly applyChatCodeBlocks: boolean;
	public readonly stageCacheEntries: boolean;
	public readonly ci: boolean;
	public readonly gc: boolean;
	public readonly externalCacheLayersPath: string | undefined;
	public readonly verbose: number | boolean | undefined;
	public readonly grep: string[] | string | undefined;
	public readonly omitGrep: string | undefined;
	public readonly heapSnapshots: boolean | string | undefined;
	/** --scenario-test, --scenarioTest Run tests from provided scenario test file name */
	public readonly scenarioTest: string | undefined;
	public readonly isUpdateBaseline: boolean;
	/** `true` when `--no-fetch` was passed. */
	public readonly noFetch: boolean;
	/** `true` when `--no-cache-pointer` was passed. */
	public readonly noCachePointer: boolean;
	/**
	 * A label for the current simulation run, to be displayed in the UI for distinguishing between runs.
	 */
	public readonly label: string;
	public readonly runServerPoweredNesProvider: boolean;
	public readonly nes: 'external' | 'coffe' | undefined;
	public readonly nesUrl: string | undefined;
	public readonly nesApiKey: string | undefined;

	/** Set only when the `nes-datagen` subcommand is present together with `--input`. */
	public readonly nesDatagen: NesDatagen | undefined;

	public readonly subcommand: 'nes-datagen' | undefined;

	/** Tool names from the comma-separated `--disable-tools` flag. */
	public readonly disabledTools: Set<string>;

	/** If true, all tests are run in the extension host */
	public readonly inExtensionHost: boolean;
	/** Extensions to ensure are available in the extension host */
	public readonly installExtensions: string[];
	/** Whether to run headless (defaults to true) */
	public readonly headless: boolean;
	/** @internal Only run a single test number */
	public readonly runNumber: number;
	/** Set by `--scenario-workspace-folder`: run the stest inline in the scenario's workspace folder */
	public readonly useScenarioWorkspace: boolean;

	/** If true, will try to use code search using our service. */
	public readonly useExperimentalCodeSearchService: boolean;

	public readonly configFile: string | undefined;

	public readonly modelConfigFile: string | undefined;

	/**
	 * @param processArgv Arguments laid out like `process.argv`; the first two entries are skipped.
	 * @throws Error when the arguments are inconsistent (see the class documentation).
	 */
	protected constructor(processArgv: readonly string[]) {
		const argv = minimist(processArgv.slice(2));
		this.argv = argv;
		this.help = boolean(argv['help'], false);
		this.listModels = boolean(argv['list-models'], false);
		this.listTests = boolean(argv['list-tests'], false);
		this.listSuites = boolean(argv['list-suites'], false);
		this.jsonOutput = boolean(argv['json'], false);
		this.isUpdateBaseline = boolean(argv['update-baseline'] ?? argv['u'], false);
		this.boost = boolean(argv['boost'], false);
		const fetch = boolean(argv['fetch'], true);
		this.noFetch = !fetch; // `--no-fetch` becomes argv[`fetch`] because of how minimist works
		const cachePointer = boolean(argv['cache-pointer'], true);
		this.noCachePointer = !cachePointer; // `--no-cache-pointer` becomes argv[`cache-pointer`] because of how minimist works
		this.nRuns = typeof argv['n'] === 'number' ? argv['n'] : (this.isUpdateBaseline || argv['ci'] ? BASELINE_RUN_COUNT : 10);
		this.chatModel = argv['model'];
		this.smartChatModel = argv['smart-model'];
		this.fastChatModel = argv['fast-model'];
		this.fastRewriteModel = argv['fast-rewrite-model'];
		this.summarizeHistory = boolean(argv['summarize-history'], true);
		this.swebenchPrompt = boolean(argv['swebench-prompt'], false);
		this.embeddingType = cliOptionsToWellKnownEmbeddingsType(argv['embedding-model']);
		this.parallelism = argv['parallelism'] ?? argv['p'] ?? 20;
		this.modelCacheMode = argv['skip-model-cache'] ? CacheMode.Disable : CacheMode.Default;
		// `--skip-cache` takes precedence over `--require-cache`.
		this.lmCacheMode = (
			argv['skip-cache'] ? CacheMode.Disable
				: (argv['require-cache'] ? CacheMode.Require : CacheMode.Default)
		);
		this.resourcesCacheMode = (
			argv['skip-resources-cache'] ? CacheMode.Disable : CacheMode.Default
		);
		this.externalScenarios = argv['external-scenarios'];
		this.externalBaseline = argv['external-baseline']; // must be set after `externalScenarios`
		this.validateExternalBaseline();
		this.output = argv['output'];
		this.cachePath = argv['cache-location'];
		this.inline = boolean(argv['inline'], false);
		this.sidebar = boolean(argv['sidebar'], false);
		this.applyChatCodeBlocks = boolean(argv['apply-chat-code-blocks'], false);
		this.stageCacheEntries = boolean(argv['stage-cache-entries'], false);
		this.ci = boolean(argv['ci'], false);
		this.gc = boolean(argv['gc'], false);
		this.externalCacheLayersPath = argv['external-cache-layers-path'];
		this.verbose = argv['verbose'];
		this.grep = argv['grep'];
		this.omitGrep = argv['omit-grep'];
		this.heapSnapshots = argv['heap-snapshots'];
		this.scenarioTest = argv['scenarioTest'] ?? argv['scenario-test'];
		this.label = argv['label'] ?? '';

		this.inExtensionHost = boolean(argv['in-extension-host'], false);
		this.installExtensions = parseCommaSeparatedList('install-extension', argv['install-extension']);
		this.headless = boolean(argv['headless'], true);
		this.runNumber = Number(argv['run-number']) || 0;

		this.runServerPoweredNesProvider = boolean(argv['runServerPoweredNesProvider'], false);

		this.nes = SimulationOptions.validateNesArgument(argv['nes']);

		this.nesUrl = argv['nes-url'];
		// [SuppressMessage("Microsoft.Security", "CS002:SecretInNextLine", Justification="used for local simulation tests")]
		this.nesApiKey = argv['nes-api-key'];
		SimulationOptions.validateNesUrlOverride(this.nesUrl, this.nesApiKey);

		this.disabledTools = new Set<string>(parseCommaSeparatedList('disable-tools', argv['disable-tools']));
		this.useScenarioWorkspace = boolean(argv['scenario-workspace-folder'], false);

		this.useExperimentalCodeSearchService = boolean(argv['use-experimental-code-search-service'], false);

		// The subcommand is a positional argument, which minimist collects in `argv._`.
		const isNesDatagen = (argv._ as string[]).includes('nes-datagen');
		this.subcommand = isNesDatagen ? 'nes-datagen' : undefined;
		this.nesDatagen = isNesDatagen && argv['input']
			? {
				input: argv['input'],
				output: argv['out'],
				rowOffset: typeof argv['row-offset'] === 'number' ? argv['row-offset'] : 0,
				workerMode: boolean(argv['worker'], false),
			}
			: undefined;

		this.configFile = argv['config-file'];
		this.modelConfigFile = argv['model-config-file'];
	}

	/** Prints the general usage text to stdout. */
	public printHelp(): void {
		console.log([
			`Example usages: `,
			` npm run simulate`,
			` npm run simulate -- --external-scenarios=<path> --inline --output=<path>`,
			` npm run simulate -- --external-scenarios=<path> --sidebar --output=<path>`,
			` npm run simulate -- --external-scenarios=<path> --nes --output=<path>`,
			` npm run simulate -- --update-baseline`,
			``,
			` -u, --update-baseline Updates scores in baseline.json if they change as a result of your changes to prompts sent to the model`,
			` --external-scenarios Path to a directory containing scenarios to run`,
			` --inline Run inline chat external scenarios`,
			` --sidebar Run sidebar chat external scenarios`,
			` --nes Run NES external scenarios`,
			` --output Path to a directory where to generate output`,
			` --n Run each scenario N times`,
			` --ci Equivalent to --n=${BASELINE_RUN_COUNT} but throws if the baseline is not up-to-date`,
			` --gc Used with --require-cache to compact cache layers into the baseline cache`,
			` --external-cache-layers-path Used to specify the path to the external cache layers`,
			` --grep Run a test which contains the passed-in string`,
			` --omit-grep Run a test which does not contain the passed-in string`,
			// NOTE(review): `ada` is not a value accepted by --embedding-model; confirm the real default.
			` --embedding-model Specify the model to use for the embedding endpoint (default: ada)`,
			` Values: text3small, metis`,
			` --list-models List available chat models`,
			` --model Specify the model to use for the chat endpoint (use --list-models to see valid options)`,
			` --smart-model Specify the model to use in place of the smarter slower model, i.e GPT 4o`,
			` --fast-model Specify the model to use in place of the faster / less smart model, i.e GPT 4o mini`,
			` --fast-rewrite-model [experimental] Specify the model to use for the fast rewrite endpoint`,
			` -p, --parallelism [experimental] Run tests in parallel (default: 20)`,
			` --skip-cache [experimental] Do not use the cache for language model requests`,
			` --require-cache [experimental] Require cache hits, fail on cache misses`,
			` --regenerate-cache [experimental] Fetch all responses and refresh the cache`,
			` --skip-resources-cache [experimental] Do not use the cache for computed resources`,
			` --skip-model-cache [experimental] Do not use the cache for model metadata`,
			` --stage-cache-entries [experimental] Stage cache files that were used in current simulation run`,
			` --list-tests List tests without running them`,
			` --json Print output in JSONL format`,
			` --verbose Print more information about test and assertion failures`,
			` --scenario-test Run tests from provided scenario test file name, e.g., 'docComment.stest' or 'docComment.stest.ts' (--scenarioTest is supported but will be deprecated in future)`,
			` --no-fetch Do not send requests to the model endpoint (uses cache but doesn't write to it) (useful to make sure prompts are unchanged by observing cache misses)`,
			` --no-cache-pointer [experimental] Do not write files to outcome/`,
			` --label A label for the current simulation run, to be displayed in the UI for distinguishing between runs`,
			` --nes-url To override endpoint URL for NES (must be used with --nes-api-key)`,
			` --nes-api-key API key for endpoint URL provided via NES (must be used with --nes-url)`,
			` --runServerPoweredNesProvider Run stests against the http server powered NES provider (server must be run at port 8001)`,
			` --disable-tools A comma-separated list of tools to disable`,
			` --swebench-prompt Use the headless swebench prompt for agent mode`,
			` --summarize-history Enable experimental conversation history summarization in agent mode`,
			` --scenario-workspace-folder If true, runs the stest inline in the scenario's workspace folder`,
			` --config-file Path to a JSON file containing configuration options`,
			` --model-config-file Path to a JSON file containing model configuration options`,
			``,
			`Subcommands:`,
			` nes-datagen Generate training data from alternative action recordings`,
			` Run 'npm run simulate -- nes-datagen --help' for options`,
			``,
		].join('\n'));
	}

	/** Prints the usage text of the `nes-datagen` subcommand to stdout. */
	public printTrainHelp(): void {
		console.log([
			`Usage: npm run simulate -- --config-file=<path> [global options] nes-datagen --input=<path> [options]`,
			``,
			`Generate training data by replaying alternative action recordings through the NES prompt pipeline.`,
			`The prompting strategy is read from the model configuration in --config-file.`,
			``,
			`Options:`,
			` --input Path to a JSON file with training data recordings (required)`,
			` --out Output path for JSON file. Default: <input-path>_output.json`,
			``,
			`Global options (placed before 'nes-datagen'):`,
			` --config-file Path to a JSON config file (required for nes-datagen)`,
			` Must include "github.copilot.chat.inlineEdits.xtabProvider.modelConfiguration"`,
			` with at least { "modelName", "promptingStrategy", "includeTagsInCurrentFile" }`,
			` -p, --parallelism Number of parallel workers (default: 20)`,
			` --verbose Print detailed progress and error information`,
			` --help Show this help message`,
			``,
			`Examples:`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=data.json`,
			` npm run simulate -- --config-file=config.json --parallelism=10 --verbose nes-datagen --input=data.json`,
			``,
		].join('\n'));
	}

	/**
	 * Ensures `--external-baseline` is only used together with `--external-scenarios`.
	 *
	 * @throws Error when an external baseline is set without external scenarios.
	 */
	private validateExternalBaseline() {
		if (this.externalBaseline && !this.externalScenarios) {
			throw new Error('External scenarios must be provided for external baseline to work.');
		}
	}

	/**
	 * Normalizes the raw `--nes` value.
	 *
	 * @returns `undefined` when the flag is absent, `'external'` for a bare `--nes`,
	 * otherwise the validated string value.
	 * @throws Error when the value is neither a boolean nor one of the supported strings.
	 */
	private static validateNesArgument(nes: unknown): 'external' | 'coffe' | undefined {
		if (nes === undefined || nes === null) {
			return undefined;
		}
		if (typeof nes === 'boolean') { // this is for backward compat because previously it was possible to just pass `--nes` to run external stests against NES
			return 'external';
		}
		if (typeof nes !== 'string') {
			throw new Error(`--nes must be a string, but got: ${typeof nes}`);
		}
		switch (nes) {
			case 'external':
			case 'coffe':
				return nes;
			default:
				throw new Error(`--nes can only be 'external' or 'coffe', but got: ${nes}`);
		}
	}

	/**
	 * Ensures `--nes-url` and `--nes-api-key` are either both provided or both omitted.
	 *
	 * @throws Error when exactly one of the two is provided.
	 */
	private static validateNesUrlOverride(nesUrl: string | undefined, nesApiKey: string | undefined): void {
		if (nesUrl !== undefined && nesApiKey === undefined) {
			throw new Error(`--nes-api-key must be provided when --nes-url is set`);
		}
		if (nesUrl === undefined && nesApiKey !== undefined) {
			throw new Error(`--nes-url must be provided when --nes-api-key is set`);
		}
	}
}

/**
 * Maps the `--embedding-model` value to a well-known embedding type.
 *
 * Accepts the short names `text3small` and `metis` as well as the `id` of the
 * corresponding `EmbeddingType`.
 *
 * @returns The matching embedding type, or `undefined` when no model was specified.
 * @throws Error when the model name is not recognized.
 */
function cliOptionsToWellKnownEmbeddingsType(model: string | undefined): EmbeddingType | undefined {
	switch (model) {
		case 'text3small':
		case EmbeddingType.text3small_512.id:
			return EmbeddingType.text3small_512;

		case 'metis':
		case EmbeddingType.metis_1024_I16_Binary.id:
			return EmbeddingType.metis_1024_I16_Binary;

		case undefined:
			return undefined;

		default:
			throw new Error(`Unknown embedding model: ${model}`);
	}
}

/**
 * Splits the value of a comma-separated list flag into its entries.
 *
 * minimist does not always hand back a string: a repeated flag yields an array, a
 * numeric-looking value yields a number, and a flag without a value yields `true`.
 * Calling `.split` directly on those values fails with an opaque `TypeError`.
 *
 * @param flag Flag name without the leading dashes, used in the error message.
 * @param value Raw value as parsed by minimist.
 * @returns The list entries; empty when the flag is absent or has a falsy value.
 * @throws Error when the flag was passed without a usable value.
 */
function parseCommaSeparatedList(flag: string, value: unknown): string[] {
	if (!value) {
		return [];
	}
	if (Array.isArray(value)) {
		const entries: string[] = [];
		for (const item of value) {
			entries.push(...parseCommaSeparatedList(flag, item));
		}
		return entries;
	}
	if (typeof value === 'string' || typeof value === 'number') {
		return String(value).split(',');
	}
	throw new Error(`--${flag} must be a comma-separated list, but got: ${typeof value}`);
}

/**
 * Interprets a raw minimist value as a boolean flag.
 *
 * @param value Raw value as parsed by minimist.
 * @param defaultValue Result when the flag is absent (`undefined`).
 * @returns `false` for the string `'false'`, otherwise the truthiness of `value`.
 */
function boolean(value: unknown, defaultValue: boolean): boolean {
	if (typeof value === 'undefined') {
		return defaultValue;
	}
	if (value === 'false') {
		// treat the string 'false' as false
		return false;
	}
	return Boolean(value);
}