// Path: blob/main/extensions/copilot/test/base/stest.ts
// 13389 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import * as fs from 'fs';
import path from 'path';
import { Config, ExperimentBasedConfig, ExperimentBasedConfigType } from '../../src/platform/configuration/common/configurationService';
import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsComputer';
import { ILogTarget, LogLevel } from '../../src/platform/log/common/logService';
import { ISimulationTestContext } from '../../src/platform/simulationTestContext/common/simulationTestContext';
import { TestingServiceCollection } from '../../src/platform/test/node/services';
import { createServiceIdentifier } from '../../src/util/common/services';
import { grepStrToRegex } from '../simulation/shared/grepFilter';
import { EXPLICIT_LOG_TAG, IMPLICIT_LOG_TAG, ITestLocation, IWrittenFile, SIMULATION_EXPLICIT_LOG_FILENAME, SIMULATION_IMPLICIT_LOG_FILENAME, SimulationTestOutcome } from '../simulation/shared/sharedTypes';
import { computeSHA256 } from './hash';
import { SimulationOptions } from './simulationOptions';
export { REPO_ROOT } from '../util';

/**
 * The body of a simulation test. It receives the testing service collection
 * and may complete synchronously or return a promise.
 */
export interface SimulationTestFunction {
	(testingServiceCollection: TestingServiceCollection): Promise<unknown> | unknown;
}

/**
 * Raw, per-test options as supplied at registration time.
 */
export interface ISimulationTestOptions {
	optional?: boolean;
	/** Predicate deciding whether the test is skipped for the given simulation options. */
	skip?: (opts: SimulationOptions) => boolean;
	location?: ITestLocation;
	conversationPath?: string;
	scenarioFolderPath?: string;
	stateFile?: string;
}

/**
 * Effective options of a test: the test's own options combined with the
 * options of the suite it belongs to.
 */
export class SimulationTestOptions {

	// Result of the test's own `skip` predicate; evaluated at most once.
	private _cachedMySkip: boolean | undefined = undefined;

	constructor(
		private readonly _opts: ISimulationTestOptions,
		private readonly _suiteOpts: SimulationSuiteOptions
	) { }

	/**
	 * A test is optional when its suite is optional or when it is marked optional itself.
	 */
	public get optional(): boolean {
		if (this._suiteOpts.optional) {
			return true;
		}
		return this._opts.optional ?? false;
	}

	/**
	 * A test is skipped when its suite is skipped or when its own `skip` predicate says so.
	 */
	public skip(opts: SimulationOptions): boolean {
		return this._suiteOpts.skip(opts) || this.mySkip(opts);
	}

	/**
	 * Evaluates the test's own `skip` predicate. The first result is cached and
	 * returned for every later call, regardless of the `opts` passed then.
	 */
	private mySkip(opts: SimulationOptions): boolean {
		if (this._cachedMySkip === undefined) {
			this._cachedMySkip = this._opts.skip?.(opts) ?? false;
		}
		return this._cachedMySkip;
	}

	public get location(): ITestLocation | undefined {
		return this._opts.location;
	}

	public get conversationPath(): string | undefined {
		return this._opts.conversationPath;
	}

	public get scenarioFolderPath() {
		return this._opts.scenarioFolderPath;
	}

	public get stateFile() {
		return this._opts.stateFile;
	}
}

export interface ISimulationTestDescriptor {

	/**
	 * This is used to capture the test scenario description itself.
	 */
	readonly description: string;

	/**
	 * The programming language used for the test.
	 *
	 * If not set, may be inherited from the suite this test is in if the suite descriptor specifies the language.
	 */
	readonly language?: string;

	/**
	 * The model used for the test.
	 */
	readonly model?: string;

	/**
	 * The embeddings model used for the test.
	 */
	readonly embeddingType?: EmbeddingType;

	/**
	 * Setting configurations defined for the test
	 */
	readonly configurations?: Configuration<any>[];

	/**
	 * Non-extension settings configurations defined for the test
	 */
	readonly nonExtensionConfigurations?: NonExtensionConfiguration[] | undefined;

	/**
	 * Arbitrary attributes that will be serialised to the metadata.json file.
	 */
	readonly attributes?: Record<string, string | number>;
}

export type NonExtensionConfiguration = [string, any];

export type Configuration<T> = { key: ExperimentBasedConfig<ExperimentBasedConfigType> | Config<T>; value: T };

/**
 * A registered simulation test: its descriptor data, its effective options,
 * the suite it belongs to and the function that runs it.
 */
export class SimulationTest {

	public readonly options: SimulationTestOptions;
	public readonly description: string;
	public readonly language: string | undefined;
	public readonly model: string | undefined;
	public readonly embeddingType: EmbeddingType | undefined;
	public readonly configurations: Configuration<any>[] | undefined;
	public readonly nonExtensionConfigurations: NonExtensionConfiguration[] | undefined;
	public readonly attributes: Record<string, string | number> | undefined;

	constructor(
		descriptor: ISimulationTestDescriptor,
		options: ISimulationTestOptions,
		public readonly suite: SimulationSuite,
		private readonly _runner: SimulationTestFunction,
	) {
		this.description = descriptor.description;
		this.language = descriptor.language;
		this.model = descriptor.model;
		this.embeddingType = descriptor.embeddingType;
		this.configurations = descriptor.configurations;
		this.nonExtensionConfigurations = descriptor.nonExtensionConfigurations;
		this.attributes = descriptor.attributes;
		this.options = new SimulationTestOptions(options, suite.options);
	}

	/**
	 * `<suite full name> [<language>] - <description> - (<model>) - (<embeddingType>)`,
	 * where the language, model and embedding type parts are omitted when not set.
	 */
	public get fullName(): string {
		const languageTag = this.language ? `[${this.language}] ` : '';
		const modelTag = this.model ? ` - (${this.model})` : '';
		const embeddingTag = this.embeddingType ? ` - (${this.embeddingType})` : '';
		return `${this.suite.fullName} ${languageTag}- ${this.description}${modelTag}${embeddingTag}`;
	}

	public get outcomeCategory(): string {
		return this.suite.outcomeCategory;
	}

	public get outcomeFileName(): string {
		return getOutcomeFileName(this.fullName);
	}

	/**
	 * Runs the test function.
	 *
	 * Declared `async` so that a runner which throws synchronously produces a
	 * rejected promise instead of an exception escaping from this call.
	 */
	public async run(testingServiceCollection: TestingServiceCollection): Promise<unknown> {
		return this._runner(testingServiceCollection);
	}

	toString(): string {
		return `SimulationTest: ${this.fullName}`;
	}
}

/**
 * Computes the name of the outcome JSON file for a test name.
 *
 * A trailing ` - (gpt-4)` or ` - (gpt-3.5-turbo)` is stripped from the name and
 * re-appended as a `-<model>` suffix after the sanitized name has been cut to
 * 60 characters. Runs of dashes in the final name are collapsed.
 */
export function getOutcomeFileName(testName: string): string {
	let baseName = testName;
	let suffix = '';
	for (const model of ['gpt-4', 'gpt-3.5-turbo']) {
		const marker = ` - (${model})`;
		if (baseName.endsWith(marker)) {
			baseName = baseName.substring(0, baseName.length - marker.length);
			suffix = `-${model}`;
			break;
		}
	}
	const result = toDirname(baseName);
	return `${result.substring(0, 60)}${suffix}.json`.replace(/-+/g, '-');
}

/**
 * Raw, per-suite options as supplied at registration time.
 */
export interface ISimulationSuiteOptions {
	optional?: boolean;
	/** Predicate deciding whether the suite is skipped for the given simulation options. */
	skip?: (opts: SimulationOptions) => boolean;
	location?: ITestLocation;
}

/**
 * Effective options of a suite, with defaults applied.
 */
export class SimulationSuiteOptions {

	// Result of the suite's `skip` predicate; evaluated at most once.
	private _cachedSkip: boolean | undefined = undefined;

	constructor(
		private readonly _opts: ISimulationSuiteOptions
	) { }

	public get optional(): boolean {
		return this._opts.optional ?? false;
	}

	/**
	 * Evaluates the suite's `skip` predicate. The first result is cached and
	 * returned for every later call, regardless of the `opts` passed then.
	 */
	public skip(opts: SimulationOptions): boolean {
		if (this._cachedSkip === undefined) {
			this._cachedSkip = this._opts.skip?.(opts) ?? false;
		}
		return this._cachedSkip;
	}

	public get location(): ITestLocation | undefined {
		return this._opts.location;
	}
}

export type ExtHostDescriptor = boolean; // todo: more things like extension config later

export interface ISimulationSuiteDescriptor {

	/**
	 * This is used to group tests together.
	 * If using a slashCommand, use the command name else use "generic"
	 */
	readonly title: string;


	/**
	 * This is used to capture the test scenario scope.
	 * Example: e2e, prompt, generate etc.
	 */
	readonly subtitle?: string;
	readonly location: 'inline' | 'panel' | 'external' | 'context';

	/**
	 * The programming language this suite tests.
	 *
	 * The test within the suite will also have this language if they do not specify a language in their descriptor {@link ISimulationTestDescriptor}.
	 */
	readonly language?: string;

	/**
	 * Settings that override default settings in configuration service.
	 *
	 * These settings can further be overridden by the test itself.
	 */
	readonly configurations?: Configuration<any>[];

	/**
	 * Non-extension settings configurations defined for the test
	 */
	readonly nonExtensionConfigurations?: NonExtensionConfiguration[] | undefined;

	/**
	 * Set to true to run in a real VS Code extension host.
	 */
	readonly extHost?: ExtHostDescriptor;
}

/**
 * A group of simulation tests sharing a title, location and default settings.
 */
export class SimulationSuite {
	public readonly options: SimulationSuiteOptions;

	public readonly language: string | undefined;

	private readonly _title: string;
	private readonly _subtitle: string | undefined;
	private readonly _location: ISimulationSuiteDescriptor['location'];

	public readonly configurations: Configuration<any>[] | undefined;
	public readonly nonExtensionConfigurations: NonExtensionConfiguration[] | undefined;
	public readonly extHost: ExtHostDescriptor | undefined;

	constructor(
		descriptor: ISimulationSuiteDescriptor,
		opts: ISimulationSuiteOptions = {},
		public readonly tests: SimulationTest[] = [],
	) {
		this._title = descriptor.title;
		this._subtitle = descriptor.subtitle;
		this._location = descriptor.location;
		this.language = descriptor.language;
		this.configurations = descriptor.configurations;
		this.nonExtensionConfigurations = descriptor.nonExtensionConfigurations;
		// Propagate the descriptor flag; previously it was declared on the suite but never assigned.
		this.extHost = descriptor.extHost;
		this.options = new SimulationSuiteOptions(opts);
	}

	/**
	 * `<title> (<subtitle>) [<location>]`, with the subtitle part omitted when not set.
	 */
	public get fullName(): string {
		const subtitlePart = this._subtitle ? `(${this._subtitle}) ` : '';
		return `${this._title} ${subtitlePart}[${this._location}]`;
	}

	/**
	 * `<title>-<subtitle>-<location>`, with the subtitle part omitted when not set.
	 */
	public get outcomeCategory(): string {
		const subtitlePart = this._subtitle ? `-${this._subtitle}` : '';
		return `${this._title}${subtitlePart}-${this._location}`;
	}
}

export type SimulationTestFilter = (test: SimulationTest) => boolean;

/**
 * Builds a predicate selecting the tests to run.
 *
 * @param grep When a string, it is converted with `grepStrToRegex` and tested against the
 * test's full name, or against the suite's full name if the string starts with `!s:`.
 * When an array, every entry must occur (case-insensitively) in the test's full name.
 * @param omitGrep When set, tests whose full name matches it are excluded.
 * @returns A filter that accepts a test only if all of the above conditions hold.
 */
export function createSimulationTestFilter(grep?: string[] | string, omitGrep?: string): SimulationTestFilter {
	const filters: SimulationTestFilter[] = [];

	if (grep) {
		if (typeof grep === 'string') {
			let pattern = grep.trim();
			const isSuiteNameSearch = pattern.startsWith('!s:');
			if (isSuiteNameSearch) {
				pattern = pattern.replace(/^!s:/, '');
			}
			const grepRegex = grepStrToRegex(pattern);
			filters.push(test => grepRegex.test(isSuiteNameSearch ? test.suite.fullName : test.fullName));
		} else {
			const needles = Array.isArray(grep) ? grep : [grep];
			for (const needle of needles) {
				const needleLowerCase = String(needle).toLowerCase();
				filters.push(test => test.fullName.toLowerCase().includes(needleLowerCase));
			}
		}
	}

	if (omitGrep) {
		const omitGrepRegex = grepStrToRegex(omitGrep);
		filters.push(test => !omitGrepRegex.test(test.fullName));
	}

	return (test: SimulationTest) => filters.every(shouldRunTest => shouldRunTest(test));
}

/**
 * Holds all registered suites and tests. While a suite factory is running,
 * tests registered through {@link stest} are added to that suite; otherwise
 * they are added to the default `generic` suite.
 */
class SimulationTestsRegistryClass {
	private readonly defaultSuite: SimulationSuite = new SimulationSuite({ title: 'generic', location: 'inline' });
	private suites: SimulationSuite[] = [this.defaultSuite];
	private currentSuite: SimulationSuite = this.defaultSuite;
	private readonly testNames = new Set<string>();

	private _inputPath: string | undefined;
	private _testPath: string | undefined;
	private _filter: SimulationTestFilter = () => true;
	private _allowTestReregistration = false;

	/**
	 * Sets the input path that is handed to every suite factory.
	 */
	public setInputPath(inputPath: string) {
		this._inputPath = inputPath;
	}

	/**
	 * @param testPath When set, only suites whose location file basename (with or without extension) equals it are registered.
	 * @param grep See {@link createSimulationTestFilter}.
	 * @param omitGrep See {@link createSimulationTestFilter}.
	 */
	public setFilters(testPath?: string, grep?: string[] | string, omitGrep?: string) {
		this._testPath = testPath;
		this._filter = createSimulationTestFilter(grep, omitGrep);
	}

	public getAllSuites(): readonly SimulationSuite[] {
		return this.suites;
	}

	/**
	 * Returns the tests of all suites that pass the current filter, sorted by full name.
	 */
	public getAllTests(): readonly SimulationTest[] {
		return this.suites
			.flatMap(suite => suite.tests)
			.filter(this._filter)
			.sort((t0, t1) => t0.fullName.localeCompare(t1.fullName));
	}

	/**
	 * After this call, registering a test with an already used full name no longer throws.
	 */
	public allowTestReregistration() {
		this._allowTestReregistration = true;
	}

	/**
	 * Registers a test in the current suite.
	 *
	 * The descriptor inherits the suite's language (if it has none) and is prefixed with the
	 * suite's configurations. Whitespace runs in the description are collapsed to a single
	 * space and the description is cut to 100 characters plus an ellipsis.
	 *
	 * @throws Error if a test with the same full name exists and re-registration is not allowed.
	 */
	public registerTest(testDescriptor: ISimulationTestDescriptor, options: ISimulationTestOptions, runner: SimulationTestFunction): void {
		const suite = this.currentSuite;

		if (testDescriptor.language === undefined && suite.language) {
			testDescriptor = { ...testDescriptor, language: suite.language };
		}

		// inherit configurations from suite
		if (suite.configurations !== undefined) {
			const configurations = testDescriptor.configurations === undefined
				? suite.configurations
				: [...suite.configurations, ...testDescriptor.configurations];
			testDescriptor = { ...testDescriptor, configurations };
		}

		if (suite.nonExtensionConfigurations !== undefined) {
			const nonExtensionConfigurations: NonExtensionConfiguration[] = [
				...suite.nonExtensionConfigurations,
				...(testDescriptor.nonExtensionConfigurations ?? []),
			];
			testDescriptor = { ...testDescriptor, nonExtensionConfigurations };
		}

		// remove newlines, carriage returns, bad whitespace, etc
		let description = testDescriptor.description.replace(/\s+/g, ' ');

		// force a length of 100 chars for a stest name
		if (description.length > 100) {
			description = description.substring(0, 100) + '…';
		}
		testDescriptor = { ...testDescriptor, description };

		const test = new SimulationTest(testDescriptor, options, suite, runner);
		// change this validation up
		if (this.testNames.has(test.fullName) && !this._allowTestReregistration) {
			throw new Error(`Cannot have two tests with the same name: ${test.fullName}`);
		}
		this.testNames.add(test.fullName);

		suite.tests.push(test);
	}

	/**
	 * Registers a suite and runs its factory so that the suite's tests get registered.
	 *
	 * The suite is ignored when a test path filter is set and does not match the suite's
	 * location. A previously registered suite with the same location path and full name
	 * is replaced.
	 */
	public registerSuite(descriptor: ISimulationSuiteDescriptor, options: ISimulationSuiteOptions, factory: (inputPath?: string) => void) {
		if (this._testPath && options.location !== undefined) {
			const testBasename = path.basename(options.location.path);
			const testBasenameWithoutExtension = testBasename.replace(/\.[^/.]+$/, '');

			if (this._testPath !== testBasename && this._testPath !== testBasenameWithoutExtension) {
				return;
			}
		}

		const suite = new SimulationSuite(descriptor, options);

		const suiteId = (s: SimulationSuite): string => `${s.options.location?.path}###${s.fullName}`;
		const newSuiteId = suiteId(suite);

		this.suites = this.suites.filter(s => suiteId(s) !== newSuiteId); // When re-registering a suite, delete the old one
		this.suites.push(suite);
		this.invokeSuiteFactory(suite, factory);
	}

	/**
	 * Runs the factory with `suite` as the current suite and always restores the default suite afterwards.
	 */
	private invokeSuiteFactory(suite: SimulationSuite, factory: (inputPath?: string) => void) {
		try {
			this.currentSuite = suite;
			factory(this._inputPath);
		} finally {
			this.currentSuite = this.defaultSuite;
		}
	}
}

export const SimulationTestsRegistry = new SimulationTestsRegistryClass();

/**
 * Determines the source location from which `fn` was called by inspecting a stack trace.
 *
 * @param fn The function whose caller should be located; it and all frames above it are
 * omitted from the captured stack trace.
 * @returns The zero-based location of the caller, or `undefined` if it cannot be determined.
 */
function captureLocation(fn: Function): ITestLocation | undefined {
	const err = new Error();
	Error.captureStackTrace(err, fn);

	// Line 0 is the error message, line 1 is the first frame below `fn`.
	const frame = err.stack?.split('\n').at(1);
	if (!frame) {
		// It looks like sometimes the stack is empty,
		// so let's add a fallback case
		return captureLocationUsingClassicalWay();
	}
	return extractPositionFromStackTraceLine(frame);

	function captureLocationUsingClassicalWay(): ITestLocation | undefined {
		const plainError = new Error();
		// Error:
		//     at captureLocationUsingClassicalWay (/Users/alex/src/vscode-copilot/test/base/stest.ts:398:10)
		//     at captureLocation (/Users/alex/src/vscode-copilot/test/base/stest.ts:374:11)
		//     at stest (/Users/alex/src/vscode-copilot/test/base/stest.ts:467:84)
		//     at /Users/alex/src/vscode-copilot/test/codeMapper/codeMapper.stest.ts:22:2
		const callerFrame = plainError.stack?.split('\n').at(4);
		if (!callerFrame) {
			console.log(`No stack in captureLocation`);
			console.log(plainError.stack);
			return undefined;
		}
		return extractPositionFromStackTraceLine(callerFrame);
	}

	function extractPositionFromStackTraceLine(stack: string): ITestLocation | undefined {
		const r1 = /\((.+):(\d+):(\d+)\)/; // "at name (file:line:col)"
		const r2 = /at (.+):(\d+):(\d+)/; // "at file:line:col"
		const match = stack.match(r1) ?? stack.match(r2);
		if (!match) {
			console.log(`No matches in stack for captureLocation`);
			console.log(stack);
			return undefined;
		}

		// Stack traces are one-based, the returned position is zero-based.
		return {
			path: match[1],
			position: {
				line: Number(match[2]) - 1,
				character: Number(match[3]) - 1,
			}
		};
	}
}

/**
 * @remarks DO NOT FORGET to register the test file in `simulationTests.ts` for local test files
 */
export function ssuite(descriptor: ISimulationSuiteDescriptor, factory: (inputPath?: string) => void) {
	SimulationTestsRegistry.registerSuite(descriptor, { optional: false, location: captureLocation(ssuite) }, factory);
}
ssuite.optional = function (skip: (opts: SimulationOptions) => boolean, descriptor: ISimulationSuiteDescriptor, factory: (inputPath?: string) => void) {
	SimulationTestsRegistry.registerSuite(descriptor, { optional: true, skip, location: captureLocation(ssuite.optional) }, factory);
};
ssuite.skip = function (descriptor: ISimulationSuiteDescriptor, factory: (inputPath?: string) => void) {
	SimulationTestsRegistry.registerSuite(descriptor, { optional: true, skip: (_: SimulationOptions) => true, location: captureLocation(ssuite.skip) }, factory);
};

/**
 * The test function will receive as first argument a context.
 *
 * On the context, you will find a good working ChatMLFetcher which uses caching
 * and a caching slot which matches the run number.
 *
 * You will also find `SimulationTestRuntime` on the context, which allows you
 * to use logging in your test or write files to the test outcome directory.
 */
export function stest(testDescriptor: string | ISimulationTestDescriptor, runner: SimulationTestFunction, opts?: ISimulationTestOptions) {
	testDescriptor = typeof testDescriptor === 'string' ? { description: testDescriptor } : testDescriptor;
	SimulationTestsRegistry.registerTest(testDescriptor, { optional: false, location: captureLocation(stest), ...opts }, runner);
}
stest.optional = function (skip: () => boolean, testDescriptor: ISimulationTestDescriptor, runner: SimulationTestFunction, opts?: ISimulationTestOptions) {
	SimulationTestsRegistry.registerTest(testDescriptor, { optional: true, skip, location: captureLocation(stest.optional), ...opts }, runner);
};
stest.skip = function (testDescriptor: ISimulationTestDescriptor, runner: SimulationTestFunction, opts?: ISimulationTestOptions) {
	SimulationTestsRegistry.registerTest(testDescriptor, { optional: true, skip: () => true, location: captureLocation(stest.skip), ...opts }, runner);
};

export const ISimulationTestRuntime = createServiceIdentifier<ISimulationTestRuntime>('ISimulationTestRuntime');

export interface ISimulationTestRuntime extends ILogTarget, ISimulationTestContext {

	logIt(level: LogLevel, metadataStr: string, ...extra: any[]): void;
	shouldLog(level: LogLevel): boolean | undefined;
	log(message: string, err?: any): void;
	flushLogs(): Promise<void>;
	writeFile(filename: string, contents: Uint8Array | string, tag: string): Promise<string>;
	getWrittenFiles(): IWrittenFile[];
	getOutcome(): SimulationTestOutcome | undefined;
	setOutcome(outcome: SimulationTestOutcome): void;
	getExplicitScore(): number | undefined;
	setExplicitScore(score: number): void;
}

/**
 * Per-run test runtime: buffers log messages in memory, writes files into the
 * test outcome directory and keeps the test's outcome and explicit score.
 */
export class SimulationTestRuntime implements ISimulationTestRuntime {

	declare readonly _serviceBrand: undefined;

	private readonly explicitLogMessages: string[] = [];
	private readonly implicitLogMessages: string[] = [];
	private readonly writtenFiles: IWrittenFile[] = [];
	private score?: number;
	private outcome: SimulationTestOutcome | undefined = undefined;

	constructor(
		private readonly baseDir: string,
		private readonly testOutcomeDir: string,
		protected readonly runNumber: number
	) { }

	public readonly isInSimulationTests = true;

	/**
	 * Buffers an implicit log line, prefixed with an ISO timestamp. `level` is not used.
	 */
	public logIt(level: LogLevel, metadataStr: string, ...extra: any[]): void {
		const timestamp = new Date().toISOString();
		this.implicitLogMessages.push(`[${timestamp}] ${metadataStr} ${extra.join(' ')}`);
	}

	public shouldLog(level: LogLevel): boolean | undefined {
		return undefined;
	}

	/**
	 * Buffers an explicit log message; if `err` is given, its stack (or its string form) is appended.
	 */
	public log(message: string, err?: any): void {
		if (err) {
			message += ' ' + (err.stack ? String(err.stack) : String(err));
		}
		this.explicitLogMessages.push(message);
	}

	/**
	 * Writes the buffered explicit and implicit log messages to their log files.
	 * Nothing is written for a buffer that is empty.
	 */
	public async flushLogs(): Promise<void> {
		if (this.explicitLogMessages.length > 0) {
			await this.writeFile(SIMULATION_EXPLICIT_LOG_FILENAME, this.explicitLogMessages.join('\n'), EXPLICIT_LOG_TAG);
		}
		if (this.implicitLogMessages.length > 0) {
			await this.writeFile(SIMULATION_IMPLICIT_LOG_FILENAME, this.implicitLogMessages.join('\n'), IMPLICIT_LOG_TAG);
		}
	}

	/**
	 * Writes `contents` to a file in the test outcome directory, creating directories as needed.
	 *
	 * @returns The path of the written file relative to the base directory.
	 */
	public async writeFile(filename: string, contents: Uint8Array | string, tag: string): Promise<string> {
		const dest = this._findUniqueFilename(
			path.join(this.testOutcomeDir, this.massageFilename(filename))
		);

		const relativePath = path.relative(this.baseDir, dest);
		// Recorded before the asynchronous write so that later calls pick a different name.
		this.writtenFiles.push({ relativePath, tag });

		await fs.promises.mkdir(path.dirname(dest), { recursive: true });
		await fs.promises.writeFile(dest, contents);
		return relativePath;
	}

	/**
	 * Prefixes the file name with the run number, zero-padded to two digits.
	 */
	protected massageFilename(filename: string): string {
		return `${String(this.runNumber).padStart(2, '0')}-${filename}`;
	}

	/**
	 * Generate a new filePath in case this filePath already exists.
	 *
	 * Only files recorded by this runtime are considered. Candidates are
	 * `<basename>.1<ext>` … `<basename>.999<ext>`; if all are taken, the initial path is returned.
	 */
	private _findUniqueFilename(initialFilePath: string): string {
		const ext = path.extname(initialFilePath);
		const basename = initialFilePath.substring(0, initialFilePath.length - ext.length);

		for (let i = 0; i < 1000; i++) {
			// This file was already written, we'll rename it to <basename>.X.<ext>
			const filePath = i === 0 ? initialFilePath : `${basename}.${i}${ext}`;
			const relativePath = path.relative(this.baseDir, filePath);
			const alreadyWritten = this.writtenFiles.some(x => x.relativePath === relativePath);
			if (!alreadyWritten) {
				return filePath;
			}
		}
		return initialFilePath;
	}

	/**
	 * Returns a copy of the list of files written so far.
	 */
	public getWrittenFiles(): IWrittenFile[] {
		return this.writtenFiles.slice(0);
	}

	public getOutcome(): SimulationTestOutcome | undefined {
		return this.outcome;
	}

	public setOutcome(outcome: SimulationTestOutcome) {
		this.outcome = outcome;
	}

	public getExplicitScore(): number | undefined {
		return this.score;
	}

	public setExplicitScore(score: number) {
		this.score = score;
	}
}

const FILENAME_LIMIT = 125;

/**
 * Converts a test name into a lower-case name made of `[a-z0-9-]` only.
 *
 * Names longer than {@link FILENAME_LIMIT} are cut to that length and get the first
 * 8 characters of the SHA-256 of the full sanitized name appended.
 */
export function toDirname(testName: string): string {
	const filename = testName.replace(/[^a-zA-Z0-9]/g, '-').replace(/-+/g, '-').toLowerCase();
	if (filename.length > FILENAME_LIMIT) { // windows file names can not exceed 255 chars and path length limits, so keep it short
		return `${filename.substring(0, FILENAME_LIMIT)}-${computeSHA256(filename).substring(0, 8)}`;
	}
	return filename;
}