Path: blob/main/extensions/copilot/test/e2e/toolSimTest.ts
13388 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import assert from 'assert';
import { IToolCall } from '../../src/extension/prompt/common/intents';
import { ToolName } from '../../src/extension/tools/common/toolNames';
import { IToolsService } from '../../src/extension/tools/common/toolsService';
import { NoopTestToolsService } from '../../src/extension/tools/node/test/testToolsService';
import { IConfigurationService } from '../../src/platform/configuration/common/configurationService';
import { InMemoryConfigurationService } from '../../src/platform/configuration/test/common/inMemoryConfigurationService';
import { ITestingServicesAccessor } from '../../src/platform/test/node/services';
import { SyncDescriptor } from '../../src/util/vs/platform/instantiation/common/descriptors';
import { SimulationTestFunction } from '../base/stest';
import { KeywordPredicate, validate } from '../base/validate';
import { fetchConversationScenarios, IConversationTestCase, Scenario } from './scenarioLoader';
import { generateScenarioTestRunner } from './scenarioTest';

/**
 * Signature of a callback that is handed the services accessor, the question
 * text and a list of tool calls, and completes asynchronously.
 *
 * NOTE(review): the element type of `toolCalls` is left as `any` because this
 * alias is exported and tightening it could break existing implementers.
 */
export type ToolScenarioEvaluator = (
	accessor: ITestingServicesAccessor,
	question: string,
	toolCalls: any[]
) => Promise<void>;

/**
 * A tool call whose arguments have been run through the tools service's input
 * validation. `input` holds the validated input object.
 */
export interface IParsedToolCall {
	name: string;
	input: unknown;
	id: string;
}

/**
 * Programmatic expectations layered on top of a tool test case.
 */
export interface IToolCallExpectation {
	/**
	 * When truthy, an expected tool may be called more than once in the round.
	 * Otherwise each expected tool name is accepted a single time only.
	 */
	allowParallelToolCalls?: boolean;

	/**
	 * Validate tool results with a callback.
	 *
	 * Each validator receives every parsed call made to the tool it is keyed by.
	 */
	toolCallValidators?: Partial<Record<ToolName, (toolCall: IParsedToolCall[]) => void | Promise<void>>>;
}

/**
 * Builds a simulation test function for a single tool test case.
 *
 * The returned function registers {@link NoopTestToolsService} as the tools
 * service, prefixes the question with `/editAgent` when it is missing, runs the
 * scenario and then checks the tool calls of the single resulting round.
 *
 * Note that the supplied test case object is updated in place (`question` and
 * `setupCase` are assigned on it).
 *
 * @param toolScenario A single test case, or an array holding exactly one.
 * @param expectedToolCalls Optional extra expectations (repeat calls, validators).
 * @returns The simulation test function.
 * @throws When run, if the scenario does not contain exactly one test case.
 */
export function generateToolTestRunner(toolScenario: IConversationToolTestCase | ToolScenario, expectedToolCalls?: IToolCallExpectation): SimulationTestFunction {
	// A local constant (instead of reassigning the parameter) keeps the array
	// type intact inside the closure below on every compiler version.
	const testCases: ToolScenario = Array.isArray(toolScenario) ? toolScenario : [toolScenario];

	return async (testingServiceCollection) => {
		testingServiceCollection.define(IToolsService, new SyncDescriptor(NoopTestToolsService));

		if (testCases.length !== 1) {
			throw new Error('Tool test cases must only have one scenario');
		}
		const testCase = testCases[0];
		testCase.question = ensureSlashEditAgent(testCase.question);
		testCase.setupCase = accessor => {
			// NOTE(review): presumably this keeps the agent to a single tool call round - confirm.
			(accessor.get(IConfigurationService) as InMemoryConfigurationService).setNonExtensionConfig('chat.agent.maxRequests', 0);
		};

		// Apply default name
		const scenario: Scenario = testCases.map(entry => ({
			...entry,
			name: entry.name ?? entry.question,
		}));

		return generateScenarioTestRunner(scenario, async (accessor, _question, _userVisibleAnswer, _rawResponse, turn) => {
			const toolCallRounds = turn?.resultMetadata?.toolCallRounds;
			if (!toolCallRounds || toolCallRounds.length === 0) {
				return { success: false, errorMessage: 'No tool calls were made.' };
			}

			if (toolCallRounds.length !== 1) {
				return { success: false, errorMessage: `Multiple tool call rounds, this shouldn't've happened.` };
			}

			await validateToolCallExpectation(accessor, testCase, expectedToolCalls, toolCallRounds[0].toolCalls);
			return { success: true };
		})(testingServiceCollection);
	};
}

/**
 * Checks the tool calls of one round against a test case and its expectation.
 *
 * For every call, in order: the tool name is checked against
 * `testCase.expectedToolCalls` (when defined), the arguments are validated by
 * the tools service, and `testCase.toolInputValues` (when defined) is checked
 * against the validated input. Once all calls passed, the per-tool validators
 * of `expectation` are awaited one after another.
 *
 * @param accessor Used to obtain the tools service.
 * @param testCase The test case holding the declarative expectations.
 * @param expectation Optional programmatic expectations.
 * @param toolCalls The tool calls to check.
 * @throws Error (or an assertion error for boolean input values) on the first
 *   expectation that is not met.
 */
async function validateToolCallExpectation(accessor: ITestingServicesAccessor, testCase: IConversationToolTestCase, expectation: IToolCallExpectation | undefined, toolCalls: IToolCall[]): Promise<void> {
	const toolsService = accessor.get(IToolsService);
	const allowRepeatedCalls = Boolean(expectation?.allowParallelToolCalls);

	const expected = testCase.expectedToolCalls;
	// Names that may still be called; left undefined when the test case does not restrict tool names.
	const expectedAnyOfToolNames = expected && new Set<ToolName>(
		typeof expected === 'string' ? [expected] : expected.anyOf);
	// Names already used up, tracked only to report repeated calls accurately.
	const consumedToolNames = new Set<ToolName>();

	const toolCallsByName = new Map<ToolName, IParsedToolCall[]>();
	for (const toolCall of toolCalls) {
		const toolName = toolCall.name as ToolName;

		if (expectedAnyOfToolNames) {
			if (!expectedAnyOfToolNames.has(toolName)) {
				if (consumedToolNames.has(toolName)) {
					throw new Error(`Tool "${toolCall.name}" was called more than once, but allowParallelToolCalls is not enabled for this test.`);
				}
				throw new Error(`Tool call name "${toolCall.name}" does not match expected tool call names (${Array.from(expectedAnyOfToolNames).join(', ')}).`);
			}

			if (!allowRepeatedCalls) {
				// Each expected tool may only be called once unless repeats are explicitly allowed.
				expectedAnyOfToolNames.delete(toolName);
				consumedToolNames.add(toolName);
			}
		}

		const validationResult = toolsService.validateToolInput(toolCall.name, toolCall.arguments);
		if ('error' in validationResult) {
			throw new Error(`Tool call input "${JSON.stringify(toolCall.arguments)}" is invalid: ${validationResult.error}`);
		}

		const parsedToolCall: IParsedToolCall = {
			...toolCall,
			input: validationResult.inputObj as object
		};

		let callsForTool = toolCallsByName.get(toolName);
		if (!callsForTool) {
			callsForTool = [];
			toolCallsByName.set(toolName, callsForTool);
		}
		callsForTool.push(parsedToolCall);

		if (testCase.toolInputValues) {
			const input = parsedToolCall.input as Record<string, unknown>;
			for (const [key, keyword] of Object.entries(testCase.toolInputValues)) {
				const argValue = input[key];
				if (typeof keyword === 'boolean') {
					// Boolean expectations are compared strictly against the argument value.
					assert.strictEqual(argValue, keyword, key);
					continue;
				}

				if (typeof argValue !== 'string') {
					throw new Error(`Tool call input arg "${key}" must be a string to use toolInputValues. Got: ${JSON.stringify(argValue)}`);
				}

				const err = validate(argValue, keyword);
				if (err) {
					throw new Error(err);
				}
			}
		}
	}

	for (const [toolName, parsedCalls] of toolCallsByName) {
		const validator = expectation?.toolCallValidators?.[toolName];
		if (validator) {
			await validator(parsedCalls);
		}
	}
}

/**
 * JSON extensions for tool test cases.
 */
export interface IConversationToolTestCase extends Omit<IConversationTestCase, 'name'> {
	/** Optional name; the runner falls back to the question text when it is missing. */
	name?: string;
	/** The tool that must be called, or a set of acceptable tools. */
	expectedToolCalls?: ToolName | { anyOf: ToolName[] };
	/**
	 * Expectations on individual tool input arguments, keyed by argument name.
	 * Boolean values are compared strictly; other values are passed to `validate`
	 * together with the (string) argument value.
	 */
	toolInputValues?: Record<string, object | boolean | KeywordPredicate[]>;
}

/** A tool scenario is a list of tool test cases. */
export type ToolScenario = IConversationToolTestCase[];

/**
 * Loads the conversation scenarios found under `scenarioFolderPath` and turns
 * them into tool scenarios by lifting `expectedToolCalls` out of each test
 * case's JSON.
 *
 * @param scenarioFolderPath Folder passed on to `fetchConversationScenarios`.
 * @returns One tool scenario per loaded conversation scenario.
 * @throws Error if a test case does not define `expectedToolCalls`.
 */
export function fetchToolScenarios(scenarioFolderPath: string): ToolScenario[] {
	return fetchConversationScenarios(scenarioFolderPath).map(scenario =>
		scenario.map<IConversationToolTestCase>(testCase => {
			const { expectedToolCalls } = testCase.json;
			if (!expectedToolCalls) {
				throw new Error(`Tool test case "${testCase.name}" must define expectedToolCalls.`);
			}

			return {
				...testCase,
				expectedToolCalls,
			};
		})
	);
}

/**
 * Returns `question` unchanged when it already starts with `/editAgent`,
 * otherwise returns it prefixed with `/editAgent ` (including the space).
 */
function ensureSlashEditAgent(question: string): string {
	return question.startsWith('/editAgent') ? question : '/editAgent ' + question;
}