CoCalc -- toolSimTest.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/test/e2e/toolSimTest.ts
13388 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import assert from 'assert';
7
import { IToolCall } from '../../src/extension/prompt/common/intents';
8
import { ToolName } from '../../src/extension/tools/common/toolNames';
9
import { IToolsService } from '../../src/extension/tools/common/toolsService';
10
import { NoopTestToolsService } from '../../src/extension/tools/node/test/testToolsService';
11
import { IConfigurationService } from '../../src/platform/configuration/common/configurationService';
12
import { InMemoryConfigurationService } from '../../src/platform/configuration/test/common/inMemoryConfigurationService';
13
import { ITestingServicesAccessor } from '../../src/platform/test/node/services';
14
import { SyncDescriptor } from '../../src/util/vs/platform/instantiation/common/descriptors';
15
import { SimulationTestFunction } from '../base/stest';
16
import { KeywordPredicate, validate } from '../base/validate';
17
import { fetchConversationScenarios, IConversationTestCase, Scenario } from './scenarioLoader';
18
import { generateScenarioTestRunner } from './scenarioTest';
19

20
/**
 * Signature of an asynchronous callback that evaluates the tool calls
 * produced for a question.
 *
 * It receives the testing services accessor, the question text and the list
 * of tool calls, and resolves with no value.
 */
export type ToolScenarioEvaluator = (accessor: ITestingServicesAccessor, question: string, toolCalls: any[]) => Promise<void>;
25

26
/**
 * A tool call whose input has passed validation.
 *
 * Instances are built in `validateToolCallExpectation` by spreading the
 * original tool call and attaching the validated input object.
 */
export interface IParsedToolCall {
	/** Identifier of the call (presumably carried over from the underlying tool call - confirm). */
	id: string;
	/** Name of the tool that was called. */
	name: string;
	/** Input object returned by the tools service after validating the call's arguments. */
	input: unknown;
}
31

32
/**
 * Optional extra expectations applied to the tool calls of a tool test.
 */
export interface IToolCallExpectation {
	/**
	 * Custom validators keyed by tool name. After all tool calls have been
	 * checked, each validator is invoked once with every parsed call made to
	 * its tool. A validator signals failure by throwing (or rejecting).
	 */
	toolCallValidators?: Partial<Record<ToolName, (toolCall: IParsedToolCall[]) => void | Promise<void>>>;

	/**
	 * When falsy, an expected tool name is consumed by its first call, so a
	 * second call to the same tool fails the expected-name check. When true,
	 * the same expected tool may be called several times.
	 */
	allowParallelToolCalls?: boolean;
}
40

41
/**
 * Builds a simulation test function that runs a single tool test case and
 * validates the tool calls it produced.
 *
 * @param toolScenario A single test case, or a scenario (array) that must
 * contain exactly one test case.
 * @param expectedToolCalls Optional additional expectations forwarded to the
 * tool call validation.
 * @returns The test function. When run, it throws if the scenario does not
 * contain exactly one test case.
 *
 * Note: the test case object is modified in place when the returned function
 * runs - its `question` is prefixed with `/editAgent` if needed and its
 * `setupCase` is replaced.
 */
export function generateToolTestRunner(toolScenario: IConversationToolTestCase | ToolScenario, expectedToolCalls?: IToolCallExpectation): SimulationTestFunction {
	const testCases: ToolScenario = Array.isArray(toolScenario) ? toolScenario : [toolScenario];

	return async (testingServiceCollection) => {
		testingServiceCollection.define(IToolsService, new SyncDescriptor(NoopTestToolsService));

		if (testCases.length !== 1) {
			throw new Error('Tool test cases must only have one scenario');
		}

		const testCase = testCases[0];
		testCase.question = ensureSlashEditAgent(testCase.question);
		testCase.setupCase = accessor => {
			// Sets `chat.agent.maxRequests` to 0 (presumably so the agent stops
			// after its first round of tool calls - confirm).
			const configurationService = accessor.get(IConfigurationService) as InMemoryConfigurationService;
			configurationService.setNonExtensionConfig('chat.agent.maxRequests', 0);
		};

		// Test cases without an explicit name are named after their question.
		const scenario: Scenario = testCases.map(entry => ({ ...entry, name: entry.name ?? entry.question }));

		const runScenario = generateScenarioTestRunner(scenario, async (accessor, question, userVisibleAnswer, rawResponse, turn, scenarioIndex, commands) => {
			const rounds = turn?.resultMetadata?.toolCallRounds;
			if (!rounds?.length) {
				return { success: false, errorMessage: 'No tool calls were made.' };
			}

			if (rounds.length !== 1) {
				return { success: false, errorMessage: `Multiple tool call rounds, this shouldn't've happened.` };
			}

			// Validation failures surface as thrown errors rather than as a failed result.
			await validateToolCallExpectation(accessor, testCase, expectedToolCalls, rounds[0].toolCalls);
			return { success: true };
		});

		return runScenario(testingServiceCollection);
	};
}
79

80
/**
 * Checks the tool calls of a single round against the expectations of a tool
 * test case, throwing on the first violation.
 *
 * For every tool call, in order:
 *  1. its name must be one of the expected tool names (when the test case
 *     declares any),
 *  2. its arguments must pass the tools service's input validation,
 *  3. its validated input must satisfy the test case's `toolInputValues`.
 *
 * Afterwards the per-tool validators from `expectation` are awaited one by
 * one, each receiving all parsed calls made to its tool.
 *
 * @param accessor Used to obtain the tools service.
 * @param testCase Supplies `expectedToolCalls` and `toolInputValues`.
 * @param expectation Optional extra expectations (repeat calls, validators).
 * @param toolCalls The tool calls to check.
 * @throws Error (or an assertion error) when any check fails.
 */
async function validateToolCallExpectation(accessor: ITestingServicesAccessor, testCase: IConversationToolTestCase, expectation: IToolCallExpectation | undefined, toolCalls: IToolCall[]): Promise<void> {
	const toolsService = accessor.get(IToolsService);

	// Names that are still acceptable; left undefined when the test case
	// declares no expected tool calls, which disables the name check.
	let acceptedNames: Set<ToolName> | undefined;
	const expected = testCase.expectedToolCalls;
	if (expected) {
		acceptedNames = new Set(typeof expected === 'string' ? [expected] : expected.anyOf);
	}

	const callsByTool = new Map<ToolName, IParsedToolCall[]>();
	for (const call of toolCalls) {
		const toolName = call.name as ToolName;

		if (acceptedNames) {
			if (!acceptedNames.has(toolName)) {
				throw new Error(`Tool call name "${call.name}" does not match expected tool call names (${Array.from(acceptedNames).join(', ')}).`);
			}

			// Unless repeated calls are allowed, a name is consumed by its first
			// call, so a second call to the same tool fails the check above.
			if (!expectation?.allowParallelToolCalls) {
				acceptedNames.delete(toolName);
			}
		}

		const validation = toolsService.validateToolInput(call.name, call.arguments);
		if ('error' in validation) {
			throw new Error(`Tool call input "${JSON.stringify(call.arguments)}" is invalid: ${validation.error}`);
		}

		const parsedCall: IParsedToolCall = {
			...call,
			input: validation.inputObj as object
		};

		let callsForTool = callsByTool.get(toolName);
		if (!callsForTool) {
			callsForTool = [];
			callsByTool.set(toolName, callsForTool);
		}
		callsForTool.push(parsedCall);

		const inputValues = testCase.toolInputValues;
		if (inputValues) {
			for (const [key, keyword] of Object.entries(inputValues)) {
				const argValue = (parsedCall.input as any)[key];

				// Boolean expectations are compared for strict equality.
				if (typeof keyword === 'boolean') {
					assert.strictEqual(argValue, keyword, key);
					continue;
				}

				// Every other expectation is matched against a string argument.
				if (typeof argValue !== 'string') {
					throw new Error(`Tool call input arg "${key}" must be a string to use toolInputValues. Got: ${JSON.stringify(argValue)}`);
				}

				const err = validate(argValue, keyword);
				if (err) {
					throw new Error(err);
				}
			}
		}
	}

	// Run the custom validators sequentially, one per tool that was called.
	for (const [toolName, parsedCalls] of callsByTool) {
		const validator = expectation?.toolCallValidators?.[toolName];
		if (validator) {
			await validator(parsedCalls);
		}
	}
}
142

143
/**
 * A conversation test case extended with tool-specific expectations
 * (the JSON extensions for tool test cases).
 */
export interface IConversationToolTestCase extends Omit<IConversationTestCase, 'name'> {
	/** Optional name; the test runner falls back to the question when it is missing. */
	name?: string;

	/**
	 * The tool that is expected to be called, or a set of acceptable tools
	 * (`anyOf`). When omitted, tool call names are not checked.
	 */
	expectedToolCalls?: ToolName | { anyOf: ToolName[] };

	/**
	 * Expectations for individual tool input arguments, keyed by argument
	 * name. A boolean must strictly equal the argument; any other value is
	 * passed to `validate` together with the argument, which must then be a
	 * string.
	 */
	toolInputValues?: Record<string, object | boolean | KeywordPredicate[]>;
}
151

152
/** A tool scenario: an ordered list of tool test cases. */
export type ToolScenario = Array<IConversationToolTestCase>;
153

154
/**
 * Loads the conversation scenarios found under a folder and converts them to
 * tool scenarios.
 *
 * Each test case must declare `expectedToolCalls` in its JSON; the value is
 * copied onto the resulting tool test case.
 *
 * NOTE(review): only `expectedToolCalls` is taken from the JSON here;
 * `toolInputValues` is not copied - confirm whether that is intended.
 *
 * @param scenarioFolderPath Folder to load the scenarios from.
 * @returns One tool scenario per loaded conversation scenario.
 * @throws Error when a test case does not define `expectedToolCalls`.
 */
export function fetchToolScenarios(scenarioFolderPath: string): ToolScenario[] {
	const toolScenarios: ToolScenario[] = [];

	for (const scenario of fetchConversationScenarios(scenarioFolderPath)) {
		const toolScenario: ToolScenario = [];

		for (const testCase of scenario) {
			const expectedToolCalls = testCase.json.expectedToolCalls;
			if (!expectedToolCalls) {
				throw new Error(`Tool test case "${testCase.name}" must define expectedToolCalls.`);
			}

			toolScenario.push({ ...testCase, expectedToolCalls });
		}

		toolScenarios.push(toolScenario);
	}

	return toolScenarios;
}
169

170
/**
 * Returns the question prefixed with the `/editAgent` slash command.
 *
 * A question that already starts with `/editAgent` is returned unchanged;
 * otherwise `/editAgent` and a single space are prepended.
 */
function ensureSlashEditAgent(question: string): string {
	return question.startsWith('/editAgent') ? question : '/editAgent ' + question;
}
176
Product

Resources

Company