Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/test/e2e/toolSimTest.ts
13388 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import assert from 'assert';
7
import { IToolCall } from '../../src/extension/prompt/common/intents';
8
import { ToolName } from '../../src/extension/tools/common/toolNames';
9
import { IToolsService } from '../../src/extension/tools/common/toolsService';
10
import { NoopTestToolsService } from '../../src/extension/tools/node/test/testToolsService';
11
import { IConfigurationService } from '../../src/platform/configuration/common/configurationService';
12
import { InMemoryConfigurationService } from '../../src/platform/configuration/test/common/inMemoryConfigurationService';
13
import { ITestingServicesAccessor } from '../../src/platform/test/node/services';
14
import { SyncDescriptor } from '../../src/util/vs/platform/instantiation/common/descriptors';
15
import { SimulationTestFunction } from '../base/stest';
16
import { KeywordPredicate, validate } from '../base/validate';
17
import { fetchConversationScenarios, IConversationTestCase, Scenario } from './scenarioLoader';
18
import { generateScenarioTestRunner } from './scenarioTest';
19
20
/**
 * Callback shape for evaluating the tool calls produced for a scenario.
 *
 * Receives the testing services accessor, the question text and an untyped
 * list of tool calls, and resolves without a value.
 */
export type ToolScenarioEvaluator = (accessor: ITestingServicesAccessor, question: string, toolCalls: any[]) => Promise<void>;
25
26
/**
 * A tool call together with its input after validation.
 */
export interface IParsedToolCall {
	/** Identifier of this tool call. */
	id: string;
	/** Name of the tool that was called. */
	name: string;
	/** Input object obtained from validating the call's arguments. */
	input: unknown;
}
31
32
/**
 * Optional, code-level expectations for the tool calls of a tool test. These
 * are checked in addition to what the test case itself declares.
 */
export interface IToolCallExpectation {
	/**
	 * When falsy, an expected tool name is accepted only once per test; a
	 * further call to the same tool fails validation. When set, the same
	 * expected tool may be called repeatedly.
	 */
	allowParallelToolCalls?: boolean;

	/**
	 * Per-tool validation callbacks. Each callback is invoked once with all
	 * parsed calls made to its tool, and may be synchronous or asynchronous.
	 * Tools that were not called do not trigger their callback.
	 */
	toolCallValidators?: Partial<Record<ToolName, (toolCall: IParsedToolCall[]) => void | Promise<void>>>;
}
40
41
/**
 * Creates a simulation test function for a single tool test case.
 *
 * The returned function registers `NoopTestToolsService` as the tools service,
 * prefixes the question with `/editAgent`, installs a `setupCase` that sets
 * `chat.agent.maxRequests` to 0, and then runs the scenario. The run fails
 * when no tool call round was recorded or when more than one was; otherwise
 * the calls of the only round are validated.
 *
 * Note that the given test case object is modified in place (`question` and
 * `setupCase` are overwritten).
 *
 * @param toolScenario A single test case, or a scenario holding exactly one.
 * @param expectedToolCalls Optional extra expectations for the tool calls.
 * @returns The test function to hand to the simulation test framework.
 * @throws Error (when the returned function runs) if the scenario does not
 * contain exactly one test case.
 */
export function generateToolTestRunner(toolScenario: IConversationToolTestCase | ToolScenario, expectedToolCalls?: IToolCallExpectation): SimulationTestFunction {
	const testCases: ToolScenario = Array.isArray(toolScenario) ? toolScenario : [toolScenario];

	return async (testingServiceCollection) => {
		testingServiceCollection.define(IToolsService, new SyncDescriptor(NoopTestToolsService));

		if (testCases.length !== 1) {
			throw new Error('Tool test cases must only have one scenario');
		}

		const onlyCase = testCases[0];
		onlyCase.question = ensureSlashEditAgent(onlyCase.question);
		onlyCase.setupCase = accessor => {
			const configurationService = accessor.get(IConfigurationService) as InMemoryConfigurationService;
			configurationService.setNonExtensionConfig('chat.agent.maxRequests', 0);
		};

		// The name defaults to the (already prefixed) question.
		const namedScenario: Scenario = testCases.map(entry => {
			const name = entry.name ?? entry.question;
			return { ...entry, name };
		});

		const runScenario = generateScenarioTestRunner(namedScenario, async (accessor, question, userVisibleAnswer, rawResponse, turn, scenarioIndex, commands) => {
			const rounds = turn?.resultMetadata?.toolCallRounds;
			if (!rounds?.length) {
				return { success: false, errorMessage: 'No tool calls were made.' };
			}

			if (rounds.length !== 1) {
				return { success: false, errorMessage: `Multiple tool call rounds, this shouldn't've happened.` };
			}

			const [onlyRound] = rounds;
			await validateToolCallExpectation(accessor, onlyCase, expectedToolCalls, onlyRound.toolCalls);
			return { success: true };
		});

		return runScenario(testingServiceCollection);
	};
}
79
80
/**
 * Validates the tool calls of one round against a test case and optional
 * code-level expectations.
 *
 * For each call, in order:
 *  1. the tool name is checked against `testCase.expectedToolCalls` (if set),
 *  2. the arguments are validated through the tools service,
 *  3. `testCase.toolInputValues` (if set) is checked against the validated input.
 *
 * Afterwards every `toolCallValidators` entry whose tool was called is invoked
 * once with all parsed calls of that tool.
 *
 * @param accessor Used to obtain the tools service.
 * @param testCase Supplies `expectedToolCalls` and `toolInputValues`.
 * @param expectation Optional extra expectations (repeat calls, validators).
 * @param toolCalls The tool calls to validate.
 * @throws Error when tool names are expected but no call was made, when a call
 * uses an unexpected tool, when an expected tool is called again without
 * `allowParallelToolCalls`, when the tool input is invalid, or when an input
 * value does not satisfy `toolInputValues`. Boolean mismatches surface as
 * assertion errors. Errors from validators propagate unchanged.
 */
async function validateToolCallExpectation(accessor: ITestingServicesAccessor, testCase: IConversationToolTestCase, expectation: IToolCallExpectation | undefined, toolCalls: IToolCall[]): Promise<void> {
	const toolsService = accessor.get(IToolsService);

	const declaredToolCalls = testCase.expectedToolCalls;
	const expectedToolNames = declaredToolCalls
		? new Set<ToolName>(typeof declaredToolCalls === 'string' ? [declaredToolCalls] : declaredToolCalls.anyOf)
		: undefined;

	// Without this guard an empty list would skip the loop below and pass
	// even though the test case declares which tools must be used.
	if (expectedToolNames && toolCalls.length === 0) {
		throw new Error(`No tool calls were made, expected one of: ${Array.from(expectedToolNames).join(', ')}.`);
	}

	const allowRepeatedCalls = Boolean(expectation?.allowParallelToolCalls);
	const calledToolNames = new Set<ToolName>();
	const toolCallsByName = new Map<ToolName, IParsedToolCall[]>();

	for (const toolCall of toolCalls) {
		const toolName = toolCall.name as ToolName;

		if (expectedToolNames) {
			if (!expectedToolNames.has(toolName)) {
				throw new Error(`Tool call name "${toolCall.name}" does not match expected tool call names (${Array.from(expectedToolNames).join(', ')}).`);
			}

			// Repeat calls are tracked separately from the expected names so
			// this failure is not reported as a name mismatch.
			if (!allowRepeatedCalls && calledToolNames.has(toolName)) {
				throw new Error(`Tool "${toolCall.name}" was called more than once; set allowParallelToolCalls to allow repeated calls to the same tool.`);
			}
			calledToolNames.add(toolName);
		}

		const validationResult = toolsService.validateToolInput(toolCall.name, toolCall.arguments);
		if ('error' in validationResult) {
			throw new Error(`Tool call input "${JSON.stringify(toolCall.arguments)}" is invalid: ${validationResult.error}`);
		}

		const parsedToolCall: IParsedToolCall = {
			...toolCall,
			input: validationResult.inputObj as object
		};

		const callsForTool = toolCallsByName.get(toolName);
		if (callsForTool) {
			callsForTool.push(parsedToolCall);
		} else {
			toolCallsByName.set(toolName, [parsedToolCall]);
		}

		if (testCase.toolInputValues) {
			// A non-object input yields `undefined` for every key, so the
			// checks below fail with their own messages instead of a bare
			// TypeError from the property access.
			const input = parsedToolCall.input;
			const inputRecord = typeof input === 'object' && input !== null ? input as Record<string, unknown> : undefined;

			for (const [key, keyword] of Object.entries(testCase.toolInputValues)) {
				const argValue = inputRecord?.[key];

				if (typeof keyword === 'boolean') {
					assert.strictEqual(argValue, keyword, key);
					continue;
				}

				if (typeof argValue !== 'string') {
					throw new Error(`Tool call input arg "${key}" must be a string to use toolInputValues. Got: ${JSON.stringify(argValue)}`);
				}

				const err = validate(argValue, keyword);
				if (err) {
					throw new Error(err);
				}
			}
		}
	}

	for (const [toolName, parsedCalls] of toolCallsByName) {
		const validator = expectation?.toolCallValidators?.[toolName];
		if (validator) {
			await validator(parsedCalls);
		}
	}
}
142
143
/**
 * JSON extensions for tool test cases.
 *
 * Same as a conversation test case, except that `name` is optional and the
 * tool-specific expectations below may be supplied.
 */
export interface IConversationToolTestCase extends Omit<IConversationTestCase, 'name'> {
	/** Test name; the tool test runner falls back to the question when omitted. */
	name?: string;

	/**
	 * Tool name(s) the test expects: either a single tool name, or `anyOf`
	 * listing the acceptable names. When set, a call to any other tool fails
	 * the test.
	 */
	expectedToolCalls?: ToolName | { anyOf: ToolName[] };

	/**
	 * Expected tool input values, keyed by input argument name. Boolean
	 * entries are compared for strict equality; other entries are checked via
	 * `validate` and require the argument to be a string.
	 */
	toolInputValues?: Record<string, object | boolean | KeywordPredicate[]>;
}
151
152
/**
 * A list of tool test cases. `generateToolTestRunner` requires the list to
 * contain exactly one entry.
 */
export type ToolScenario = IConversationToolTestCase[];
153
154
/**
 * Loads the conversation scenarios in a folder and converts them into tool
 * scenarios by lifting the tool-specific JSON fields onto each test case.
 *
 * @param scenarioFolderPath Folder passed on to `fetchConversationScenarios`.
 * @returns One tool scenario per loaded conversation scenario.
 * @throws Error when a test case's JSON is missing or does not define
 * `expectedToolCalls`.
 */
export function fetchToolScenarios(scenarioFolderPath: string): ToolScenario[] {
	const scenarios = fetchConversationScenarios(scenarioFolderPath);
	return scenarios.map(scenario => {
		return scenario.map<IConversationToolTestCase>(testCase => {
			// `?.` so a test case without JSON reports the descriptive error
			// below rather than a TypeError on the property access.
			const json = testCase.json;
			if (!json?.expectedToolCalls) {
				throw new Error(`Tool test case "${testCase.name}" must define expectedToolCalls.`);
			}

			return {
				...testCase,
				expectedToolCalls: json.expectedToolCalls,
				// NOTE(review): assumes `toolInputValues` lives next to
				// `expectedToolCalls` in the JSON - confirm against the loader.
				// Only set when present so a value already carried by
				// `testCase` is never replaced with `undefined`.
				...(json.toolInputValues !== undefined ? { toolInputValues: json.toolInputValues } : {}),
			};
		});
	});
}
169
170
/**
 * Returns the question prefixed with `/editAgent `, unless it already starts
 * with `/editAgent`, in which case it is returned unchanged.
 */
function ensureSlashEditAgent(question: string): string {
	const alreadyPrefixed = question.startsWith('/editAgent');
	return alreadyPrefixed ? question : '/editAgent ' + question;
}
176