// Source: GitHub repository microsoft/vscode, path blob/main/extensions/copilot/src/extension/intents/test/node/toolCallingLoopAutopilot.spec.ts
1
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
5
6
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
7
import type { ChatRequest, LanguageModelToolInformation } from 'vscode';
8
import { IChatHookService } from '../../../../platform/chat/common/chatHookService';
9
import { ChatFetchResponseType, ChatResponse } from '../../../../platform/chat/common/commonTypes';
10
import { CancellationTokenSource } from '../../../../util/vs/base/common/cancellation';
11
import { DisposableStore } from '../../../../util/vs/base/common/lifecycle';
12
import { generateUuid } from '../../../../util/vs/base/common/uuid';
13
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
14
import { Conversation, Turn } from '../../../prompt/common/conversation';
15
import { IBuildPromptContext, IToolCallRound } from '../../../prompt/common/intents';
16
import { IBuildPromptResult, nullRenderPromptResult } from '../../../prompt/node/intents';
17
import { createExtensionUnitTestingServices } from '../../../test/node/services';
18
import { IToolsService } from '../../../tools/common/toolsService';
19
import { TestToolsService } from '../../../tools/node/test/testToolsService';
20
import { IToolCallingLoopOptions, IToolCallSingleResult, ToolCallingLoop } from '../../node/toolCallingLoop';
21
import { MockChatHookService } from './toolCallingLoopHooks.spec';
22
23
/**
 * Minimal concrete `ToolCallingLoop` used by the autopilot tests.
 *
 * The overridable hooks are stubbed (null prompt, no tools, `fetch` throws) and
 * thin public wrappers expose the autopilot-related members the tests poke at.
 * Members that are not called directly on `this` are reached through an `any`
 * cast (presumably because they are private to the base class — verify against
 * `ToolCallingLoop`).
 */
class AutopilotTestToolCallingLoop extends ToolCallingLoop<IToolCallingLoopOptions> {
	/** Always resolves to the null render result; the argument is ignored. */
	protected override async buildPrompt(_buildPromptContext: IBuildPromptContext): Promise<IBuildPromptResult> {
		return nullRenderPromptResult();
	}

	/** Reports an empty tool list. */
	protected override async getAvailableTools(): Promise<LanguageModelToolInformation[]> {
		return [];
	}

	/** Guard: rejects if any test ends up issuing a fetch. */
	protected override async fetch(): Promise<never> {
		throw new Error('fetch should not be called in these tests');
	}

	/** Public forwarder to `shouldAutopilotContinue`. */
	public testShouldAutopilotContinue(result: IToolCallSingleResult): string | undefined {
		return this.shouldAutopilotContinue(result);
	}

	/** Public forwarder to `shouldAutoRetry`, reached through an `any` cast. */
	public testShouldAutoRetry(response: ChatResponse): boolean {
		const internals = this as any;
		return internals.shouldAutoRetry(response);
	}

	/** Bumps the internal `autopilotRetryCount` by one. */
	public incrementAutopilotRetryCount(): void {
		const internals = this as any;
		internals.autopilotRetryCount++;
	}

	/**
	 * Overwrites the `autopilotStopHookActive` flag, standing in for the
	 * assignment that (per the tests' own comments) `run()` normally performs.
	 */
	public setAutopilotStopHookActive(value: boolean): void {
		// Plain `any` cast to reach the field; no prototype manipulation is involved.
		const internals = this as any;
		internals.autopilotStopHookActive = value;
	}

	/** Appends a fake round to the internal `toolCallRounds` array. */
	public addToolCallRound(round: IToolCallRound): void {
		const internals = this as any;
		internals.toolCallRounds.push(round);
	}

	/** Public forwarder to `ensureAutopilotTools`. */
	public testEnsureAutopilotTools(tools: LanguageModelToolInformation[]): LanguageModelToolInformation[] {
		return this.ensureAutopilotTools(tools);
	}
}
73
74
/**
 * Builds a `ChatRequest` stub for tests.
 *
 * Every listed field receives a placeholder value. `overrides` is spread last,
 * so any field it supplies replaces the placeholder.
 *
 * @param overrides Fields to replace on the stub; defaults to none.
 * @returns The stub, asserted to `ChatRequest`.
 */
function createMockChatRequest(overrides: Partial<ChatRequest> = {}): ChatRequest {
	const request = {
		// Request content
		prompt: 'test prompt',
		command: undefined,
		references: [],
		// Location / attempt metadata
		location: 1,
		location2: undefined,
		attempt: 0,
		enableCommandDetection: false,
		isParticipantDetected: false,
		// Tooling
		toolReferences: [],
		toolInvocationToken: {} as ChatRequest['toolInvocationToken'],
		model: null!,
		tools: new Map(),
		// Fresh identifiers on every call
		id: generateUuid(),
		sessionId: generateUuid(),
		// Caller-supplied values win over everything above
		...overrides,
	} as ChatRequest;
	return request;
}
93
94
/**
 * Creates a `Conversation` containing `turnCount` user turns.
 *
 * Turn `i` carries the message `test message ${i}`; each turn and the
 * conversation itself get a freshly generated UUID.
 *
 * @param turnCount Number of turns to create (defaults to 1).
 */
function createTestConversation(turnCount: number = 1): Conversation {
	const userTurns: Turn[] = [];
	let index = 0;
	while (index < turnCount) {
		userTurns.push(new Turn(
			generateUuid(),
			{ message: `test message ${index}`, type: 'user' }
		));
		index++;
	}
	return new Conversation(generateUuid(), userTurns);
}
104
105
/**
 * Creates a fake tool-call round.
 *
 * @param toolCallNames One tool call is produced per name, each with a fresh id
 *   and the literal arguments string `'{}'`. Defaults to no tool calls.
 * @param response Text stored as the round's response. Defaults to `''`.
 * @returns A round with a fresh id and `toolInputRetry` set to 0.
 */
function createMockRound(toolCallNames: string[] = [], response: string = ''): IToolCallRound {
	// The round id is generated before the per-call ids, as in a single object literal.
	const id = generateUuid();
	const toolCalls = toolCallNames.map(toolName => {
		return {
			id: generateUuid(),
			name: toolName,
			arguments: '{}',
		};
	});
	return { id, response, toolInputRetry: 0, toolCalls };
}
117
118
/**
 * Creates an `IToolCallSingleResult` stub.
 *
 * The defaults are: a `{ type: 0, value: '' }` response, an empty round from
 * `createMockRound()`, no ignored files, and empty message/tool lists.
 * `overrides` is spread on top of the defaults.
 *
 * @param overrides Fields to replace on the stub; defaults to none.
 */
function createMockSingleResult(overrides: Partial<IToolCallSingleResult> = {}): IToolCallSingleResult {
	const defaults: IToolCallSingleResult = {
		response: { type: 0, value: '' } as any,
		round: createMockRound(),
		hadIgnoredFiles: false,
		lastRequestMessages: [],
		availableTools: [],
	};
	return { ...defaults, ...overrides };
}
128
129
describe('ToolCallingLoop autopilot', () => {
130
// Per-test fixtures: rebuilt in `beforeEach`, released in `afterEach`.
let disposables: DisposableStore;
let instantiationService: IInstantiationService;
let tokenSource: CancellationTokenSource;

beforeEach(() => {
	disposables = new DisposableStore();
	const hookService = new MockChatHookService();

	// Unit-testing services, with the chat hook service swapped for the mock.
	const services = disposables.add(createExtensionUnitTestingServices());
	services.define(IChatHookService, hookService);

	instantiationService = services.createTestingAccessor().get(IInstantiationService);

	// The store owns the token source so it is disposed with everything else.
	tokenSource = disposables.add(new CancellationTokenSource());
});

afterEach(() => {
	// Dispose everything created for the test first, then undo vitest mocks.
	disposables.dispose();
	vi.restoreAllMocks();
});
152
153
/**
 * Instantiates an `AutopilotTestToolCallingLoop` over a one-turn conversation
 * with a tool call limit of 10, and registers it with `disposables`.
 *
 * @param permissionLevel Value placed on the mock request's `permissionLevel`.
 * @param requestOverrides Extra request fields; spread after `permissionLevel`,
 *   so they take precedence over it.
 */
function createLoop(permissionLevel?: string, requestOverrides: Partial<ChatRequest> = {}): AutopilotTestToolCallingLoop {
	const options: IToolCallingLoopOptions = {
		conversation: createTestConversation(1),
		toolCallLimit: 10,
		request: createMockChatRequest({
			permissionLevel,
			...requestOverrides,
		} as Partial<ChatRequest>),
	};
	const loop = instantiationService.createInstance(AutopilotTestToolCallingLoop, options);
	return disposables.add(loop);
}
170
171
describe('shouldAutopilotContinue', () => {
	/** Runs one continuation check against a fresh default result (no tool calls, empty response). */
	const checkWithDefaultResult = (loop: AutopilotTestToolCallingLoop): string | undefined =>
		loop.testShouldAutopilotContinue(createMockSingleResult());

	it('should return a nudge message when task_complete was not called', () => {
		const loop = createLoop('autopilot');

		expect(checkWithDefaultResult(loop)).toContain('task_complete');
	});

	it('should return undefined when task_complete was called in a previous round', () => {
		const loop = createLoop('autopilot');
		loop.addToolCallRound(createMockRound(['task_complete']));

		expect(checkWithDefaultResult(loop)).toBeUndefined();
	});

	it('should stop after MAX_AUTOPILOT_ITERATIONS', () => {
		const loop = createLoop('autopilot');

		// Hard-coded copy of the cap (MAX_AUTOPILOT_ITERATIONS = 5).
		const iterationCap = 5;
		for (let attempt = 0; attempt < iterationCap; attempt++) {
			expect(checkWithDefaultResult(loop)).toContain('task_complete');
		}

		// One call past the cap must no longer produce a nudge.
		expect(checkWithDefaultResult(loop)).toBeUndefined();
	});

	it('should bail when prior nudge produced no tool calls', () => {
		const loop = createLoop('autopilot');

		// Pretend a nudge was already sent and the flag is still set.
		loop.setAutopilotStopHookActive(true);

		// The earlier nudge yielded no tool calls, so nudging again would only
		// burn tokens: the model is effectively done.
		expect(checkWithDefaultResult(loop)).toBeUndefined();
	});

	it('should skip the nudge when the model returned a text-only response (no tool calls)', () => {
		const loop = createLoop('autopilot');
		const textOnlyResult = createMockSingleResult({
			round: createMockRound([], 'Here is a summary of what I did.'),
		});

		expect(loop.testShouldAutopilotContinue(textOnlyResult)).toBeUndefined();
	});

	it('should allow another nudge after autopilotStopHookActive is reset', () => {
		const loop = createLoop('autopilot');

		// First nudge.
		const firstNudge = checkWithDefaultResult(loop);
		expect(firstNudge).toContain('task_complete');

		// Mimic run(): the flag is raised, then cleared once the model made tool calls.
		loop.setAutopilotStopHookActive(true);
		loop.setAutopilotStopHookActive(false);

		// With the flag cleared, a second nudge is expected.
		const secondNudge = checkWithDefaultResult(loop);
		expect(secondNudge).toContain('task_complete');
	});
});
237
238
describe('shouldAutoRetry', () => {
	/** Builds a minimal response stub carrying only the given `type` plus fixed filler fields. */
	function mockResponse(type: ChatFetchResponseType): ChatResponse {
		return { type, reason: 'test', requestId: 'req-1', serverRequestId: undefined } as any;
	}

	/** Calls `incrementAutopilotRetryCount` on `loop` the given number of times. */
	function bumpRetryCount(loop: AutopilotTestToolCallingLoop, times: number): void {
		for (let n = 0; n < times; n++) {
			loop.incrementAutopilotRetryCount();
		}
	}

	// Cases that depend only on permission level and response type, registered in table order.
	const permissionAndTypeCases: ReadonlyArray<{
		title: string;
		permissionLevel: string | undefined;
		type: ChatFetchResponseType;
		expected: boolean;
	}> = [
		{ title: 'should retry on network error in autoApprove mode', permissionLevel: 'autoApprove', type: ChatFetchResponseType.NetworkError, expected: true },
		{ title: 'should retry on Failed in autopilot mode', permissionLevel: 'autopilot', type: ChatFetchResponseType.Failed, expected: true },
		{ title: 'should retry on BadRequest', permissionLevel: 'autoApprove', type: ChatFetchResponseType.BadRequest, expected: true },
		{ title: 'should not retry on RateLimited', permissionLevel: 'autoApprove', type: ChatFetchResponseType.RateLimited, expected: false },
		{ title: 'should not retry on QuotaExceeded', permissionLevel: 'autopilot', type: ChatFetchResponseType.QuotaExceeded, expected: false },
		{ title: 'should not retry on Canceled', permissionLevel: 'autoApprove', type: ChatFetchResponseType.Canceled, expected: false },
		{ title: 'should not retry on OffTopic', permissionLevel: 'autopilot', type: ChatFetchResponseType.OffTopic, expected: false },
		{ title: 'should not retry on Success', permissionLevel: 'autoApprove', type: ChatFetchResponseType.Success, expected: false },
		{ title: 'should not retry without autoApprove or autopilot permission', permissionLevel: undefined, type: ChatFetchResponseType.NetworkError, expected: false },
	];

	for (const { title, permissionLevel, type, expected } of permissionAndTypeCases) {
		it(title, () => {
			const loop = createLoop(permissionLevel);
			expect(loop.testShouldAutoRetry(mockResponse(type))).toBe(expected);
		});
	}

	it('should not retry after hitting MAX_AUTOPILOT_RETRIES', () => {
		const loop = createLoop('autoApprove');
		bumpRetryCount(loop, 3);

		expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(false);
	});

	it('should allow retries up to the limit', () => {
		const loop = createLoop('autopilot');
		bumpRetryCount(loop, 2);

		// Two retries recorded, which is still below the cap of 3.
		expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Failed))).toBe(true);
	});
});
305
306
describe('tool call limit extension', () => {
	/**
	 * Instantiates a loop whose options carry `toolCallLimit: 150`, for the given
	 * permission level, and registers it with `disposables`.
	 */
	function createLoopWithLimit150(permissionLevel: string): AutopilotTestToolCallingLoop {
		const conversation = createTestConversation(1);
		const request = createMockChatRequest({ permissionLevel } as Partial<ChatRequest>);
		const loop = instantiationService.createInstance(
			AutopilotTestToolCallingLoop,
			{ conversation, toolCallLimit: 150, request }
		);
		return disposables.add(loop);
	}

	// NOTE(review): the titles below mention a hard cap of 200, but the assertions
	// only check that the constructor-supplied limit (150) is stored unchanged.
	// The cap itself is applied in run(), which is not invoked here because that
	// would need a full mock of runOne. Consider renaming these tests or adding
	// a run()-level test for the cap.

	it('should have a hard cap of 200 for autoApprove mode', () => {
		const loop = createLoopWithLimit150('autoApprove');

		expect((loop as any).options.toolCallLimit).toBe(150);
	});

	it('should have a hard cap of 200 for autopilot mode', () => {
		const loop = createLoopWithLimit150('autopilot');

		expect((loop as any).options.toolCallLimit).toBe(150);
	});
});
346
347
describe('ensureAutopilotTools', () => {
	// Tool description shared by the registration helper and the "already present" test.
	const mockTaskCompleteTool: LanguageModelToolInformation = {
		name: 'task_complete',
		description: 'Signal that the task is done',
		inputSchema: { type: 'object', properties: {} },
		tags: [],
		source: undefined,
	};

	/** Returns a fresh placeholder description for a non-autopilot tool. */
	function readFileTool(): LanguageModelToolInformation {
		return { name: 'read_file', description: '', inputSchema: undefined, tags: [], source: undefined };
	}

	/** Registers `task_complete` on the test tools service with an invoke that returns empty content. */
	function registerTaskCompleteTool(): void {
		const toolsService = instantiationService.invokeFunction(accessor => accessor.get(IToolsService)) as TestToolsService;
		toolsService.addTestToolOverride(mockTaskCompleteTool, { invoke: () => ({ content: [] }) });
	}

	it('should add task_complete when missing in autopilot mode', () => {
		registerTaskCompleteTool();
		const loop = createLoop('autopilot');
		const inputTools: LanguageModelToolInformation[] = [readFileTool()];

		const ensured = loop.testEnsureAutopilotTools(inputTools);

		expect(ensured).toHaveLength(2);
		expect(ensured.some(tool => tool.name === 'task_complete')).toBe(true);
	});

	it('should not duplicate task_complete when already present', () => {
		registerTaskCompleteTool();
		const loop = createLoop('autopilot');
		const inputTools: LanguageModelToolInformation[] = [mockTaskCompleteTool];

		const ensured = loop.testEnsureAutopilotTools(inputTools);

		expect(ensured).toHaveLength(1);
	});

	it('should not add task_complete in non-autopilot mode', () => {
		registerTaskCompleteTool();
		const loop = createLoop('autoApprove');
		const inputTools: LanguageModelToolInformation[] = [];

		const ensured = loop.testEnsureAutopilotTools(inputTools);

		expect(ensured).toHaveLength(0);
	});

	it('should return tools unchanged when not in autopilot mode', () => {
		const loop = createLoop(undefined);
		const inputTools: LanguageModelToolInformation[] = [readFileTool()];

		const ensured = loop.testEnsureAutopilotTools(inputTools);

		// Identity check: the very same array instance must come back.
		expect(ensured).toBe(inputTools);
	});
});
397
});
398
399