CoCalc -- toolCallingLoopAutopilot.spec.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/intents/test/node/toolCallingLoopAutopilot.spec.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
7
import type { ChatRequest, LanguageModelToolInformation } from 'vscode';
8
import { IChatHookService } from '../../../../platform/chat/common/chatHookService';
9
import { ChatFetchResponseType, ChatResponse } from '../../../../platform/chat/common/commonTypes';
10
import { CancellationTokenSource } from '../../../../util/vs/base/common/cancellation';
11
import { DisposableStore } from '../../../../util/vs/base/common/lifecycle';
12
import { generateUuid } from '../../../../util/vs/base/common/uuid';
13
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
14
import { Conversation, Turn } from '../../../prompt/common/conversation';
15
import { IBuildPromptContext, IToolCallRound } from '../../../prompt/common/intents';
16
import { IBuildPromptResult, nullRenderPromptResult } from '../../../prompt/node/intents';
17
import { createExtensionUnitTestingServices } from '../../../test/node/services';
18
import { IToolsService } from '../../../tools/common/toolsService';
19
import { TestToolsService } from '../../../tools/node/test/testToolsService';
20
import { IToolCallingLoopOptions, IToolCallSingleResult, ToolCallingLoop } from '../../node/toolCallingLoop';
21
import { MockChatHookService } from './toolCallingLoopHooks.spec';
22

23
/**
 * Concrete {@link ToolCallingLoop} used only by the autopilot tests in this file.
 *
 * The overridden hooks are inert (null prompt, no tools, `fetch` throws), and thin
 * public wrappers expose the autopilot-related internals so each decision can be
 * exercised directly.
 */
class AutopilotTestToolCallingLoop extends ToolCallingLoop<IToolCallingLoopOptions> {
	/** Prompt rendering is irrelevant to these tests; always yields the null render result. */
	protected override async buildPrompt(_buildPromptContext: IBuildPromptContext): Promise<IBuildPromptResult> {
		return nullRenderPromptResult();
	}

	/** No tools are offered by default. */
	protected override async getAvailableTools(): Promise<LanguageModelToolInformation[]> {
		return [];
	}

	/** Fails loudly if a test accidentally reaches the fetch step. */
	protected override async fetch(): Promise<never> {
		throw new Error('fetch should not be called in these tests');
	}

	/**
	 * Public wrapper around `shouldAutopilotContinue`.
	 * @returns whatever the wrapped method returns: a string, or `undefined`.
	 */
	public testShouldAutopilotContinue(result: IToolCallSingleResult): string | undefined {
		return this.shouldAutopilotContinue(result);
	}

	/**
	 * Public wrapper around `shouldAutoRetry`.
	 * The method is reached through an `any` cast, so the call is not type-checked.
	 */
	public testShouldAutoRetry(response: ChatResponse): boolean {
		return (this as any).shouldAutoRetry(response);
	}

	/** Bumps the internal `autopilotRetryCount` by one, as if a retry had happened. */
	public incrementAutopilotRetryCount(): void {
		(this as any).autopilotRetryCount++;
	}

	/**
	 * Simulate the autopilotStopHookActive flag being set (as it would be in run()).
	 */
	public setAutopilotStopHookActive(value: boolean): void {
		// Plain `any` cast (no prototype manipulation) to reach a field that is not exposed publicly.
		(this as any).autopilotStopHookActive = value;
	}

	/**
	 * Push a fake round into the internal toolCallRounds.
	 */
	public addToolCallRound(round: IToolCallRound): void {
		(this as any).toolCallRounds.push(round);
	}

	/**
	 * Expose ensureAutopilotTools for testing.
	 */
	public testEnsureAutopilotTools(tools: LanguageModelToolInformation[]): LanguageModelToolInformation[] {
		return this.ensureAutopilotTools(tools);
	}
}
73

74
/**
 * Builds a minimal `ChatRequest` stub for tests.
 *
 * @param overrides Fields to replace on the default stub; spread last so they win.
 * @returns The stub, cast to `ChatRequest` (it is not a complete implementation).
 */
function createMockChatRequest(overrides: Partial<ChatRequest> = {}): ChatRequest {
	return {
		prompt: 'test prompt',
		command: undefined,
		references: [],
		location: 1,
		location2: undefined,
		attempt: 0,
		enableCommandDetection: false,
		isParticipantDetected: false,
		toolReferences: [],
		toolInvocationToken: {} as ChatRequest['toolInvocationToken'],
		// No model is needed: the test loop's fetch() throws if it is ever reached.
		model: null!,
		tools: new Map(),
		id: generateUuid(),
		sessionId: generateUuid(),
		...overrides,
	} as ChatRequest;
}
93

94
/**
 * Creates a `Conversation` containing `turnCount` user turns.
 *
 * Each turn gets a fresh UUID and the message `test message <index>`.
 *
 * @param turnCount Number of turns to generate (defaults to 1).
 */
function createTestConversation(turnCount: number = 1): Conversation {
	const turns: Turn[] = [];
	for (let i = 0; i < turnCount; i++) {
		turns.push(new Turn(
			generateUuid(),
			{ message: `test message ${i}`, type: 'user' }
		));
	}
	return new Conversation(generateUuid(), turns);
}
104

105
/**
 * Creates a fake tool-call round.
 *
 * @param toolCallNames Names of the tool calls to include; each gets a fresh id and `'{}'` arguments.
 * @param response Text stored as the round's `response`.
 */
function createMockRound(toolCallNames: string[] = [], response: string = ''): IToolCallRound {
	return {
		id: generateUuid(),
		response,
		toolInputRetry: 0,
		toolCalls: toolCallNames.map(name => ({
			id: generateUuid(),
			name,
			arguments: '{}',
		})),
	};
}
117

118
/**
 * Creates an `IToolCallSingleResult` with neutral defaults: an empty round with no
 * tool calls, no ignored files, no request messages and no available tools.
 *
 * @param overrides Fields to replace on the default result; spread last so they win.
 */
function createMockSingleResult(overrides: Partial<IToolCallSingleResult> = {}): IToolCallSingleResult {
	return {
		// Placeholder response object; cast because it is not a full ChatResponse.
		response: { type: 0, value: '' } as any,
		round: createMockRound(),
		hadIgnoredFiles: false,
		lastRequestMessages: [],
		availableTools: [],
		...overrides,
	};
}
128

129
describe('ToolCallingLoop autopilot', () => {
	let disposables: DisposableStore;
	let instantiationService: IInstantiationService;
	let tokenSource: CancellationTokenSource;

	beforeEach(() => {
		disposables = new DisposableStore();
		const mockChatHookService = new MockChatHookService();

		// Fresh service collection per test, with the chat hook service replaced by the mock.
		const serviceCollection = disposables.add(createExtensionUnitTestingServices());
		serviceCollection.define(IChatHookService, mockChatHookService);

		const accessor = serviceCollection.createTestingAccessor();
		instantiationService = accessor.get(IInstantiationService);

		tokenSource = new CancellationTokenSource();
		disposables.add(tokenSource);
	});

	afterEach(() => {
		disposables.dispose();
		vi.restoreAllMocks();
	});

	/**
	 * Instantiates the test loop with a single-turn conversation and a mock request.
	 *
	 * @param permissionLevel Value placed on the request's `permissionLevel` (e.g. 'autopilot', 'autoApprove').
	 * @param requestOverrides Extra request fields; applied after `permissionLevel`, so they can override it.
	 * @param toolCallLimit Value passed as the loop's `toolCallLimit` option (defaults to 10).
	 * @returns The loop, already registered with the per-test disposable store.
	 */
	function createLoop(
		permissionLevel?: string,
		requestOverrides: Partial<ChatRequest> = {},
		toolCallLimit: number = 10,
	): AutopilotTestToolCallingLoop {
		const conversation = createTestConversation(1);
		const request = createMockChatRequest({
			permissionLevel,
			...requestOverrides,
		} as Partial<ChatRequest>);
		const loop = instantiationService.createInstance(
			AutopilotTestToolCallingLoop,
			{
				conversation,
				toolCallLimit,
				request,
			}
		);
		disposables.add(loop);
		return loop;
	}

	describe('shouldAutopilotContinue', () => {
		// Mirrors MAX_AUTOPILOT_ITERATIONS in the implementation under test.
		const maxAutopilotIterations = 5;

		it('should return a nudge message when task_complete was not called', () => {
			const loop = createLoop('autopilot');
			const result = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(result).toContain('task_complete');
		});

		it('should return undefined when task_complete was called in a previous round', () => {
			const loop = createLoop('autopilot');
			loop.addToolCallRound(createMockRound(['task_complete']));

			const result = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(result).toBeUndefined();
		});

		it('should stop after MAX_AUTOPILOT_ITERATIONS', () => {
			const loop = createLoop('autopilot');

			// Every call up to the cap should still nudge.
			for (let i = 0; i < maxAutopilotIterations; i++) {
				const msg = loop.testShouldAutopilotContinue(createMockSingleResult());
				expect(msg).toContain('task_complete');
			}

			// The next call should return undefined — hit the cap
			const msg = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(msg).toBeUndefined();
		});

		it('should bail when prior nudge produced no tool calls', () => {
			const loop = createLoop('autopilot');

			// Simulate that we already nudged once and set the flag
			loop.setAutopilotStopHookActive(true);

			// Should bail — the previous nudge produced no tool calls, so further nudges
			// would just waste tokens (the model is effectively done).
			const result = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(result).toBeUndefined();
		});

		it('should skip the nudge when the model returned a text-only response (no tool calls)', () => {
			const loop = createLoop('autopilot');
			const result = loop.testShouldAutopilotContinue(createMockSingleResult({
				round: createMockRound([], 'Here is a summary of what I did.'),
			}));
			expect(result).toBeUndefined();
		});

		it('should allow another nudge after autopilotStopHookActive is reset', () => {
			const loop = createLoop('autopilot');

			// First nudge
			const msg1 = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(msg1).toContain('task_complete');

			// Simulate the run() loop setting the flag then the model making progress
			loop.setAutopilotStopHookActive(true);
			// Reset as if tool calls were made (what run() does now)
			loop.setAutopilotStopHookActive(false);

			// Second nudge should work
			const msg2 = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(msg2).toContain('task_complete');
		});
	});

	describe('shouldAutoRetry', () => {
		// Mirrors the retry cap (MAX_AUTOPILOT_RETRIES) in the implementation under test.
		const maxAutopilotRetries = 3;

		/** Builds a minimal response of the given type; cast because it is not a full ChatResponse. */
		function mockResponse(type: ChatFetchResponseType): ChatResponse {
			return { type, reason: 'test', requestId: 'req-1', serverRequestId: undefined } as any;
		}

		it('should retry on network error in autoApprove mode', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(true);
		});

		it('should retry on Failed in autopilot mode', () => {
			const loop = createLoop('autopilot');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Failed))).toBe(true);
		});

		it('should retry on BadRequest', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.BadRequest))).toBe(true);
		});

		it('should not retry on RateLimited', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.RateLimited))).toBe(false);
		});

		it('should not retry on QuotaExceeded', () => {
			const loop = createLoop('autopilot');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.QuotaExceeded))).toBe(false);
		});

		it('should not retry on Canceled', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Canceled))).toBe(false);
		});

		it('should not retry on OffTopic', () => {
			const loop = createLoop('autopilot');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.OffTopic))).toBe(false);
		});

		it('should not retry on Success', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Success))).toBe(false);
		});

		it('should not retry without autoApprove or autopilot permission', () => {
			const loop = createLoop(undefined);
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(false);
		});

		it('should not retry after hitting MAX_AUTOPILOT_RETRIES', () => {
			const loop = createLoop('autoApprove');
			for (let i = 0; i < maxAutopilotRetries; i++) {
				loop.incrementAutopilotRetryCount();
			}
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(false);
		});

		it('should allow retries up to the limit', () => {
			const loop = createLoop('autopilot');
			for (let i = 0; i < maxAutopilotRetries - 1; i++) {
				loop.incrementAutopilotRetryCount();
			}
			// One retry short of the cap, so another retry is still allowed
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Failed))).toBe(true);
		});
	});

	describe('tool call limit extension', () => {
		// NOTE: these tests only pin that the configured limit is stored unchanged at
		// construction time. The actual extension (and its hard cap of 200) happens in
		// run(), which we can't easily call without a full mock of runOne, so the cap
		// itself is not exercised here.
		const configuredLimit = 150;

		it('should have a hard cap of 200 for autoApprove mode', () => {
			const loop = createLoop('autoApprove', {}, configuredLimit);
			expect((loop as any).options.toolCallLimit).toBe(configuredLimit);
		});

		it('should have a hard cap of 200 for autopilot mode', () => {
			const loop = createLoop('autopilot', {}, configuredLimit);
			expect((loop as any).options.toolCallLimit).toBe(configuredLimit);
		});
	});

	describe('ensureAutopilotTools', () => {
		const mockTaskCompleteTool: LanguageModelToolInformation = {
			name: 'task_complete',
			description: 'Signal that the task is done',
			inputSchema: { type: 'object', properties: {} },
			tags: [],
			source: undefined,
		};

		/** Registers the mock task_complete tool with the test tools service. */
		function registerTaskCompleteTool(): void {
			const toolsService = instantiationService.invokeFunction(acc => acc.get(IToolsService)) as TestToolsService;
			toolsService.addTestToolOverride(mockTaskCompleteTool, { invoke: () => ({ content: [] }) });
		}

		it('should add task_complete when missing in autopilot mode', () => {
			registerTaskCompleteTool();
			const loop = createLoop('autopilot');
			const tools: LanguageModelToolInformation[] = [
				{ name: 'read_file', description: '', inputSchema: undefined, tags: [], source: undefined },
			];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toHaveLength(2);
			expect(result.some(t => t.name === 'task_complete')).toBe(true);
		});

		it('should not duplicate task_complete when already present', () => {
			registerTaskCompleteTool();
			const loop = createLoop('autopilot');
			const tools: LanguageModelToolInformation[] = [mockTaskCompleteTool];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toHaveLength(1);
		});

		it('should not add task_complete in non-autopilot mode', () => {
			registerTaskCompleteTool();
			const loop = createLoop('autoApprove');
			const tools: LanguageModelToolInformation[] = [];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toHaveLength(0);
		});

		it('should return tools unchanged when not in autopilot mode', () => {
			const loop = createLoop(undefined);
			const tools: LanguageModelToolInformation[] = [
				{ name: 'read_file', description: '', inputSchema: undefined, tags: [], source: undefined },
			];
			const result = loop.testEnsureAutopilotTools(tools);
			// Same array instance, not merely an equal copy.
			expect(result).toBe(tools);
		});
	});
});
398

399
Product

Resources

Company