Path: blob/main/extensions/copilot/src/extension/intents/test/node/toolCallingLoopAutopilot.spec.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { ChatRequest, LanguageModelToolInformation } from 'vscode';
import { IChatHookService } from '../../../../platform/chat/common/chatHookService';
import { ChatFetchResponseType, ChatResponse } from '../../../../platform/chat/common/commonTypes';
import { CancellationTokenSource } from '../../../../util/vs/base/common/cancellation';
import { DisposableStore } from '../../../../util/vs/base/common/lifecycle';
import { generateUuid } from '../../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
import { Conversation, Turn } from '../../../prompt/common/conversation';
import { IBuildPromptContext, IToolCallRound } from '../../../prompt/common/intents';
import { IBuildPromptResult, nullRenderPromptResult } from '../../../prompt/node/intents';
import { createExtensionUnitTestingServices } from '../../../test/node/services';
import { IToolsService } from '../../../tools/common/toolsService';
import { TestToolsService } from '../../../tools/node/test/testToolsService';
import { IToolCallingLoopOptions, IToolCallSingleResult, ToolCallingLoop } from '../../node/toolCallingLoop';
import { MockChatHookService } from './toolCallingLoopHooks.spec';

/**
 * Concrete test implementation that exposes autopilot-related protected methods.
 *
 * The abstract hooks are stubbed so that no prompt is rendered, no tools are
 * offered and any attempt to fetch fails loudly; the tests below only exercise
 * the autopilot decision helpers, never the full `run()` loop.
 */
class AutopilotTestToolCallingLoop extends ToolCallingLoop<IToolCallingLoopOptions> {
	protected override async buildPrompt(_buildPromptContext: IBuildPromptContext): Promise<IBuildPromptResult> {
		return nullRenderPromptResult();
	}

	protected override async getAvailableTools(): Promise<LanguageModelToolInformation[]> {
		return [];
	}

	protected override async fetch(): Promise<never> {
		throw new Error('fetch should not be called in these tests');
	}

	/**
	 * Expose shouldAutopilotContinue for testing.
	 */
	public testShouldAutopilotContinue(result: IToolCallSingleResult): string | undefined {
		return this.shouldAutopilotContinue(result);
	}

	/**
	 * Expose shouldAutoRetry for testing. It is reached through an `as any` cast
	 * because it is not accessible from this subclass's type.
	 */
	public testShouldAutoRetry(response: ChatResponse): boolean {
		return (this as any).shouldAutoRetry(response);
	}

	/**
	 * Bump the internal retry counter by one, as if one automatic retry had happened.
	 */
	public incrementAutopilotRetryCount(): void {
		(this as any).autopilotRetryCount++;
	}

	/**
	 * Simulate the autopilotStopHookActive flag being set (as it would be in run()).
	 */
	public setAutopilotStopHookActive(value: boolean): void {
		// Write the non-public field directly through an `as any` cast.
		(this as any).autopilotStopHookActive = value;
	}

	/**
	 * Push a fake round into the internal toolCallRounds.
	 */
	public addToolCallRound(round: IToolCallRound): void {
		(this as any).toolCallRounds.push(round);
	}

	/**
	 * Expose ensureAutopilotTools for testing.
	 */
	public testEnsureAutopilotTools(tools: LanguageModelToolInformation[]): LanguageModelToolInformation[] {
		return this.ensureAutopilotTools(tools);
	}
}

/**
 * Build a minimal ChatRequest stub with fresh ids.
 *
 * @param overrides Fields that replace (or extend) the defaults below.
 */
function createMockChatRequest(overrides: Partial<ChatRequest> = {}): ChatRequest {
	return {
		prompt: 'test prompt',
		command: undefined,
		references: [],
		location: 1,
		location2: undefined,
		attempt: 0,
		enableCommandDetection: false,
		isParticipantDetected: false,
		toolReferences: [],
		toolInvocationToken: {} as ChatRequest['toolInvocationToken'],
		model: null!,
		tools: new Map(),
		id: generateUuid(),
		sessionId: generateUuid(),
		...overrides,
	} as ChatRequest;
}

/**
 * Build a Conversation containing `turnCount` user turns, each with a unique id
 * and a `test message <index>` message.
 */
function createTestConversation(turnCount: number = 1): Conversation {
	const turns: Turn[] = [];
	for (let i = 0; i < turnCount; i++) {
		turns.push(new Turn(
			generateUuid(),
			{ message: `test message ${i}`, type: 'user' }
		));
	}
	return new Conversation(generateUuid(), turns);
}

/**
 * Build a tool-call round with one tool call per entry of `toolCallNames`
 * (each with empty-object arguments) and the given response text.
 */
function createMockRound(toolCallNames: string[] = [], response: string = ''): IToolCallRound {
	return {
		id: generateUuid(),
		response,
		toolInputRetry: 0,
		toolCalls: toolCallNames.map(name => ({
			id: generateUuid(),
			name,
			arguments: '{}',
		})),
	};
}

/**
 * Build an IToolCallSingleResult whose round has no tool calls and an empty
 * response unless `overrides` says otherwise.
 */
function createMockSingleResult(overrides: Partial<IToolCallSingleResult> = {}): IToolCallSingleResult {
	return {
		response: { type: 0, value: '' } as any,
		round: createMockRound(),
		hadIgnoredFiles: false,
		lastRequestMessages: [],
		availableTools: [],
		...overrides,
	};
}

describe('ToolCallingLoop autopilot', () => {
	// Limits the tests below rely on.
	// NOTE(review): presumably these mirror MAX_AUTOPILOT_ITERATIONS and
	// MAX_AUTOPILOT_RETRIES in toolCallingLoop.ts — keep them in sync.
	const MAX_AUTOPILOT_ITERATIONS = 5;
	const MAX_AUTOPILOT_RETRIES = 3;

	let disposables: DisposableStore;
	let instantiationService: IInstantiationService;
	let tokenSource: CancellationTokenSource;

	beforeEach(() => {
		disposables = new DisposableStore();
		const mockChatHookService = new MockChatHookService();

		const serviceCollection = disposables.add(createExtensionUnitTestingServices());
		serviceCollection.define(IChatHookService, mockChatHookService);

		const accessor = serviceCollection.createTestingAccessor();
		instantiationService = accessor.get(IInstantiationService);

		tokenSource = new CancellationTokenSource();
		disposables.add(tokenSource);
	});

	afterEach(() => {
		disposables.dispose();
		vi.restoreAllMocks();
	});

	/**
	 * Create a loop under test and register it for disposal.
	 *
	 * @param permissionLevel Value placed on the request's `permissionLevel`.
	 * @param requestOverrides Extra request fields; these win over `permissionLevel`.
	 * @param toolCallLimit Tool call limit passed in the loop options.
	 */
	function createLoop(permissionLevel?: string, requestOverrides: Partial<ChatRequest> = {}, toolCallLimit: number = 10): AutopilotTestToolCallingLoop {
		const conversation = createTestConversation(1);
		const request = createMockChatRequest({
			permissionLevel,
			...requestOverrides,
		} as Partial<ChatRequest>);
		const loop = instantiationService.createInstance(
			AutopilotTestToolCallingLoop,
			{
				conversation,
				toolCallLimit,
				request,
			}
		);
		disposables.add(loop);
		return loop;
	}

	describe('shouldAutopilotContinue', () => {
		it('should return a nudge message when task_complete was not called', () => {
			const loop = createLoop('autopilot');
			const result = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(result).toContain('task_complete');
		});

		it('should return undefined when task_complete was called in a previous round', () => {
			const loop = createLoop('autopilot');
			loop.addToolCallRound(createMockRound(['task_complete']));

			const result = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(result).toBeUndefined();
		});

		it('should stop after MAX_AUTOPILOT_ITERATIONS', () => {
			const loop = createLoop('autopilot');

			// Every call up to the cap should still produce a nudge
			for (let i = 0; i < MAX_AUTOPILOT_ITERATIONS; i++) {
				const msg = loop.testShouldAutopilotContinue(createMockSingleResult());
				expect(msg).toContain('task_complete');
			}

			// The next call should return undefined — hit the cap
			const msg = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(msg).toBeUndefined();
		});

		it('should bail when prior nudge produced no tool calls', () => {
			const loop = createLoop('autopilot');

			// Simulate that we already nudged once and set the flag
			loop.setAutopilotStopHookActive(true);

			// Should bail — the previous nudge produced no tool calls, so further nudges
			// would just waste tokens (the model is effectively done).
			const result = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(result).toBeUndefined();
		});

		it('should skip the nudge when the model returned a text-only response (no tool calls)', () => {
			const loop = createLoop('autopilot');
			const result = loop.testShouldAutopilotContinue(createMockSingleResult({
				round: createMockRound([], 'Here is a summary of what I did.'),
			}));
			expect(result).toBeUndefined();
		});

		it('should allow another nudge after autopilotStopHookActive is reset', () => {
			const loop = createLoop('autopilot');

			// First nudge
			const msg1 = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(msg1).toContain('task_complete');

			// Simulate the run() loop setting the flag then the model making progress
			loop.setAutopilotStopHookActive(true);
			// Reset as if tool calls were made (what run() does now)
			loop.setAutopilotStopHookActive(false);

			// Second nudge should work
			const msg2 = loop.testShouldAutopilotContinue(createMockSingleResult());
			expect(msg2).toContain('task_complete');
		});
	});

	describe('shouldAutoRetry', () => {
		/** Build a minimal response of the given type; the other fields are placeholders. */
		function mockResponse(type: ChatFetchResponseType): ChatResponse {
			return { type, reason: 'test', requestId: 'req-1', serverRequestId: undefined } as any;
		}

		it('should retry on network error in autoApprove mode', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(true);
		});

		it('should retry on Failed in autopilot mode', () => {
			const loop = createLoop('autopilot');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Failed))).toBe(true);
		});

		it('should retry on BadRequest', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.BadRequest))).toBe(true);
		});

		it('should not retry on RateLimited', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.RateLimited))).toBe(false);
		});

		it('should not retry on QuotaExceeded', () => {
			const loop = createLoop('autopilot');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.QuotaExceeded))).toBe(false);
		});

		it('should not retry on Canceled', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Canceled))).toBe(false);
		});

		it('should not retry on OffTopic', () => {
			const loop = createLoop('autopilot');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.OffTopic))).toBe(false);
		});

		it('should not retry on Success', () => {
			const loop = createLoop('autoApprove');
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Success))).toBe(false);
		});

		it('should not retry without autoApprove or autopilot permission', () => {
			const loop = createLoop(undefined);
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(false);
		});

		it('should not retry after hitting MAX_AUTOPILOT_RETRIES', () => {
			const loop = createLoop('autoApprove');
			for (let i = 0; i < MAX_AUTOPILOT_RETRIES; i++) {
				loop.incrementAutopilotRetryCount();
			}
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.NetworkError))).toBe(false);
		});

		it('should allow retries up to the limit', () => {
			const loop = createLoop('autopilot');
			// Stop one short of the cap so a further retry is still allowed
			for (let i = 0; i < MAX_AUTOPILOT_RETRIES - 1; i++) {
				loop.incrementAutopilotRetryCount();
			}
			expect(loop.testShouldAutoRetry(mockResponse(ChatFetchResponseType.Failed))).toBe(true);
		});
	});

	describe('tool call limit extension', () => {
		// NOTE(review): despite their titles, these two tests do not exercise the
		// cap of 200. The limit extension happens in run(), which is not called
		// here; they only verify that constructing the loop leaves the configured
		// toolCallLimit untouched.
		it('should have a hard cap of 200 for autoApprove mode', () => {
			const loop = createLoop('autoApprove', {}, 150);

			expect((loop as any).options.toolCallLimit).toBe(150);
		});

		it('should have a hard cap of 200 for autopilot mode', () => {
			const loop = createLoop('autopilot', {}, 150);

			expect((loop as any).options.toolCallLimit).toBe(150);
		});
	});

	describe('ensureAutopilotTools', () => {
		const mockTaskCompleteTool: LanguageModelToolInformation = {
			name: 'task_complete',
			description: 'Signal that the task is done',
			inputSchema: { type: 'object', properties: {} },
			tags: [],
			source: undefined,
		};

		/** Register the mock task_complete tool with the test tools service. */
		function registerTaskCompleteTool(): void {
			const toolsService = instantiationService.invokeFunction(acc => acc.get(IToolsService)) as TestToolsService;
			toolsService.addTestToolOverride(mockTaskCompleteTool, { invoke: () => ({ content: [] }) });
		}

		it('should add task_complete when missing in autopilot mode', () => {
			registerTaskCompleteTool();
			const loop = createLoop('autopilot');
			const tools: LanguageModelToolInformation[] = [
				{ name: 'read_file', description: '', inputSchema: undefined, tags: [], source: undefined },
			];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toHaveLength(2);
			expect(result.some(t => t.name === 'task_complete')).toBe(true);
		});

		it('should not duplicate task_complete when already present', () => {
			registerTaskCompleteTool();
			const loop = createLoop('autopilot');
			const tools: LanguageModelToolInformation[] = [mockTaskCompleteTool];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toHaveLength(1);
		});

		it('should not add task_complete in non-autopilot mode', () => {
			registerTaskCompleteTool();
			const loop = createLoop('autoApprove');
			const tools: LanguageModelToolInformation[] = [];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toHaveLength(0);
		});

		it('should return tools unchanged when not in autopilot mode', () => {
			const loop = createLoop(undefined);
			const tools: LanguageModelToolInformation[] = [
				{ name: 'read_file', description: '', inputSchema: undefined, tags: [], source: undefined },
			];
			const result = loop.testEnsureAutopilotTools(tools);
			expect(result).toBe(tools);
		});
	});
});