Path: blob/main/extensions/copilot/src/extension/intents/test/node/validateToolMessages.spec.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import { describe, expect, it } from 'vitest';
import { ToolCallingLoop } from '../../node/toolCallingLoop';

/**
 * Wraps a plain string in a text content part.
 *
 * @param text The text to carry in the content part.
 * @returns A content part of kind `Text` holding `text`.
 */
function textPart(text: string): Raw.ChatCompletionContentPartText {
	return { type: Raw.ChatCompletionContentPartKind.Text, text };
}

/**
 * Builds an assistant message with a single text part.
 *
 * @param text The assistant's textual content.
 * @param toolCalls Optional tool calls attached to the message; left `undefined` when omitted.
 * @returns The assistant chat message.
 */
function assistantMsg(text: string, toolCalls?: Raw.ChatMessageToolCall[]): Raw.AssistantChatMessage {
	return {
		role: Raw.ChatRole.Assistant,
		content: [textPart(text)],
		toolCalls,
	};
}

/**
 * Builds a tool result message with a single text part.
 *
 * @param toolCallId The id of the tool call this result claims to answer.
 * @param text The textual tool result.
 * @returns The tool chat message.
 */
function toolMsg(toolCallId: string, text: string): Raw.ToolChatMessage {
	return {
		role: Raw.ChatRole.Tool,
		toolCallId,
		content: [textPart(text)],
	};
}

/**
 * Builds a user message with a single text part.
 *
 * @param text The user's textual content.
 * @returns The user chat message.
 */
function userMsg(text: string): Raw.UserChatMessage {
	return {
		role: Raw.ChatRole.User,
		content: [textPart(text)],
	};
}

/**
 * Builds a function-type tool call.
 *
 * @param id The tool call id that tool results are matched against.
 * @param name The name of the function being called.
 * @param args The serialized arguments; defaults to an empty JSON object.
 * @returns The tool call descriptor.
 */
function tc(id: string, name: string, args = '{}'): Raw.ChatMessageToolCall {
	return { id, type: 'function', function: { name, arguments: args } };
}

/**
 * Exercises `ToolCallingLoop.validateToolMessagesCore`: removal of tool result
 * messages that cannot be paired with a preceding assistant tool call, and
 * (when `stripOrphanedToolCalls` is set) removal of assistant tool calls that
 * never received a result.
 */
describe('validateToolMessagesCore', () => {
	// Options that turn on stripping of orphaned tool_calls; the tests below
	// treat this as the Gemini configuration and the default as non-Gemini.
	const geminiOpts = { stripOrphanedToolCalls: true };

	it('passes through valid messages unchanged', () => {
		const messages: Raw.ChatMessage[] = [
			userMsg('hello'),
			assistantMsg('calling tools', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'file contents'),
			toolMsg('2', 'dir listing'),
			assistantMsg('done'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(filterReasons).toHaveLength(0);
		expect(result).toHaveLength(5);
	});

	it('removes orphaned tool result messages (no preceding assistant)', () => {
		const messages: Raw.ChatMessage[] = [
			userMsg('hello'),
			toolMsg('1', 'orphaned result'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.User);
		expect(filterReasons).toContain('noPreviousAssistantMessage');
	});

	it('removes tool result messages when assistant had no tool_calls', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('no tools called'),
			toolMsg('1', 'orphaned result'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.Assistant);
		expect(filterReasons).toContain('noToolCalls');
	});

	it('removes tool result messages with non-matching tool_call_id', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile')]),
			toolMsg('1', 'result'),
			toolMsg('999', 'wrong id'),
		];

		const { messages: result } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(2);
		expect(result[0].role).toBe(Raw.ChatRole.Assistant);
		expect(result[1].role).toBe(Raw.ChatRole.Tool);
	});

	it('strips orphaned tool_calls from assistant message when results are missing', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling 3 tools', [tc('1', 'readFile'), tc('2', 'listDir'), tc('3', 'grep')]),
			toolMsg('1', 'result 1'),
			// tool results for '2' and '3' are missing
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toHaveLength(1);
		expect(asstMsg.toolCalls![0].id).toBe('1');
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(2);
	});

	it('clears toolCalls entirely when no results exist for any tool_call', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile'), tc('2', 'listDir')]),
			userMsg('next message'),
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toBeUndefined();
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(2);
	});

	it('handles multiple assistant turns with mixed valid/orphaned tool_calls', () => {
		const messages: Raw.ChatMessage[] = [
			// First round: all matched
			assistantMsg('round 1', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'result 1'),
			toolMsg('2', 'result 2'),
			// Second round: one orphaned
			assistantMsg('round 2', [tc('3', 'grep'), tc('4', 'writeFile')]),
			toolMsg('3', 'result 3'),
			// '4' is missing
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(5);

		const round1Asst = result[0] as Raw.AssistantChatMessage;
		expect(round1Asst.toolCalls).toHaveLength(2);

		const round2Asst = result[3] as Raw.AssistantChatMessage;
		expect(round2Asst.toolCalls).toHaveLength(1);
		expect(round2Asst.toolCalls![0].id).toBe('3');
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(1);
	});

	it('does not strip tool_calls when assistant has no toolCalls', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('just text, no tools'),
			userMsg('ok'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(2);
		expect(filterReasons).toHaveLength(0);
	});

	it('handles the boundary between two assistant messages correctly', () => {
		// Ensure tool results are only matched to the immediately preceding assistant
		const messages: Raw.ChatMessage[] = [
			assistantMsg('first', [tc('1', 'readFile')]),
			toolMsg('1', 'result for first'),
			assistantMsg('second', [tc('2', 'listDir')]),
			toolMsg('2', 'result for second'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(4);
		expect(filterReasons).toHaveLength(0);
	});

	it('strips tool_calls when the last assistant message has unresolved calls', () => {
		// This simulates the maxToolCallsExceeded scenario
		const messages: Raw.ChatMessage[] = [
			userMsg('do something'),
			assistantMsg('round 1', [tc('1', 'readFile')]),
			toolMsg('1', 'result'),
			assistantMsg('round 2 — exceeded', [tc('2', 'listDir'), tc('3', 'grep')]),
			// No tool results — tool call limit exceeded
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(4);
		const lastAsst = result[3] as Raw.AssistantChatMessage;
		expect(lastAsst.toolCalls).toBeUndefined();
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(2);
	});

	it('does not strip orphaned tool_calls when stripOrphanedToolCalls is not set', () => {
		// For non-Gemini models, orphaned tool_calls should be left as-is
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'result 1'),
			// '2' is missing
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		// tool_calls preserved — no stripping for non-Gemini models
		expect(asstMsg.toolCalls).toHaveLength(2);
		expect(filterReasons).toHaveLength(0);
	});

	it('matches tool results across an intervening user message', () => {
		// Regression: Assistant(toolCalls) → User → Tool should still pair correctly
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile')]),
			userMsg('some user message'),
			toolMsg('1', 'result'),
		];

		// First-pass keeps the tool result (previousAssistantMessage is not reset by user messages)
		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(3);
		// Second-pass should NOT strip the tool_call — the result exists after the user message
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toHaveLength(1);
		expect(asstMsg.toolCalls![0].id).toBe('1');
		expect(filterReasons).toHaveLength(0);
	});

	it('strips orphaned tool_calls when tool result is separated by a second assistant message', () => {
		// Assistant(toolCalls) → User → Assistant → Tool should NOT pair across the second assistant
		const messages: Raw.ChatMessage[] = [
			assistantMsg('first', [tc('1', 'readFile')]),
			userMsg('some user message'),
			assistantMsg('second', [tc('2', 'listDir')]),
			toolMsg('2', 'result for second'),
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(4);
		// First assistant's tool_call '1' has no matching result — should be stripped
		const firstAsst = result[0] as Raw.AssistantChatMessage;
		expect(firstAsst.toolCalls).toBeUndefined();
		// Second assistant's tool_call '2' is properly matched
		const secondAsst = result[2] as Raw.AssistantChatMessage;
		expect(secondAsst.toolCalls).toHaveLength(1);
		expect(secondAsst.toolCalls![0].id).toBe('2');
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(1);
	});

	it('correctly matches tool results with empty-string toolCallId', () => {
		// Edge case: empty string is a valid tool call ID and should not be treated as falsy
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('', 'readFile')]),
			toolMsg('', 'result'),
		];

		const { messages: result, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toHaveLength(1);
		expect(asstMsg.toolCalls![0].id).toBe('');
		expect(strippedToolCallCount).toBe(0);
	});
});