Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/intents/test/node/validateToolMessages.spec.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Raw } from '@vscode/prompt-tsx';
7
import { describe, expect, it } from 'vitest';
8
import { ToolCallingLoop } from '../../node/toolCallingLoop';
9
10
/**
 * Wraps a plain string in a raw text content part.
 *
 * @param text The text the part should carry.
 * @returns A content part tagged with the `Text` kind that holds `text`.
 */
function textPart(text: string): Raw.ChatCompletionContentPartText {
	return { type: Raw.ChatCompletionContentPartKind.Text, text };
}
13
14
/**
 * Builds an assistant message whose content is a single text part.
 *
 * @param text Text content of the message.
 * @param toolCalls Optional tool calls to attach; the property is `undefined` when omitted.
 * @returns The assistant chat message.
 */
function assistantMsg(text: string, toolCalls?: Raw.ChatMessageToolCall[]): Raw.AssistantChatMessage {
	return {
		role: Raw.ChatRole.Assistant,
		content: [textPart(text)],
		toolCalls,
	};
}
21
22
/**
 * Builds a tool-result message whose content is a single text part.
 *
 * @param toolCallId Id of the tool call this result answers.
 * @param text Text content of the result.
 * @returns The tool chat message.
 */
function toolMsg(toolCallId: string, text: string): Raw.ToolChatMessage {
	return {
		role: Raw.ChatRole.Tool,
		toolCallId,
		content: [textPart(text)],
	};
}
29
30
/**
 * Builds a user message whose content is a single text part.
 *
 * @param text Text content of the message.
 * @returns The user chat message.
 */
function userMsg(text: string): Raw.UserChatMessage {
	return {
		role: Raw.ChatRole.User,
		content: [textPart(text)],
	};
}
36
37
/**
 * Builds a function-type tool call descriptor.
 *
 * @param id Tool call id (an empty string is passed through as-is).
 * @param name Name of the function being called.
 * @param args Serialized arguments string; defaults to `'{}'`.
 * @returns The tool call object with `type` set to `'function'`.
 */
function tc(id: string, name: string, args = '{}'): Raw.ChatMessageToolCall {
	return { id, type: 'function', function: { name, arguments: args } };
}
40
41
// Exercises ToolCallingLoop.validateToolMessagesCore: removal of tool-result messages that
// have no matching assistant tool call, and (when `stripOrphanedToolCalls` is set) removal of
// assistant tool calls that have no matching tool result.
describe('validateToolMessagesCore', () => {
	// Options used by the tests that expect orphaned tool_calls to be stripped.
	const geminiOpts = { stripOrphanedToolCalls: true };

	it('passes through valid messages unchanged', () => {
		const messages: Raw.ChatMessage[] = [
			userMsg('hello'),
			assistantMsg('calling tools', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'file contents'),
			toolMsg('2', 'dir listing'),
			assistantMsg('done'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(filterReasons).toHaveLength(0);
		expect(result).toHaveLength(5);
	});

	it('removes orphaned tool result messages (no preceding assistant)', () => {
		const messages: Raw.ChatMessage[] = [
			userMsg('hello'),
			toolMsg('1', 'orphaned result'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.User);
		expect(filterReasons).toContain('noPreviousAssistantMessage');
	});

	it('removes tool result messages when assistant had no tool_calls', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('no tools called'),
			toolMsg('1', 'orphaned result'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.Assistant);
		expect(filterReasons).toContain('noToolCalls');
	});

	it('removes tool result messages with non-matching tool_call_id', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile')]),
			toolMsg('1', 'result'),
			toolMsg('999', 'wrong id'),
		];

		const { messages: result } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(2);
		expect(result[0].role).toBe(Raw.ChatRole.Assistant);
		expect(result[1].role).toBe(Raw.ChatRole.Tool);
		// The surviving tool message must be the matching one, not the one with the wrong id
		expect((result[1] as Raw.ToolChatMessage).toolCallId).toBe('1');
	});

	it('strips orphaned tool_calls from assistant message when results are missing', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling 3 tools', [tc('1', 'readFile'), tc('2', 'listDir'), tc('3', 'grep')]),
			toolMsg('1', 'result 1'),
			// tool results for '2' and '3' are missing
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toHaveLength(1);
		expect(asstMsg.toolCalls?.[0]?.id).toBe('1');
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(2);
	});

	it('clears toolCalls entirely when no results exist for any tool_call', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile'), tc('2', 'listDir')]),
			userMsg('next message'),
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		// Only the tool_calls are cleared; both messages themselves are kept
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toBeUndefined();
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(2);
	});

	it('handles multiple assistant turns with mixed valid/orphaned tool_calls', () => {
		const messages: Raw.ChatMessage[] = [
			// First round: all matched
			assistantMsg('round 1', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'result 1'),
			toolMsg('2', 'result 2'),
			// Second round: one orphaned
			assistantMsg('round 2', [tc('3', 'grep'), tc('4', 'writeFile')]),
			toolMsg('3', 'result 3'),
			// '4' is missing
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(5);

		const round1Asst = result[0] as Raw.AssistantChatMessage;
		expect(round1Asst.toolCalls).toHaveLength(2);

		const round2Asst = result[3] as Raw.AssistantChatMessage;
		expect(round2Asst.toolCalls).toHaveLength(1);
		expect(round2Asst.toolCalls?.[0]?.id).toBe('3');
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(1);
	});

	it('does not strip tool_calls when assistant has no toolCalls', () => {
		const messages: Raw.ChatMessage[] = [
			assistantMsg('just text, no tools'),
			userMsg('ok'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(2);
		expect(filterReasons).toHaveLength(0);
	});

	it('handles the boundary between two assistant messages correctly', () => {
		// Ensure tool results are only matched to the immediately preceding assistant
		const messages: Raw.ChatMessage[] = [
			assistantMsg('first', [tc('1', 'readFile')]),
			toolMsg('1', 'result for first'),
			assistantMsg('second', [tc('2', 'listDir')]),
			toolMsg('2', 'result for second'),
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(4);
		expect(filterReasons).toHaveLength(0);
	});

	it('strips tool_calls when the last assistant message has unresolved calls', () => {
		// This simulates the maxToolCallsExceeded scenario
		const messages: Raw.ChatMessage[] = [
			userMsg('do something'),
			assistantMsg('round 1', [tc('1', 'readFile')]),
			toolMsg('1', 'result'),
			assistantMsg('round 2 — exceeded', [tc('2', 'listDir'), tc('3', 'grep')]),
			// No tool results — tool call limit exceeded
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(4);
		const lastAsst = result[3] as Raw.AssistantChatMessage;
		expect(lastAsst.toolCalls).toBeUndefined();
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(2);
	});

	it('does not strip orphaned tool_calls when stripOrphanedToolCalls is not set', () => {
		// For non-Gemini models, orphaned tool_calls should be left as-is
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'result 1'),
			// '2' is missing
		];

		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages);
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		// tool_calls preserved — no stripping for non-Gemini models
		expect(asstMsg.toolCalls).toHaveLength(2);
		expect(filterReasons).toHaveLength(0);
	});

	it('matches tool results across an intervening user message', () => {
		// Regression: Assistant(toolCalls) → User → Tool should still pair correctly
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile')]),
			userMsg('some user message'),
			toolMsg('1', 'result'),
		];

		// First-pass keeps the tool result (previousAssistantMessage is not reset by user messages)
		const { messages: result, filterReasons } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(3);
		// Second-pass should NOT strip the tool_call — the result exists after the user message
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toHaveLength(1);
		expect(asstMsg.toolCalls?.[0]?.id).toBe('1');
		expect(filterReasons).toHaveLength(0);
	});

	it('strips orphaned tool_calls when tool result is separated by a second assistant message', () => {
		// Assistant(toolCalls) → User → Assistant → Tool should NOT pair across the second assistant
		const messages: Raw.ChatMessage[] = [
			assistantMsg('first', [tc('1', 'readFile')]),
			userMsg('some user message'),
			assistantMsg('second', [tc('2', 'listDir')]),
			toolMsg('2', 'result for second'),
		];

		const { messages: result, filterReasons, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(4);
		// First assistant's tool_call '1' has no matching result — should be stripped
		const firstAsst = result[0] as Raw.AssistantChatMessage;
		expect(firstAsst.toolCalls).toBeUndefined();
		// Second assistant's tool_call '2' is properly matched
		const secondAsst = result[2] as Raw.AssistantChatMessage;
		expect(secondAsst.toolCalls).toHaveLength(1);
		expect(secondAsst.toolCalls?.[0]?.id).toBe('2');
		expect(filterReasons).toHaveLength(0);
		expect(strippedToolCallCount).toBe(1);
	});

	it('correctly matches tool results with empty-string toolCallId', () => {
		// Edge case: empty string is a valid tool call ID and should not be treated as falsy
		const messages: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('', 'readFile')]),
			toolMsg('', 'result'),
		];

		const { messages: result, strippedToolCallCount } = ToolCallingLoop.validateToolMessagesCore(messages, geminiOpts);
		expect(result).toHaveLength(2);
		const asstMsg = result[0] as Raw.AssistantChatMessage;
		expect(asstMsg.toolCalls).toHaveLength(1);
		expect(asstMsg.toolCalls?.[0]?.id).toBe('');
		expect(strippedToolCallCount).toBe(0);
	});
});
263
264