CoCalc -- validateToolMessages.spec.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/intents/test/node/validateToolMessages.spec.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { Raw } from '@vscode/prompt-tsx';
7
import { describe, expect, it } from 'vitest';
8
import { ToolCallingLoop } from '../../node/toolCallingLoop';
9

10
/** Wraps a plain string in a text content part for use in a raw chat message. */
function textPart(text: string): Raw.ChatCompletionContentPartText {
	const part: Raw.ChatCompletionContentPartText = {
		type: Raw.ChatCompletionContentPartKind.Text,
		text,
	};
	return part;
}
13

14
/**
 * Builds an assistant message whose content is a single text part.
 *
 * @param text Text for the message's only content part.
 * @param toolCalls Forwarded unchanged; when omitted the `toolCalls` property is `undefined`.
 */
function assistantMsg(text: string, toolCalls?: Raw.ChatMessageToolCall[]): Raw.AssistantChatMessage {
	const content = [textPart(text)];
	return { role: Raw.ChatRole.Assistant, content, toolCalls };
}
21

22
/**
 * Builds a tool-result message with a single text part.
 *
 * @param toolCallId Id of the tool call this result claims to answer.
 * @param text Text for the message's only content part.
 */
function toolMsg(toolCallId: string, text: string): Raw.ToolChatMessage {
	const content = [textPart(text)];
	return { role: Raw.ChatRole.Tool, toolCallId, content };
}
29

30
/** Builds a user message whose content is a single text part holding `text`. */
function userMsg(text: string): Raw.UserChatMessage {
	const content = [textPart(text)];
	return { role: Raw.ChatRole.User, content };
}
36

37
/**
 * Builds a function-type tool call entry.
 *
 * @param id Tool call id.
 * @param name Name of the function being called.
 * @param args Serialized arguments string; defaults to `'{}'`.
 */
function tc(id: string, name: string, args = '{}'): Raw.ChatMessageToolCall {
	const fn = { name, arguments: args };
	return { id, type: 'function', function: fn };
}
40

41
// Runs ToolCallingLoop.validateToolMessagesCore over small hand-built conversations and
// checks the returned messages, filterReasons and strippedToolCallCount.
describe('validateToolMessagesCore', () => {
	// Options used by the cases that expect unanswered tool_calls to be stripped.
	const stripOrphans = { stripOrphanedToolCalls: true };

	it('passes through valid messages unchanged', () => {
		const conversation: Raw.ChatMessage[] = [
			userMsg('hello'),
			assistantMsg('calling tools', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'file contents'),
			toolMsg('2', 'dir listing'),
			assistantMsg('done'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		expect(outcome.filterReasons).toHaveLength(0);
		expect(outcome.messages).toHaveLength(5);
	});

	it('removes orphaned tool result messages (no preceding assistant)', () => {
		const conversation: Raw.ChatMessage[] = [
			userMsg('hello'),
			toolMsg('1', 'orphaned result'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation);
		const kept = outcome.messages;
		expect(kept).toHaveLength(1);
		expect(kept[0].role).toBe(Raw.ChatRole.User);
		expect(outcome.filterReasons).toContain('noPreviousAssistantMessage');
	});

	it('removes tool result messages when assistant had no tool_calls', () => {
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('no tools called'),
			toolMsg('1', 'orphaned result'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation);
		const kept = outcome.messages;
		expect(kept).toHaveLength(1);
		expect(kept[0].role).toBe(Raw.ChatRole.Assistant);
		expect(outcome.filterReasons).toContain('noToolCalls');
	});

	it('removes tool result messages with non-matching tool_call_id', () => {
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile')]),
			toolMsg('1', 'result'),
			toolMsg('999', 'wrong id'),
		];

		const kept = ToolCallingLoop.validateToolMessagesCore(conversation).messages;
		expect(kept).toHaveLength(2);
		expect(kept[0].role).toBe(Raw.ChatRole.Assistant);
		expect(kept[1].role).toBe(Raw.ChatRole.Tool);
	});

	it('strips orphaned tool_calls from assistant message when results are missing', () => {
		// Only call '1' gets a result; calls '2' and '3' are never answered.
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('calling 3 tools', [tc('1', 'readFile'), tc('2', 'listDir'), tc('3', 'grep')]),
			toolMsg('1', 'result 1'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		expect(outcome.messages).toHaveLength(2);
		const assistant = outcome.messages[0] as Raw.AssistantChatMessage;
		expect(assistant.toolCalls).toHaveLength(1);
		expect(assistant.toolCalls![0].id).toBe('1');
		expect(outcome.filterReasons).toHaveLength(0);
		expect(outcome.strippedToolCallCount).toBe(2);
	});

	it('clears toolCalls entirely when no results exist for any tool_call', () => {
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile'), tc('2', 'listDir')]),
			userMsg('next message'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		const assistant = outcome.messages[0] as Raw.AssistantChatMessage;
		expect(assistant.toolCalls).toBeUndefined();
		expect(outcome.filterReasons).toHaveLength(0);
		expect(outcome.strippedToolCallCount).toBe(2);
	});

	it('handles multiple assistant turns with mixed valid/orphaned tool_calls', () => {
		const conversation: Raw.ChatMessage[] = [
			// Round 1: every call has a result.
			assistantMsg('round 1', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'result 1'),
			toolMsg('2', 'result 2'),
			// Round 2: call '4' never receives a result.
			assistantMsg('round 2', [tc('3', 'grep'), tc('4', 'writeFile')]),
			toolMsg('3', 'result 3'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		const kept = outcome.messages;
		expect(kept).toHaveLength(5);

		const firstRound = kept[0] as Raw.AssistantChatMessage;
		expect(firstRound.toolCalls).toHaveLength(2);

		const secondRound = kept[3] as Raw.AssistantChatMessage;
		expect(secondRound.toolCalls).toHaveLength(1);
		expect(secondRound.toolCalls![0].id).toBe('3');
		expect(outcome.filterReasons).toHaveLength(0);
		expect(outcome.strippedToolCallCount).toBe(1);
	});

	it('does not strip tool_calls when assistant has no toolCalls', () => {
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('just text, no tools'),
			userMsg('ok'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation);
		expect(outcome.messages).toHaveLength(2);
		expect(outcome.filterReasons).toHaveLength(0);
	});

	it('handles the boundary between two assistant messages correctly', () => {
		// Each tool result directly follows the assistant message that issued its call.
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('first', [tc('1', 'readFile')]),
			toolMsg('1', 'result for first'),
			assistantMsg('second', [tc('2', 'listDir')]),
			toolMsg('2', 'result for second'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		expect(outcome.messages).toHaveLength(4);
		expect(outcome.filterReasons).toHaveLength(0);
	});

	it('strips tool_calls when the last assistant message has unresolved calls', () => {
		// Models the maxToolCallsExceeded scenario: the final assistant turn asks for
		// tools, but the conversation ends before any results arrive.
		const conversation: Raw.ChatMessage[] = [
			userMsg('do something'),
			assistantMsg('round 1', [tc('1', 'readFile')]),
			toolMsg('1', 'result'),
			assistantMsg('round 2 — exceeded', [tc('2', 'listDir'), tc('3', 'grep')]),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		expect(outcome.messages).toHaveLength(4);
		const finalAssistant = outcome.messages[3] as Raw.AssistantChatMessage;
		expect(finalAssistant.toolCalls).toBeUndefined();
		expect(outcome.filterReasons).toHaveLength(0);
		expect(outcome.strippedToolCallCount).toBe(2);
	});

	it('does not strip orphaned tool_calls when stripOrphanedToolCalls is not set', () => {
		// No options are passed (the non-Gemini path), so the unanswered call '2' must survive.
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile'), tc('2', 'listDir')]),
			toolMsg('1', 'result 1'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation);
		expect(outcome.messages).toHaveLength(2);
		const assistant = outcome.messages[0] as Raw.AssistantChatMessage;
		// Both tool_calls are still present because stripping was not requested.
		expect(assistant.toolCalls).toHaveLength(2);
		expect(outcome.filterReasons).toHaveLength(0);
	});

	it('matches tool results across an intervening user message', () => {
		// Regression case: Assistant(toolCalls) → User → Tool must still be paired.
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('1', 'readFile')]),
			userMsg('some user message'),
			toolMsg('1', 'result'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		// The tool result is kept even though a user message precedes it.
		expect(outcome.messages).toHaveLength(3);
		// The tool_call is kept too, since its result appears after the user message.
		const assistant = outcome.messages[0] as Raw.AssistantChatMessage;
		expect(assistant.toolCalls).toHaveLength(1);
		expect(assistant.toolCalls![0].id).toBe('1');
		expect(outcome.filterReasons).toHaveLength(0);
	});

	it('strips orphaned tool_calls when tool result is separated by a second assistant message', () => {
		// Assistant(toolCalls) → User → Assistant → Tool: the result belongs to the
		// second assistant only, so the first assistant's call stays unanswered.
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('first', [tc('1', 'readFile')]),
			userMsg('some user message'),
			assistantMsg('second', [tc('2', 'listDir')]),
			toolMsg('2', 'result for second'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		const kept = outcome.messages;
		expect(kept).toHaveLength(4);
		// Call '1' has no result anywhere, so the first assistant loses its toolCalls.
		const leading = kept[0] as Raw.AssistantChatMessage;
		expect(leading.toolCalls).toBeUndefined();
		// Call '2' is answered by the following tool message and is preserved.
		const trailing = kept[2] as Raw.AssistantChatMessage;
		expect(trailing.toolCalls).toHaveLength(1);
		expect(trailing.toolCalls![0].id).toBe('2');
		expect(outcome.filterReasons).toHaveLength(0);
		expect(outcome.strippedToolCallCount).toBe(1);
	});

	it('correctly matches tool results with empty-string toolCallId', () => {
		// An empty-string id is still an id; it must not be dropped as a falsy value.
		const conversation: Raw.ChatMessage[] = [
			assistantMsg('calling', [tc('', 'readFile')]),
			toolMsg('', 'result'),
		];

		const outcome = ToolCallingLoop.validateToolMessagesCore(conversation, stripOrphans);
		expect(outcome.messages).toHaveLength(2);
		const assistant = outcome.messages[0] as Raw.AssistantChatMessage;
		expect(assistant.toolCalls).toHaveLength(1);
		expect(assistant.toolCalls![0].id).toBe('');
		expect(outcome.strippedToolCallCount).toBe(0);
	});
});
263

264
Product

Resources

Company