CoCalc -- responsesApiToolSearch.spec.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/test/responsesApiToolSearch.spec.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { Raw } from '@vscode/prompt-tsx';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { DisposableStore } from '../../../../util/vs/base/common/lifecycle';
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
import { ChatLocation } from '../../../chat/common/commonTypes';
import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService';
import { InMemoryConfigurationService } from '../../../configuration/test/common/inMemoryConfigurationService';
import { IResponseDelta, OpenAiFunctionTool } from '../../../networking/common/fetch';
import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking';
import { IToolDeferralService } from '../../../networking/common/toolDeferralService';
import { TelemetryData } from '../../../telemetry/common/telemetryData';
import { SpyingTelemetryService } from '../../../telemetry/node/spyingTelemetryService';
import { createPlatformServices } from '../../../test/node/services';
import { createResponsesRequestBody, OpenAIResponsesProcessor } from '../responsesApi';
20

21
/**
 * Builds a minimal stand-in for an `IChatEndpoint` for use in these tests.
 *
 * The given `model` string is reused for the mock's `model`, `family` and `name`.
 * `acquireTokenizer`, `processResponseFromChatEndpoint`, `makeChatRequest`,
 * `makeChatRequest2` and `createRequestBody` all throw `Error('Not implemented')`
 * when called; `cloneWithTokenOverride` returns the same mock object.
 *
 * @param model Model identifier to expose on the mock endpoint.
 * @returns An object literal cast to `IChatEndpoint`; only the members set below exist at runtime.
 */
function createMockEndpoint(model: string): IChatEndpoint {
	// Shared stub: any call fails loudly instead of silently returning undefined.
	const notImplemented = (): never => { throw new Error('Not implemented'); };

	return {
		model,
		family: model,
		modelProvider: 'openai',
		supportsToolCalls: true,
		supportsVision: false,
		supportsPrediction: false,
		showInModelPicker: true,
		isFallback: false,
		maxOutputTokens: 4096,
		modelMaxPromptTokens: 128000,
		urlOrRequestMetadata: 'https://test',
		name: model,
		version: '1',
		tokenizer: 'cl100k_base' as any,
		acquireTokenizer: notImplemented,
		processResponseFromChatEndpoint: notImplemented,
		makeChatRequest: notImplemented,
		makeChatRequest2: notImplemented,
		createRequestBody: notImplemented,
		cloneWithTokenOverride() { return this; },
		// Double cast because the literal deliberately implements only part of the interface.
	} as unknown as IChatEndpoint;
}
45

46
/**
 * Builds the `ICreateEndpointBodyOptions` used by the request-body tests.
 *
 * Defaults: a single user message ("Hello"), `ChatLocation.Agent`, and five
 * function tools (`read_file`, `grep_search`, `some_mcp_tool`,
 * `another_deferred_tool`, `tool_search`).
 *
 * @param overrides Fields that replace the defaults. They are spread last, so the
 *   merge is shallow: supplying `requestOptions` (even as `undefined`) replaces the
 *   whole default tool list rather than merging into it.
 * @returns The merged options, cast to `ICreateEndpointBodyOptions`.
 */
function createMockOptions(overrides: Partial<ICreateEndpointBodyOptions> = {}): ICreateEndpointBodyOptions {
	const stringParam = { type: 'string' };

	return {
		debugName: 'test',
		messages: [{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello' }] }],
		location: ChatLocation.Agent,
		finishedCb: undefined,
		requestId: 'test-req-1',
		postOptions: { max_tokens: 4096 },
		requestOptions: {
			tools: [
				createFunctionTool('read_file', 'Read a file', { path: stringParam }, ['path']),
				createFunctionTool('grep_search', 'Search for text', { query: stringParam }, ['query']),
				createFunctionTool('some_mcp_tool', 'An MCP tool', { input: stringParam }, ['input']),
				// No parameters and therefore no `required` list.
				createFunctionTool('another_deferred_tool', 'Another tool', {}),
				createFunctionTool('tool_search', 'Search tools', { query: stringParam }, ['query']),
			]
		},
		...overrides,
	} as ICreateEndpointBodyOptions;
}
66

67
/**
 * Creates an OpenAI-style function tool definition with an object-typed parameter schema.
 *
 * @param name Tool name.
 * @param description Human-readable tool description.
 * @param properties Schema entries keyed by parameter name.
 * @param required Names of required parameters. When empty (the default), the
 *   `required` key is left out of the schema entirely.
 * @returns A tool of shape `{ type: 'function', function: { name, description, parameters } }`.
 */
function createFunctionTool(name: string, description: string, properties: Record<string, object>, required: string[] = []): OpenAiFunctionTool {
	// Only emit `required` when there is something to require.
	const requiredPart = required.length > 0 ? { required } : {};

	return {
		type: 'function',
		function: {
			name,
			description,
			parameters: { type: 'object', properties, ...requiredPart }
		}
	};
}
77

78
describe('createResponsesRequestBody tools', () => {
	let disposables: DisposableStore;
	let services: ReturnType<typeof createPlatformServices>;
	let accessor: ReturnType<ReturnType<typeof createPlatformServices>['createTestingAccessor']>;

	beforeEach(() => {
		disposables = new DisposableStore();
		services = createPlatformServices(disposables);
		// Stub deferral service: only the names in this set are reported as non-deferred.
		const coreNonDeferred = new Set(['read_file', 'list_dir', 'grep_search', 'semantic_search', 'file_search',
			'replace_string_in_file', 'create_file', 'run_in_terminal', 'get_terminal_output',
			'get_errors', 'manage_todo_list', 'runSubagent', 'search_subagent', 'execution_subagent',
			'runTests', 'tool_search', 'view_image', 'fetch_webpage']);
		services.define(IToolDeferralService, { _serviceBrand: undefined, isNonDeferredTool: (name: string) => coreNonDeferred.has(name) });
		accessor = services.createTestingAccessor();
	});

	afterEach(() => {
		// Release whatever the platform services registered for this test.
		disposables.dispose();
	});

	/**
	 * Runs `createResponsesRequestBody` through the test instantiation service.
	 *
	 * @param options Request options handed to `createResponsesRequestBody`.
	 * @param config.model Model for the mock endpoint; defaults to `'gpt-5.4'`.
	 * @param config.toolSearchEnabled When given, written to
	 *   `ConfigKey.ResponsesApiToolSearchEnabled` before the body is built; when
	 *   omitted, the configuration is left untouched.
	 * @returns The request body produced by `createResponsesRequestBody`.
	 */
	function buildRequestBody(options: ICreateEndpointBodyOptions, config: { model?: string; toolSearchEnabled?: boolean } = {}) {
		const endpoint = createMockEndpoint(config.model ?? 'gpt-5.4');
		if (config.toolSearchEnabled !== undefined) {
			const configService = accessor.get(IConfigurationService) as InMemoryConfigurationService;
			configService.setConfig(ConfigKey.ResponsesApiToolSearchEnabled, config.toolSearchEnabled);
		}

		return accessor.get(IInstantiationService).invokeFunction(
			createResponsesRequestBody, options, endpoint.model, endpoint
		);
	}

	/**
	 * Builds a request body with tool search enabled for the given history and a
	 * fixed tool list: `file_search`, `read_file`, `some_mcp_tool`, `tool_search`.
	 */
	function createToolSearchScenario(messages: Raw.ChatMessage[]) {
		const options = createMockOptions({
			messages,
			requestOptions: {
				tools: [
					createFunctionTool('file_search', 'Find files', { query: { type: 'string' } }, ['query']),
					createFunctionTool('read_file', 'Read a file', { path: { type: 'string' } }, ['path']),
					createFunctionTool('some_mcp_tool', 'An MCP tool', { input: { type: 'string' } }, ['input']),
					createFunctionTool('tool_search', 'Search tools', { query: { type: 'string' } }, ['query']),
				]
			}
		});

		return buildRequestBody(options, { toolSearchEnabled: true });
	}

	it('passes tools through without defer_loading when tool search disabled', () => {
		const body = buildRequestBody(createMockOptions(), { toolSearchEnabled: false });

		const tools = body.tools as any[];
		expect(tools).toBeDefined();
		expect(tools.find(t => t.type === 'tool_search')).toBeUndefined();
		expect(tools.every(t => !t.defer_loading)).toBe(true);
	});

	it('adds client tool_search and defer_loading when enabled', () => {
		const body = buildRequestBody(createMockOptions(), { toolSearchEnabled: true });

		const tools = body.tools as any[];
		expect(tools).toBeDefined();

		// Should have client-executed tool_search
		const toolSearchTool = tools.find(t => t.type === 'tool_search');
		expect(toolSearchTool).toBeDefined();
		expect(toolSearchTool.execution).toBe('client');

		// Non-deferred tools should be present without defer_loading.
		// Presence is asserted first so a missing tool cannot pass vacuously.
		for (const name of ['read_file', 'grep_search']) {
			const tool = tools.find(t => t.name === name);
			expect(tool).toBeDefined();
			expect(tool.defer_loading).toBeUndefined();
		}

		// Deferred tools should NOT be in the request (client-executed mode excludes them entirely)
		expect(tools.find(t => t.name === 'some_mcp_tool')).toBeUndefined();
		expect(tools.find(t => t.name === 'another_deferred_tool')).toBeUndefined();
	});

	it('does not defer tools for unsupported models', () => {
		const body = buildRequestBody(createMockOptions(), { model: 'gpt-4o', toolSearchEnabled: true });

		const tools = body.tools as any[];
		expect(tools.find(t => t.type === 'tool_search')).toBeUndefined();
		expect(tools.every(t => !t.defer_loading)).toBe(true);
	});

	it('does not defer tools for non-Agent locations', () => {
		const options = createMockOptions({ location: ChatLocation.Panel });
		const body = buildRequestBody(options, { toolSearchEnabled: true });

		const tools = body.tools as any[];
		expect(tools.find(t => t.type === 'tool_search')).toBeUndefined();
		expect(tools.every(t => !t.defer_loading)).toBe(true);
	});

	it('does not defer tools when tool_search is not in the request tool list', () => {
		// Repro for https://github.com/microsoft/vscode/issues/311946: a custom agent with
		// `tools: ['my-mcp-server/*']` filters out tool_search. Without this gate, every
		// MCP tool would be marked deferred and stripped from the request, leaving the
		// agent with nothing to call.
		const options = createMockOptions({
			requestOptions: {
				tools: [
					createFunctionTool('some_mcp_tool', 'An MCP tool', {}),
					createFunctionTool('another_mcp_tool', 'Another MCP tool', {}),
				]
			}
		});
		const body = buildRequestBody(options, { toolSearchEnabled: true });

		const tools = body.tools as any[];
		// No client tool_search should be added.
		expect(tools.find(t => t.type === 'tool_search')).toBeUndefined();
		// All user-listed tools should be sent to the model, not stripped.
		expect(tools.find(t => t.name === 'some_mcp_tool')).toBeDefined();
		expect(tools.find(t => t.name === 'another_mcp_tool')).toBeDefined();
	});

	it('always filters tool_search function tool from tools array', () => {
		const options = createMockOptions({
			requestOptions: {
				tools: [
					createFunctionTool('read_file', 'Read a file', {}),
					createFunctionTool('tool_search', 'Search tools', {}),
				]
			}
		});
		const body = buildRequestBody(options, { toolSearchEnabled: false });

		const tools = body.tools as any[];
		expect(tools.find(t => t.name === 'tool_search')).toBeUndefined();
		expect(tools.find(t => t.name === 'read_file')).toBeDefined();
	});

	it('converts tool_search history even when feature flag is off', () => {
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello' }] },
			{
				role: Raw.ChatRole.Assistant,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Let me search for tools.' }],
				toolCalls: [{ id: 'call_ts1', type: 'function', function: { name: 'tool_search', arguments: '{"query":"file tools"}' } }],
			},
			{
				role: Raw.ChatRole.Tool,
				toolCallId: 'call_ts1',
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: '["read_file","grep_search"]' }],
			},
		];

		const body = buildRequestBody(createMockOptions({ messages }), { toolSearchEnabled: false });

		const input = body.input as any[];
		// tool_search tool call should be converted to tool_search_call, not function_call
		const toolSearchCall = input.find(i => i.type === 'tool_search_call');
		expect(toolSearchCall).toBeDefined();
		expect(toolSearchCall.execution).toBe('client');
		expect(toolSearchCall.call_id).toBe('call_ts1');

		// tool_search result should be converted to tool_search_output, not function_call_output
		const toolSearchOutput = input.find(i => i.type === 'tool_search_output');
		expect(toolSearchOutput).toBeDefined();
		expect(toolSearchOutput.execution).toBe('client');
		expect(toolSearchOutput.call_id).toBe('call_ts1');

		// No tools are currently deferred, so historical tool_search_output should not redeclare them.
		const loadedToolNames = (toolSearchOutput.tools as any[]).map((t: any) => t.name);
		expect(loadedToolNames).toEqual([]);

		// Should not have any function_call with name tool_search
		const badFunctionCall = input.find(i => i.type === 'function_call' && i.name === 'tool_search');
		expect(badFunctionCall).toBeUndefined();
	});

	it('converts tool_search history when current request has no tools', () => {
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello' }] },
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ id: 'call_ts_no_tools', type: 'function', function: { name: 'tool_search', arguments: '{"query":"file tools"}' } }],
			},
			{
				role: Raw.ChatRole.Tool,
				toolCallId: 'call_ts_no_tools',
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: '["read_file"]' }],
			},
		];

		// The feature flag is deliberately not set here; the test runs against the default configuration.
		const options = createMockOptions({ messages, requestOptions: undefined });
		const body = buildRequestBody(options);

		const input = body.input as Array<{ type?: string; name?: string; execution?: string; call_id?: string; tools?: unknown[] }>;
		expect(input.find(i => i.type === 'tool_search_call')).toMatchObject({
			type: 'tool_search_call',
			execution: 'client',
			call_id: 'call_ts_no_tools',
		});
		expect(input.find(i => i.type === 'tool_search_output')).toMatchObject({
			type: 'tool_search_output',
			execution: 'client',
			call_id: 'call_ts_no_tools',
			tools: [],
		});
		expect(input.find(i => i.type === 'function_call' && i.name === 'tool_search')).toBeUndefined();
	});

	it('excludes non-deferred tools from tool_search_output history', () => {
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Find file tools' }] },
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ id: 'call_ts_file', type: 'function', function: { name: 'tool_search', arguments: '{"query":"file tools"}' } }],
			},
			{
				role: Raw.ChatRole.Tool,
				toolCallId: 'call_ts_file',
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: '["file_search","some_mcp_tool"]' }],
			},
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [
					{ id: 'call_file', type: 'function', function: { name: 'file_search', arguments: '{"query":"*.ts"}' } },
					{ id: 'call_mcp', type: 'function', function: { name: 'some_mcp_tool', arguments: '{"input":"x"}' } },
				],
			},
		];

		const body = createToolSearchScenario(messages);

		const input = body.input as Array<{ type?: string; name?: string; namespace?: string; tools?: Array<{ name: string }> }>;
		const toolSearchOutput = input.find(i => i.type === 'tool_search_output');
		const fileSearchCall = input.find(i => i.type === 'function_call' && i.name === 'file_search');
		const mcpToolCall = input.find(i => i.type === 'function_call' && i.name === 'some_mcp_tool');

		expect({
			loadedToolNames: toolSearchOutput?.tools?.map(t => t.name),
			fileSearchNamespace: fileSearchCall?.namespace,
			mcpToolNamespace: mcpToolCall?.namespace,
		}).toEqual({
			loadedToolNames: ['some_mcp_tool'],
			fileSearchNamespace: undefined,
			mcpToolNamespace: 'some_mcp_tool',
		});
	});

	it('does not load tools from tool_search_output when only non-deferred tools are returned', () => {
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Find core tools' }] },
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ id: 'call_ts_core', type: 'function', function: { name: 'tool_search', arguments: '{"query":"core tools"}' } }],
			},
			{
				role: Raw.ChatRole.Tool,
				toolCallId: 'call_ts_core',
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: '["file_search","read_file"]' }],
			},
		];

		const body = createToolSearchScenario(messages);

		const input = body.input as Array<{ type?: string; tools?: Array<{ name: string }> }>;
		const toolSearchOutput = input.find(i => i.type === 'tool_search_output');

		expect(toolSearchOutput?.tools?.map(t => t.name)).toEqual([]);
	});
});
383

384
describe('OpenAIResponsesProcessor tool search events', () => {
	let disposables: DisposableStore;

	beforeEach(() => {
		disposables = new DisposableStore();
	});

	afterEach(() => {
		// Release the platform services created by createProcessor during the test.
		disposables.dispose();
	});

	/**
	 * Instantiates an `OpenAIResponsesProcessor` through the test instantiation
	 * service, using fresh telemetry data, a spying telemetry service and fixed
	 * request ids (`'req-123'`, `'gh-req-456'`).
	 */
	function createProcessor() {
		const telemetryData = TelemetryData.createAndMarkAsIssued({}, {});
		const telemetryService = new SpyingTelemetryService();
		const accessor = createPlatformServices(disposables).createTestingAccessor();
		return accessor.get(IInstantiationService).createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, 'req-123', 'gh-req-456', '', undefined);
	}

	/**
	 * Pushes each event into the processor and returns every delta handed to the
	 * finished-callback, in call order.
	 *
	 * @param processor Processor under test.
	 * @param events Stream events; each gets `sequence_number: 0` unless it supplies its own.
	 */
	function collectDeltas(processor: OpenAIResponsesProcessor, events: any[]): IResponseDelta[] {
		const deltas: IResponseDelta[] = [];
		const finishedCb = async (_text: string, _index: number, delta: IResponseDelta) => {
			deltas.push(delta);
			return undefined;
		};
		for (const event of events) {
			processor.push({ sequence_number: 0, ...event }, finishedCb);
		}
		return deltas;
	}

	it('handles client tool_search_call as copilotToolCall', () => {
		const processor = createProcessor();
		const deltas = collectDeltas(processor, [
			{
				type: 'response.output_item.added',
				output_index: 0,
				item: {
					type: 'tool_search_call' as any,
					id: 'ts_002',
					execution: 'client',
					call_id: 'call_abc',
					status: 'in_progress',
					arguments: {},
				} as any,
			},
			{
				type: 'response.output_item.done',
				output_index: 0,
				item: {
					type: 'tool_search_call' as any,
					id: 'ts_002',
					execution: 'client',
					call_id: 'call_abc',
					status: 'completed',
					arguments: { query: 'Find shipping tools' },
				} as any,
			}
		]);

		// First delta: beginToolCalls for tool_search
		const begin = deltas[0]?.beginToolCalls;
		expect(begin).toBeDefined();
		expect(begin?.[0].name).toBe('tool_search');
		expect(begin?.[0].id).toBe('call_abc');

		// Second delta: completed copilotToolCall
		const completed = deltas[1]?.copilotToolCalls;
		expect(completed).toBeDefined();
		expect(completed?.[0]).toMatchObject({
			id: 'call_abc',
			name: 'tool_search',
			arguments: '{"query":"Find shipping tools"}',
		});
	});
});
449

450
Product

Resources

Company