GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import type { OpenAI } from 'openai';
import { describe, expect, it } from 'vitest';
import { TokenizerType } from '../../../../util/common/tokenizer';
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
import { ChatLocation } from '../../../chat/common/commonTypes';
import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService';
import { InMemoryConfigurationService } from '../../../configuration/test/common/inMemoryConfigurationService';
import { ILogService } from '../../../log/common/logService';
import { isOpenAIContextManagementResponse } from '../../../networking/common/fetch';
import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking';
import { openAIContextManagementCompactionType, OpenAIContextManagementResponse } from '../../../networking/common/openai';
import { IToolDeferralService } from '../../../networking/common/toolDeferralService';
import { IChatWebSocketManager, NullChatWebSocketManager } from '../../../networking/node/chatWebSocketManager';
import { TelemetryData } from '../../../telemetry/common/telemetryData';
import { SpyingTelemetryService } from '../../../telemetry/node/spyingTelemetryService';
import { createFakeStreamResponse } from '../../../test/node/fetcher';
import { createPlatformServices } from '../../../test/node/services';
import { CustomDataPartMimeTypes } from '../../common/endpointTypes';
import { createResponsesRequestBody, getResponsesApiCompactionThresholdFromBody, processResponseFromChatEndpoint, responseApiInputToRawMessagesForLogging } from '../responsesApi';

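// Minimal IChatEndpoint stub shared by all tests below: only the metadata
// fields matter, and every request-issuing method throws so that a test fails
// loudly if it accidentally reaches one.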
const testEndpoint: IChatEndpoint = {
	urlOrRequestMetadata: 'https://example.test/chat',
	modelMaxPromptTokens: 128000,
	name: 'Test Endpoint',
	version: '1',
	family: 'gpt-5-mini',
	tokenizer: TokenizerType.O200K,
	maxOutputTokens: 4096,
	model: 'gpt-5-mini',
	modelProvider: 'openai',
	supportsToolCalls: true,
	supportsVision: true,
	supportsPrediction: true,
	showInModelPicker: true,
	isFallback: false,
	acquireTokenizer() {
		throw new Error('Not implemented in test');
	},
	async processResponseFromChatEndpoint() {
		throw new Error('Not implemented in test');
	},
	async makeChatRequest() {
		throw new Error('Not implemented in test');
	},
	async makeChatRequest2() {
		throw new Error('Not implemented in test');
	},
	createRequestBody() {
		throw new Error('Not implemented in test');
	},
	cloneWithTokenOverride() {
		return this;
	}
};

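// Helper factories for the message shapes exercised below: request options,
// assistant messages carrying an opaque stateful-marker part, and assistant
// messages carrying a context-management (compaction) item.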
const createRequestOptions = (messages: Raw.ChatMessage[], useWebSocket: boolean): ICreateEndpointBodyOptions => ({
	debugName: 'test',
	messages,
	requestId: 'req-1',
	postOptions: {},
	finishedCb: undefined,
	location: undefined as any,
	useWebSocket,
});

const createStatefulMarkerMessage = (modelId: string, marker: string): Raw.ChatMessage => ({
	role: Raw.ChatRole.Assistant,
	content: [{
		type: Raw.ChatCompletionContentPartKind.Opaque,
		value: {
			type: CustomDataPartMimeTypes.StatefulMarker,
			value: {
				modelId,
				marker,
			}
		}
	}]
});

const createCompactionResponse = (id: string, encrypted_content: string): OpenAIContextManagementResponse => ({
	type: openAIContextManagementCompactionType,
	id,
	encrypted_content,
});

const createCompactionAssistantMessage = (compaction: OpenAIContextManagementResponse): Raw.ChatMessage => ({
	role: Raw.ChatRole.Assistant,
	content: [{
		type: Raw.ChatCompletionContentPartKind.Opaque,
		value: {
			type: CustomDataPartMimeTypes.ContextManagement,
			compaction,
		}
	}]
});

type ResponseFunctionCallInputItem = OpenAI.Responses.ResponseInputItem & {
	type: 'function_call';
	name: string;
	namespace?: string;
};

function isFunctionCallInputItem(item: OpenAI.Responses.ResponseInputItem, name: string): item is ResponseFunctionCallInputItem {
	return item.type === 'function_call' && 'name' in item && item.name === name;
}

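// responseApiInputToRawMessagesForLogging maps a Responses API request body
// back into Raw chat messages (for logging); these tests pin down the mapping
// for each input item type.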
describe('responseApiInputToRawMessagesForLogging', () => {

	it('converts simple string input to user message', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: 'Hello, world!'
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.User);
		expect(result[0].content).toEqual([
			{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello, world!' }
		]);
	});

	it('includes system instructions when provided', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: 'Hello',
			instructions: 'You are a helpful assistant'
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(2);
		expect(result[0].role).toBe(Raw.ChatRole.System);
		expect(result[0].content).toEqual([
			{ type: Raw.ChatCompletionContentPartKind.Text, text: 'You are a helpful assistant' }
		]);
		expect(result[1].role).toBe(Raw.ChatRole.User);
	});

	it('converts user message with input_text content', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: [
				{
					role: 'user',
					content: [{ type: 'input_text', text: 'What is the weather?' }]
				}
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.User);
		expect(result[0].content).toEqual([
			{ type: Raw.ChatCompletionContentPartKind.Text, text: 'What is the weather?' }
		]);
	});

	it('converts system/developer messages to system role', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: [
				{
					role: 'developer',
					content: 'Be concise'
				}
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.System);
	});

	it('converts function_call items to assistant tool calls', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: [
				{
					type: 'function_call',
					call_id: 'call_123',
					name: 'get_weather',
					arguments: '{"location": "Seattle"}'
				}
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.Assistant);
		const assistantMsg = result[0] as Raw.AssistantChatMessage;
		expect(assistantMsg.toolCalls).toHaveLength(1);
		expect(assistantMsg.toolCalls![0]).toEqual({
			id: 'call_123',
			type: 'function',
			function: {
				name: 'get_weather',
				arguments: '{"location": "Seattle"}'
			}
		});
	});

	it('converts function_call_output items to tool messages', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: [
				{
					type: 'function_call_output',
					call_id: 'call_123',
					output: 'Sunny, 72°F'
				}
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.Tool);
		const toolMsg = result[0] as Raw.ToolChatMessage;
		expect(toolMsg.toolCallId).toBe('call_123');
		expect(toolMsg.content).toEqual([
			{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Sunny, 72°F' }
		]);
	});

	it('handles mixed conversation with multiple message types', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			instructions: 'You are a weather assistant',
			input: [
				{
					role: 'user',
					content: 'What is the weather in Seattle?'
				},
				{
					type: 'function_call',
					call_id: 'call_456',
					name: 'get_weather',
					arguments: '{"location": "Seattle"}'
				},
				{
					type: 'function_call_output',
					call_id: 'call_456',
					output: 'Rainy, 55°F'
				},
				{
					role: 'user',
					content: 'Thanks!'
				}
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(5);
		expect(result[0].role).toBe(Raw.ChatRole.System); // instructions
		expect(result[1].role).toBe(Raw.ChatRole.User); // first user message
		expect(result[2].role).toBe(Raw.ChatRole.Assistant); // function call
		expect((result[2] as Raw.AssistantChatMessage).toolCalls).toHaveLength(1);
		expect(result[3].role).toBe(Raw.ChatRole.Tool); // function output
		expect(result[4].role).toBe(Raw.ChatRole.User); // thanks message
	});

	it('returns empty array for undefined input', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: undefined as any
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toHaveLength(0);
	});

	it('groups consecutive function calls into single assistant message', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: [
				{
					type: 'function_call',
					call_id: 'call_1',
					name: 'tool_a',
					arguments: '{}'
				},
				{
					type: 'function_call',
					call_id: 'call_2',
					name: 'tool_b',
					arguments: '{}'
				}
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		// Two consecutive function calls should be grouped into one assistant message
		expect(result).toHaveLength(1);
		expect(result[0].role).toBe(Raw.ChatRole.Assistant);
		expect((result[0] as Raw.AssistantChatMessage).toolCalls).toHaveLength(2);
	});

	it('converts tool_search_call and tool_search_output items to raw messages', () => {
		const body: OpenAI.Responses.ResponseCreateParams = {
			model: 'gpt-5-mini',
			input: [
				{
					type: 'tool_search_call',
					execution: 'client',
					call_id: 'ts_call_1',
					status: 'completed',
					arguments: { query: 'file editing tools' },
				} as unknown as OpenAI.Responses.ResponseInputItem,
				{
					type: 'tool_search_output',
					execution: 'client',
					call_id: 'ts_call_1',
					status: 'completed',
					tools: [
						{ type: 'function', name: 'grep_search', description: 'Search files', defer_loading: true, parameters: {} },
						{ type: 'function', name: 'file_search', description: 'Find files', defer_loading: true, parameters: {} },
					],
				} as unknown as OpenAI.Responses.ResponseInputItem
			]
		};

		const result = responseApiInputToRawMessagesForLogging(body);

		expect(result).toEqual([
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{
					id: 'ts_call_1',
					type: 'function',
					function: {
						name: 'tool_search',
						arguments: '{"query":"file editing tools"}',
					}
				}]
			},
			{
				role: Raw.ChatRole.Tool,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: '["grep_search","file_search"]' }],
				toolCallId: 'ts_call_1',
			}
		]);
	});
});

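// createResponsesRequestBody builds the Responses API request body from Raw
// messages. The cases below cover stateful-marker slicing (previous_response_id),
// compaction carry-over, mode changes, and tool_search namespacing.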
describe('createResponsesRequestBody', () => {
	it('extracts compaction threshold from request body context management', () => {
		expect(getResponsesApiCompactionThresholdFromBody({
			context_management: [{
				type: openAIContextManagementCompactionType,
				compact_threshold: 1234,
			}]
		})).toBe(1234);
	});

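	// Stateful-marker slicing, as exercised below: when the transport still holds
	// the response id recorded in history, the request sets previous_response_id
	// to that id and sends only the messages after the marker. Sketch:
	//   [user 'before', marker 'resp-prev', user 'after']
	//     -> { previous_response_id: 'resp-prev', input: [user 'after'] }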
	it('still slices websocket requests by stateful marker index when compaction is disabled', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const endpointWithoutCompaction = { ...testEndpoint, family: 'gpt-5' as const };
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before marker' }],
			},
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }],
			},
		];

		const webSocketBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, { ...createRequestOptions(messages, true), conversationId: 'conv-1' }, endpointWithoutCompaction.model, endpointWithoutCompaction));

		expect(webSocketBody.previous_response_id).toBe('resp-prev');
		expect(webSocketBody.input).toHaveLength(1);
		expect(webSocketBody.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'after marker' }],
		});

		accessor.dispose();
		services.dispose();
	});

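	// Compaction carry-over: the newest stored compaction item must be re-sent
	// even when it predates the stateful marker, alongside the post-marker input.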
	it('includes the newest compaction item in websocket requests when it predates the stateful marker', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const latestCompaction = createCompactionResponse('cmp_ws', 'enc_ws');
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before compaction' }],
			},
			createCompactionAssistantMessage(latestCompaction),
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }],
			},
		];

		const webSocketBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, { ...createRequestOptions(messages, true), conversationId: 'conv-1' }, testEndpoint.model, testEndpoint));

		expect(webSocketBody.previous_response_id).toBe('resp-prev');
		expect(webSocketBody.input).toContainEqual({
			type: openAIContextManagementCompactionType,
			id: 'cmp_ws',
			encrypted_content: 'enc_ws',
		});
		expect(webSocketBody.input).toContainEqual({
			role: 'user',
			content: [{ type: 'input_text', text: 'after marker' }],
		});

		accessor.dispose();
		services.dispose();
	});

	it('sends all messages when the websocket stateful marker is not in the current messages', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-stale';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'first message' }],
			},
			createStatefulMarkerMessage(testEndpoint.model, 'resp-different'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'second message' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, { ...createRequestOptions(messages, true), conversationId: 'conv-1' }, testEndpoint.model, testEndpoint));

		expect(body.previous_response_id).toBeUndefined();
		expect(body.input).toHaveLength(2);
		expect(body.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'first message' }],
		});
		expect(body.input?.[1]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'second message' }],
		});

		accessor.dispose();
		services.dispose();
	});

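	// modeChanged requests deliberately drop the stored marker: the full message
	// list is re-sent and previous_response_id stays unset.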
	it('does not reuse a websocket stateful marker when modeChanged is true', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before marker' }],
			},
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, { ...createRequestOptions(messages, true), conversationId: 'conv-1', modeChanged: true }, testEndpoint.model, testEndpoint));

		expect(body.previous_response_id).toBeUndefined();
		expect(body.input).toHaveLength(2);
		expect(body.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'before marker' }],
		});
		expect(body.input?.[1]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'after marker' }],
		});

		accessor.dispose();
		services.dispose();
	});

	it('reuses the newly established websocket marker on follow-up requests after switching into plan mode', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-plan-1';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const websocketEndpoint = { ...testEndpoint, family: 'gpt-5.5', model: 'gpt-5.5' as const };
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'implementation context before switching modes' }],
			},
			createStatefulMarkerMessage(websocketEndpoint.model, 'resp-agent-1'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'switch to plan mode' }],
			},
			createStatefulMarkerMessage(websocketEndpoint.model, 'resp-plan-1'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'plan follow up' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(messages, true), conversationId: 'conv-plan-1' },
			websocketEndpoint.model,
			websocketEndpoint,
		));

		expect(body.previous_response_id).toBe('resp-plan-1');
		expect(body.input).toHaveLength(1);
		expect(body.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'plan follow up' }],
		});

		accessor.dispose();
		services.dispose();
	});

	it('treats websocket requests from agent to plan and back to implementation as separate mode changes', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const websocketEndpoint = { ...testEndpoint, family: 'gpt-5.4', model: 'gpt-5.4' as const };

		wsManager.getStatefulMarker = () => 'resp-agent-1';
		const planMessages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'agent context before switching to plan' }],
			},
			createStatefulMarkerMessage(websocketEndpoint.model, 'resp-agent-1'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'plan this change' }],
			},
		];

		const planBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(planMessages, true), conversationId: 'conv-mode-change', modeChanged: true },
			websocketEndpoint.model,
			websocketEndpoint,
		));

		expect(planBody.previous_response_id).toBeUndefined();
		expect(planBody.input).toHaveLength(2);
		expect(planBody.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'agent context before switching to plan' }],
		});
		expect(planBody.input?.[1]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'plan this change' }],
		});

		wsManager.getStatefulMarker = () => 'resp-plan-1';
		const implementationMessages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'plan context before switching back to implementation' }],
			},
			createStatefulMarkerMessage(websocketEndpoint.model, 'resp-plan-1'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'start implementation' }],
			},
		];

		const implementationBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(implementationMessages, true), conversationId: 'conv-mode-change', modeChanged: true },
			websocketEndpoint.model,
			websocketEndpoint,
		));

		expect(implementationBody.previous_response_id).toBeUndefined();
		expect(implementationBody.input).toHaveLength(2);
		expect(implementationBody.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'plan context before switching back to implementation' }],
		});
		expect(implementationBody.input?.[1]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'start implementation' }],
		});

		accessor.dispose();
		services.dispose();
	});

	it('includes the newest compaction item in non-websocket requests when it predates the stateful marker', () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const latestCompaction = createCompactionResponse('cmp_http', 'enc_http');
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before compaction' }],
			},
			createCompactionAssistantMessage(latestCompaction),
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), testEndpoint.model, testEndpoint));

		expect(body.previous_response_id).toBe('resp-prev');
		expect(body.input).toContainEqual({
			type: openAIContextManagementCompactionType,
			id: 'cmp_http',
			encrypted_content: 'enc_http',
		});
		expect(body.input).toContainEqual({
			role: 'user',
			content: [{ type: 'input_text', text: 'after marker' }],
		});

		accessor.dispose();
		services.dispose();
	});

	it('does not reuse an HTTP stateful marker when modeChanged is true', () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before marker' }],
			},
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, { ...createRequestOptions(messages, false), modeChanged: true }, testEndpoint.model, testEndpoint));

		expect(body.previous_response_id).toBeUndefined();
		expect(body.input).toHaveLength(2);
		expect(body.input?.[0]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'before marker' }],
		});
		expect(body.input?.[1]).toMatchObject({
			role: 'user',
			content: [{ type: 'input_text', text: 'after marker' }],
		});

		accessor.dispose();
		services.dispose();
	});

	it('round-trips the newest stored compaction item', () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const latestCompaction = createCompactionResponse('cmp_new', 'enc_new');
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'earlier turn' }],
			},
			createCompactionAssistantMessage(latestCompaction),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'follow up' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), testEndpoint.model, testEndpoint));

		expect(body.input).toContainEqual({
			type: openAIContextManagementCompactionType,
			id: 'cmp_new',
			encrypted_content: 'enc_new',
		});

		accessor.dispose();
		services.dispose();
	});

	it('sends assistant messages with output content and without a fake output message id', () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.Assistant,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'previous answer' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), testEndpoint.model, testEndpoint));

		expect(body.input?.[0]).toMatchObject({
			role: 'assistant',
			content: [{ type: 'output_text', text: 'previous answer' }],
			type: 'message',
		});
		expect(body.input?.[0]).not.toHaveProperty('id');
		expect(body.input?.[0]).not.toHaveProperty('status');

		accessor.dispose();
		services.dispose();
	});

	it('does not send whitespace-only assistant messages', () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.Assistant,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: ' \n\t' }],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), testEndpoint.model, testEndpoint));

		expect(body.input).toHaveLength(0);

		accessor.dispose();
		services.dispose();
	});

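	// Deferred tool loading: tools discovered through a tool_search_output turn
	// are expected to be replayed as function_call items tagged with a namespace,
	// while tools the deferral service reports as non-deferred stay untagged.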
	it('adds namespace field only to function_call for tools loaded via tool_search_output', () => {
		const services = createPlatformServices();
		services.define(IToolDeferralService, { _serviceBrand: undefined, isNonDeferredTool: (name: string) => name === 'read_file' || name === 'tool_search' });
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const configService = accessor.get(IConfigurationService) as InMemoryConfigurationService;
		configService.setConfig(ConfigKey.ResponsesApiToolSearchEnabled, true);
		const endpoint = { ...testEndpoint, model: 'gpt-5.4', family: 'gpt-5.4' };
		const tools = [
			{ type: 'function' as const, function: { name: 'tool_search', description: 'Search tools', parameters: {} } },
			{ type: 'function' as const, function: { name: 'some_mcp_tool', description: 'MCP tool', parameters: {} } },
			{ type: 'function' as const, function: { name: 'read_file', description: 'Read a file', parameters: {} } },
		];
		const messages: Raw.ChatMessage[] = [
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'find something' }],
			},
			// Assistant calls tool_search
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ id: 'ts_1', type: 'function', function: { name: 'tool_search', arguments: '{"query":"search"}' } }],
			},
			// tool_search returns some_mcp_tool
			{
				role: Raw.ChatRole.Tool,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: '["some_mcp_tool"]' }],
				toolCallId: 'ts_1',
			},
			// Assistant calls some_mcp_tool (loaded via tool_search) and read_file (not loaded via tool_search)
			{
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [
					{ id: 'call_mcp', type: 'function', function: { name: 'some_mcp_tool', arguments: '{"q":"hello"}' } },
					{ id: 'call_read', type: 'function', function: { name: 'read_file', arguments: '{"path":"foo.ts"}' } },
				],
			},
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, { ...createRequestOptions(messages, false), location: ChatLocation.Agent, requestOptions: { tools } }, endpoint.model, endpoint));

		const input = body.input as OpenAI.Responses.ResponseInputItem[];

		// some_mcp_tool was loaded via tool_search_output — should have namespace
		const mcpCall = input.find(item => isFunctionCallInputItem(item, 'some_mcp_tool'));
		expect(mcpCall).toBeDefined();
		expect(mcpCall?.namespace).toBe('some_mcp_tool');

		// read_file was NOT loaded via tool_search — should NOT have namespace
		const readCall = input.find(item => isFunctionCallInputItem(item, 'read_file'));
		expect(readCall).toBeDefined();
		expect(readCall).not.toHaveProperty('namespace');

		accessor.dispose();
		services.dispose();
	});
});

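// processResponseFromChatEndpoint consumes a Responses API SSE stream (each
// event framed as `data: <json>` followed by a blank line, as built below with
// createFakeStreamResponse) and surfaces completions plus telemetry events.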
describe('processResponseFromChatEndpoint telemetry', () => {
	it('emits engine.messages for Responses API assistant output', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();

		const completedEvent = {
			type: 'response.completed',
			response: {
				id: 'resp_123',
				model: 'gpt-5-mini',
				created_at: 123,
				usage: {
					input_tokens: 11,
					output_tokens: 7,
					total_tokens: 18,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens_details: { reasoning_tokens: 0 },
				},
				output: [
					{
						type: 'message',
						content: [{ type: 'output_text', text: 'final assistant reply' }],
					}
				],
			}
		};

		const response = createFakeStreamResponse(`data: ${JSON.stringify(completedEvent)}\n\n`);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-1' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async () => undefined,
			telemetryData
		);

		for await (const _ of stream) {
			// consume all completions to flush telemetry side effects
		}

		const events = telemetryService.getEvents().telemetryServiceEvents.filter(e => e.eventName === 'engine.messages');
		expect(events.length).toBeGreaterThan(0);

		const outputEvent = events[events.length - 1];
		const messagesJson = JSON.parse(String((outputEvent.properties as Record<string, string>)?.messagesJson));
		expect(messagesJson).toHaveLength(1);
		expect(messagesJson[0].role).toBe('assistant');
		expect(messagesJson[0].content).toBe('final assistant reply');

		accessor.dispose();
		services.dispose();
	});

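	// Compaction items can arrive via response.output_item.added/.done and also
	// in the final response.completed payload; the newest one is expected to win
	// when building the follow-up request.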
	it('reconciles the newest compaction item from response.completed for the next request', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();
		const streamedCompactions: OpenAIContextManagementResponse[] = [];

		const olderCompaction = createCompactionResponse('cmp_old', 'enc_old');
		const newerCompaction = createCompactionResponse('cmp_new', 'enc_new');
		const compactionAddedEvent = {
			type: 'response.output_item.added',
			output_index: 0,
			item: olderCompaction,
		};
		const compactionEvent = {
			type: 'response.output_item.done',
			output_index: 0,
			item: olderCompaction,
		};
		const completedEvent = {
			type: 'response.completed',
			response: {
				id: 'resp_latest_compaction',
				model: 'gpt-5-mini',
				created_at: 123,
				usage: {
					input_tokens: 1200,
					output_tokens: 9,
					total_tokens: 1209,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens_details: { reasoning_tokens: 0 },
				},
				output: [
					olderCompaction,
					{
						type: 'message',
						content: [{ type: 'output_text', text: 'reply' }],
					},
					newerCompaction,
				],
			}
		};

		const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionAddedEvent)}\n\ndata: ${JSON.stringify(compactionEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-latest-compaction' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async (_text, _unused, delta) => {
				if (delta.contextManagement && isOpenAIContextManagementResponse(delta.contextManagement)) {
					streamedCompactions.push(delta.contextManagement);
				}
				return undefined;
			},
			telemetryData,
			1000
		);

		for await (const _ of stream) {
			// consume stream
		}

		expect(streamedCompactions.map(item => item.id)).toEqual(['cmp_old', 'cmp_new']);

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([
			createCompactionAssistantMessage(streamedCompactions[streamedCompactions.length - 1]),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'continue' }],
			},
		], false), testEndpoint.model, testEndpoint));

		expect(body.input).toContainEqual({
			type: openAIContextManagementCompactionType,
			id: 'cmp_new',
			encrypted_content: 'enc_new',
		});
		expect(body.input).not.toContainEqual({
			type: openAIContextManagementCompactionType,
			id: 'cmp_old',
			encrypted_content: 'enc_old',
		});

		accessor.dispose();
		services.dispose();
	});

	it('does not emit compaction telemetry when compaction is disabled', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();

		const compactionEvent = {
			type: 'response.output_item.done',
			output_index: 0,
			item: {
				type: openAIContextManagementCompactionType,
				id: 'cmp_disabled',
				encrypted_content: 'enc',
			}
		};
		const completedEvent = {
			type: 'response.completed',
			response: {
				id: 'resp_disabled',
				model: 'gpt-5-mini',
				created_at: 123,
				usage: {
					input_tokens: 1500,
					output_tokens: 9,
					total_tokens: 1509,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens_details: { reasoning_tokens: 0 },
				},
				output: []
			}
		};

		const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-4' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async () => undefined,
			telemetryData,
			undefined
		);

		for await (const _ of stream) {
			// consume stream
		}

		const event = telemetryService.getEvents().telemetryServiceEvents.find(e => e.eventName === 'responsesApi.compactionOutcome');
		expect(event).toBeUndefined();

		accessor.dispose();
		services.dispose();
	});

	it('captures compaction returned before output_item.done for the next request', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();
		const streamedCompactions: OpenAIContextManagementResponse[] = [];

		const earlyCompaction = createCompactionResponse('cmp_early', 'enc_early');
		const compactionAddedEvent = {
			type: 'response.output_item.added',
			output_index: 0,
			item: earlyCompaction,
		};
		const completedEvent = {
			type: 'response.completed',
			response: {
				id: 'resp_early_compaction',
				model: 'gpt-5-mini',
				created_at: 123,
				usage: {
					input_tokens: 1200,
					output_tokens: 9,
					total_tokens: 1209,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens_details: { reasoning_tokens: 0 },
				},
				output: [
					{
						type: 'message',
						content: [{ type: 'output_text', text: 'reply' }],
					},
				],
			}
		};

		const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionAddedEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-early-compaction' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async (_text, _unused, delta) => {
				if (delta.contextManagement && isOpenAIContextManagementResponse(delta.contextManagement)) {
					streamedCompactions.push(delta.contextManagement);
				}
				return undefined;
			},
			telemetryData,
			1000
		);

		for await (const _ of stream) {
			// consume stream
		}

		expect(streamedCompactions.map(item => item.id)).toEqual(['cmp_early']);

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([
			createCompactionAssistantMessage(streamedCompactions[streamedCompactions.length - 1]),
			{
				role: Raw.ChatRole.User,
				content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'continue' }],
			},
		], false), testEndpoint.model, testEndpoint));

		expect(body.input).toContainEqual({
			type: openAIContextManagementCompactionType,
			id: 'cmp_early',
			encrypted_content: 'enc_early',
		});

		accessor.dispose();
		services.dispose();
	});

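	// The responsesApi.compactionOutcome event distinguishes a server-returned
	// compaction item ('compaction_returned') from a request that crossed the
	// threshold without one ('threshold_met_no_compaction').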
	it('emits telemetry when the server returns a compaction item', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();

		const compactionEvent = {
			type: 'response.output_item.done',
			output_index: 0,
			item: {
				type: openAIContextManagementCompactionType,
				id: 'cmp_123',
				encrypted_content: 'enc',
			}
		};
		const completedEvent = {
			type: 'response.completed',
			response: {
				id: 'resp_456',
				model: 'gpt-5-mini',
				created_at: 123,
				usage: {
					input_tokens: 1200,
					output_tokens: 7,
					total_tokens: 1207,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens_details: { reasoning_tokens: 0 },
				},
				output: []
			}
		};

		const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-2' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async () => undefined,
			telemetryData,
			1000
		);

		for await (const _ of stream) {
			// consume stream
		}

		const event = telemetryService.getEvents().telemetryServiceEvents.find(e => e.eventName === 'responsesApi.compactionOutcome');
		expect(event).toBeDefined();
		expect(event?.properties).toMatchObject({
			outcome: 'compaction_returned',
			model: 'gpt-5-mini',
		});
		expect(event?.measurements).toMatchObject({
			compactThreshold: 1000,
			promptTokens: 1200,
			totalTokens: 1207,
		});

		accessor.dispose();
		services.dispose();
	});

	it('emits telemetry when the server exceeds threshold without returning a compaction item', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();

		const completedEvent = {
			type: 'response.completed',
			response: {
				id: 'resp_789',
				model: 'gpt-5-mini',
				created_at: 123,
				usage: {
					input_tokens: 1500,
					output_tokens: 9,
					total_tokens: 1509,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens_details: { reasoning_tokens: 0 },
				},
				output: [
					{
						type: 'message',
						content: [{ type: 'output_text', text: 'reply' }],
					}
				]
			}
		};

		const response = createFakeStreamResponse(`data: ${JSON.stringify(completedEvent)}\n\n`);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-3' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async () => undefined,
			telemetryData,
			1000
		);

		for await (const _ of stream) {
			// consume stream
		}

		const event = telemetryService.getEvents().telemetryServiceEvents.find(e => e.eventName === 'responsesApi.compactionOutcome');
		expect(event).toBeDefined();
		expect(event?.properties).toMatchObject({
			outcome: 'threshold_met_no_compaction',
			model: 'gpt-5-mini',
		});
		expect(event?.measurements).toMatchObject({
			compactThreshold: 1000,
			promptTokens: 1500,
			totalTokens: 1509,
		});

		accessor.dispose();
		services.dispose();
	});
});

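// Summarization can invalidate the websocket's stored marker: the expectation
// below is that the marker is only reused when the connection's
// summarizedAtRoundId matches the request's, or when neither side has a summary.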
describe('summarizedAtRoundId and stateful marker interaction', () => {
	it('skips stateful marker when summarizedAtRoundId differs from connection', () => {
		const services = createPlatformServices();
		const wsManager: IChatWebSocketManager = {
			_serviceBrand: undefined,
			getOrCreateConnection: () => { throw new Error('not implemented'); },
			hasActiveConnection: () => false,
			getStatefulMarker: () => 'resp-prev',
			getSummarizedAtRoundId: () => 'round-old',
			closeConnection: () => { },
			closeAll: () => { },
		};
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'summarized history' }] },
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }] },
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(messages, true), conversationId: 'conv-1', summarizedAtRoundId: 'round-new' },
			testEndpoint.model, testEndpoint,
		));

		expect(body.previous_response_id).toBeUndefined();
		expect(body.input).toHaveLength(2);

		accessor.dispose();
		services.dispose();
	});

	it('uses stateful marker when summarizedAtRoundId matches connection', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		wsManager.getSummarizedAtRoundId = () => 'round-5';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'summarized history' }] },
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }] },
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(messages, true), conversationId: 'conv-1', summarizedAtRoundId: 'round-5' },
			testEndpoint.model, testEndpoint,
		));

		expect(body.previous_response_id).toBe('resp-prev');
		expect(body.input).toHaveLength(1);

		accessor.dispose();
		services.dispose();
	});

	it('uses stateful marker when both sides have no summary', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		wsManager.getSummarizedAtRoundId = () => undefined;
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'first message' }] },
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'second message' }] },
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(messages, true), conversationId: 'conv-1' },
			testEndpoint.model, testEndpoint,
		));

		expect(body.previous_response_id).toBe('resp-prev');
		expect(body.input).toHaveLength(1);

		accessor.dispose();
		services.dispose();
	});

	it('skips stateful marker when conversation is rolled back past summary', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		wsManager.getSummarizedAtRoundId = () => 'round-5';
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'first message' }] },
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'second message' }] },
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(messages, true), conversationId: 'conv-1', summarizedAtRoundId: undefined },
			testEndpoint.model, testEndpoint,
		));

		expect(body.previous_response_id).toBeUndefined();
		expect(body.input).toHaveLength(2);

		accessor.dispose();
		services.dispose();
	});

	it('skips stateful marker on first request after new summarization', () => {
		const services = createPlatformServices();
		const wsManager = new NullChatWebSocketManager();
		wsManager.getStatefulMarker = () => 'resp-prev';
		wsManager.getSummarizedAtRoundId = () => undefined;
		services.set(IChatWebSocketManager, wsManager);
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const messages: Raw.ChatMessage[] = [
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'summarized history' }] },
			createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'),
			{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }] },
		];

		const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(
			servicesAccessor,
			{ ...createRequestOptions(messages, true), conversationId: 'conv-1', summarizedAtRoundId: 'round-new' },
			testEndpoint.model, testEndpoint,
		));

		expect(body.previous_response_id).toBeUndefined();
		expect(body.input).toHaveLength(2);

		accessor.dispose();
		services.dispose();
	});
});

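// A response may stream a 'commentary' phase message before the 'final_answer'
// phase message; the two texts are expected to be joined with a blank line
// rather than run together.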
describe('phase commentary followed by phase final_answer', () => {
	it('inserts a separator between commentary and final_answer text in the stream', async () => {
		const services = createPlatformServices();
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const logService = accessor.get(ILogService);
		const telemetryService = new SpyingTelemetryService();
		const accumulatedTexts: string[] = [];
		const phases: string[] = [];

		const commentaryText = 'Responding directly in commentary as requested. My name is GitHub Copilot.';
		const finalText = 'My name is GitHub Copilot.';

		// Real-world Responses API stream: commentary message (output_index 0)
		// followed by final_answer message (output_index 1), with incremental
		// text deltas for each.
		const events = [
			{ type: 'response.output_item.added', output_index: 0, item: { type: 'message', role: 'assistant', content: [], phase: 'commentary', status: 'in_progress' }, sequence_number: 2 },
			{ type: 'response.content_part.added', output_index: 0, content_index: 0, item_id: 'item-0', part: { type: 'output_text', text: '', annotations: [], logprobs: [] }, sequence_number: 3 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: 'Respond', logprobs: [], sequence_number: 4 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: 'ing', logprobs: [], sequence_number: 5 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' directly', logprobs: [], sequence_number: 6 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' in', logprobs: [], sequence_number: 7 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' commentary', logprobs: [], sequence_number: 8 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' as', logprobs: [], sequence_number: 9 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' requested', logprobs: [], sequence_number: 10 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: '.', logprobs: [], sequence_number: 11 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' My', logprobs: [], sequence_number: 12 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' name', logprobs: [], sequence_number: 13 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' is', logprobs: [], sequence_number: 14 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' Git', logprobs: [], sequence_number: 15 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: 'Hub', logprobs: [], sequence_number: 16 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: ' Cop', logprobs: [], sequence_number: 17 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: 'ilot', logprobs: [], sequence_number: 18 },
			{ type: 'response.output_text.delta', output_index: 0, content_index: 0, item_id: 'item-0', delta: '.', logprobs: [], sequence_number: 19 },
			{ type: 'response.output_text.done', output_index: 0, content_index: 0, item_id: 'item-0', text: commentaryText, logprobs: [], sequence_number: 20 },
			{ type: 'response.content_part.done', output_index: 0, content_index: 0, item_id: 'item-0', part: { type: 'output_text', text: commentaryText, annotations: [], logprobs: [] }, sequence_number: 21 },
			{ type: 'response.output_item.done', output_index: 0, item: { type: 'message', role: 'assistant', content: [{ type: 'output_text', text: commentaryText, annotations: [], logprobs: [] }], phase: 'commentary', status: 'completed' }, sequence_number: 22 },
			{ type: 'response.output_item.added', output_index: 1, item: { type: 'message', role: 'assistant', content: [], phase: 'final_answer', status: 'in_progress' }, sequence_number: 23 },
			{ type: 'response.content_part.added', output_index: 1, content_index: 0, item_id: 'item-1', part: { type: 'output_text', text: '', annotations: [], logprobs: [] }, sequence_number: 24 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: 'My', logprobs: [], sequence_number: 25 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: ' name', logprobs: [], sequence_number: 26 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: ' is', logprobs: [], sequence_number: 27 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: ' Git', logprobs: [], sequence_number: 28 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: 'Hub', logprobs: [], sequence_number: 29 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: ' Cop', logprobs: [], sequence_number: 30 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: 'ilot', logprobs: [], sequence_number: 31 },
			{ type: 'response.output_text.delta', output_index: 1, content_index: 0, item_id: 'item-1', delta: '.', logprobs: [], sequence_number: 32 },
			{ type: 'response.output_text.done', output_index: 1, content_index: 0, item_id: 'item-1', text: finalText, logprobs: [], sequence_number: 33 },
			{ type: 'response.content_part.done', output_index: 1, content_index: 0, item_id: 'item-1', part: { type: 'output_text', text: finalText, annotations: [], logprobs: [] }, sequence_number: 34 },
			{ type: 'response.output_item.done', output_index: 1, item: { type: 'message', role: 'assistant', content: [{ type: 'output_text', text: finalText, annotations: [], logprobs: [] }], phase: 'final_answer', status: 'completed' }, sequence_number: 35 },
			{
				type: 'response.completed',
				response: {
					id: 'resp_phase_test',
					model: 'gpt-5.4-2026-03-05',
					created_at: 1776962259,
					usage: { input_tokens: 8432, output_tokens: 35, total_tokens: 8467, input_tokens_details: { cached_tokens: 0 }, output_tokens_details: { reasoning_tokens: 0 } },
					output: [
						{ type: 'message', content: [{ type: 'output_text', text: commentaryText, annotations: [], logprobs: [] }], phase: 'commentary', role: 'assistant', status: 'completed' },
						{ type: 'message', content: [{ type: 'output_text', text: finalText, annotations: [], logprobs: [] }], phase: 'final_answer', role: 'assistant', status: 'completed' },
					],
				},
				sequence_number: 36,
			}
		];

		const sseBody = events.map(e => `data: ${JSON.stringify(e)}\n\n`).join('');
		const response = createFakeStreamResponse(sseBody);
		const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-phase-test' }, {});

		const stream = await processResponseFromChatEndpoint(
			instantiationService,
			telemetryService,
			logService,
			response,
			1,
			async (text, _unused, delta) => {
				accumulatedTexts.push(text);
				if (delta.phase) {
					phases.push(delta.phase);
				}
				return undefined;
			},
			telemetryData,
		);

		for await (const _ of stream) {
			// consume stream
		}

		expect(phases).toEqual(['commentary', 'final_answer']);

		// The accumulated text must separate commentary and final_answer text
		const finalAccumulatedText = accumulatedTexts[accumulatedTexts.length - 1];
		expect(finalAccumulatedText).toBe(
			commentaryText + '\n\n' + finalText
		);

		accessor.dispose();
		services.dispose();
	});
});