CoCalc -- responsesApi.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/responsesApi.ts
¹³⁴⁰¹ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { Raw } from '@vscode/prompt-tsx';
7
import type { OpenAI } from 'openai';
8
import { Response } from '../../../platform/networking/common/fetcherService';
9
import { coalesce } from '../../../util/vs/base/common/arrays';
10
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
11
import { binaryIndexOf } from '../../../util/vs/base/common/buffer';
12
import { Lazy } from '../../../util/vs/base/common/lazy';
13
import { SSEParser } from '../../../util/vs/base/common/sseParser';
14
import { isDefined } from '../../../util/vs/base/common/types';
15
import { generateUuid } from '../../../util/vs/base/common/uuid';
16
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';
17
import { ChatLocation } from '../../chat/common/commonTypes';
18
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
19
import { ILogService } from '../../log/common/logService';
20
import { CUSTOM_TOOL_SEARCH_NAME } from '../../networking/common/anthropic';
21
import { FinishedCallback, getRequestId, IResponseDelta, OpenAiFunctionTool, OpenAiResponsesFunctionTool, OpenAiToolSearchTool } from '../../networking/common/fetch';
22
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody } from '../../networking/common/networking';
23
import { ChatCompletion, FinishedCompletionReason, modelsWithoutResponsesContextManagement, openAIContextManagementCompactionType, OpenAIContextManagementResponse, rawMessageToCAPI, TokenLogProb } from '../../networking/common/openai';
24
import { IToolDeferralService } from '../../networking/common/toolDeferralService';
25
import { sendEngineMessagesTelemetry, sendResponsesApiCompactionTelemetry } from '../../networking/node/chatStream';
26
import { IChatWebSocketManager } from '../../networking/node/chatWebSocketManager';
27
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
28
import { ITelemetryService } from '../../telemetry/common/telemetry';
29
import { TelemetryData } from '../../telemetry/common/telemetryData';
30
import { getVerbosityForModelSync, isResponsesApiToolSearchEnabled } from '../common/chatModelCapabilities';
31
import { rawPartAsCompactionData } from '../common/compactionDataContainer';
32
import { rawPartAsPhaseData } from '../common/phaseDataContainer';
33
import { getIndexOfStatefulMarker, getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer';
34
import { rawPartAsThinkingData } from '../common/thinkingDataContainer';
35
import { createResponsesStreamDumper } from './responsesApiDebugDump';
36

37
export function getResponsesApiCompactionThreshold(configService: IConfigurationService, expService: IExperimentationService, endpoint: IChatEndpoint): number | undefined {
38
	const contextManagementEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService) && !modelsWithoutResponsesContextManagement.has(endpoint.family);
39
	if (!contextManagementEnabled) {
40
		return undefined;
41
	}
42

43
	return endpoint.modelMaxPromptTokens > 0
44
		? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
45
		: 50000;
46
}
47

48
export function createResponsesRequestBody(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions, model: string, endpoint: IChatEndpoint): IEndpointBody {
49
	const configService = accessor.get(IConfigurationService);
50
	const expService = accessor.get(IExperimentationService);
51
	const verbosity = getVerbosityForModelSync(endpoint);
52
	const compactThreshold = getResponsesApiCompactionThreshold(configService, expService, endpoint);
53
	// compaction supported for all the models but works well for codex models and any future models after 5.3
54

55
	const webSocketStatefulMarker = resolveWebSocketStatefulMarker(accessor, options);
56
	// When WebSocket is in use, always defer to the WebSocket marker (which may be
57
	// undefined if the connection is new or the summary state changed). Never fall
58
	// back to the HTTP marker lookup in that case.
59
	const ignoreStatefulMarker = !!options.ignoreStatefulMarker || !!options.useWebSocket;
60
	const modeChanged = !!options.modeChanged;
61

62
	// Tool search: when enabled, split tools into non-deferred (included in the request) and deferred
63
	// (excluded from the request entirely). Uses OpenAI's client-executed tool search protocol: we add
64
	// { type: 'tool_search', execution: 'client' }. The model emits tool_search_call, which we handle via
65
	// our ToolSearchTool embeddings search, then round-trip as tool_search_output in the next request.
66
	const toolSearchEnabled = isResponsesApiToolSearchEnabled(endpoint, configService, expService);
67
	const isAllowedConversationAgent = options.location === ChatLocation.Agent || options.location === ChatLocation.MessagesProxy;
68
	const isSubagent = options.telemetryProperties?.subType?.startsWith('subagent') ?? false;
69
	const toolSearchInRequest = !!options.requestOptions?.tools?.some(t => t.function.name === CUSTOM_TOOL_SEARCH_NAME);
70
	const shouldDeferTools = toolSearchEnabled && isAllowedConversationAgent && !isSubagent && toolSearchInRequest;
71
	const toolDeferralService = shouldDeferTools ? accessor.get(IToolDeferralService) : undefined;
72

73
	type ResponsesFunctionTool = OpenAI.Responses.FunctionTool & OpenAiResponsesFunctionTool;
74
	const functionTools: ResponsesFunctionTool[] = [];
75
	if (options.requestOptions?.tools) {
76
		for (const tool of options.requestOptions.tools) {
77
			if (!tool.function.name || tool.function.name.length === 0) {
78
				continue;
79
			}
80
			// Always skip the tool_search function tool — 'tool_search' is a reserved namespace in the
81
			// Responses API. Client-executed tool search uses { type: 'tool_search', execution: 'client' } instead.
82
			if (tool.function.name === CUSTOM_TOOL_SEARCH_NAME) {
83
				continue;
84
			}
85
			const isDeferred = shouldDeferTools && !toolDeferralService!.isNonDeferredTool(tool.function.name);
86
			// Client-executed tool search: deferred tools are NOT sent in the request.
87
			// They are returned via tool_search_output when the model searches for them.
88
			if (isDeferred) {
89
				continue;
90
			}
91
			functionTools.push({
92
				...tool.function,
93
				type: 'function',
94
				strict: false,
95
				parameters: (tool.function.parameters || {}) as Record<string, unknown>,
96
			});
97
		}
98
	}
99

100
	// Build final tools array
101
	const finalTools: Array<ResponsesFunctionTool | OpenAiToolSearchTool | ClientToolSearchTool> = [...functionTools];
102
	if (shouldDeferTools) {
103
		// Client-executed tool search: the model emits tool_search_call, our ToolSearchTool
104
		// handles the embeddings search, and we return tool_search_output with full definitions.
105
		finalTools.unshift({
106
			type: 'tool_search',
107
			execution: 'client',
108
			description: 'Search for relevant tools by describing what you need. Returns tool definitions for tools matching your query.',
109
			parameters: {
110
				type: 'object',
111
				properties: {
112
					query: {
113
						type: 'string',
114
						description: 'Natural language description of what tool capability you are looking for.',
115
					},
116
				},
117
				required: ['query'],
118
			},
119
		} as ClientToolSearchTool);
120
	}
121

122
	const toolsMap = options.requestOptions?.tools
123
		? new Map(options.requestOptions.tools.map(t => [t.function.name, t]))
124
		: undefined;
125
	const shouldLoadToolFromToolSearch = shouldDeferTools ? (name: string) => !toolDeferralService!.isNonDeferredTool(name) : undefined;
126

127
	const body: IEndpointBody = {
128
		model,
129
		...rawMessagesToResponseAPI(model, options.messages, ignoreStatefulMarker, webSocketStatefulMarker, {
130
			toolsMap,
131
			shouldLoadToolFromToolSearch,
132
			modeChanged,
133
		}),
134
		stream: true,
135
		tools: finalTools.length > 0 ? finalTools : undefined,
136
		// Only a subset of completion post options are supported, and some
137
		// are renamed. Handle them manually:
138
		max_output_tokens: options.postOptions.max_tokens,
139
		tool_choice: typeof options.postOptions.tool_choice === 'object'
140
			? { type: 'function', name: options.postOptions.tool_choice.function.name }
141
			: options.postOptions.tool_choice,
142
		top_logprobs: options.postOptions.logprobs ? 3 : undefined,
143
		store: false,
144
		text: verbosity ? { verbosity } : undefined,
145
	};
146

147
	if (compactThreshold !== undefined) {
148
		body.context_management = [{
149
			'type': openAIContextManagementCompactionType,
150
			// Trigger compaction at 90% of the model max prompt context to keep headroom for active turns.
151
			'compact_threshold': compactThreshold
152
		}];
153
	}
154

155
	body.truncation = configService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation) ?
156
		'auto' :
157
		'disabled';
158
	const thinkingExplicitlyDisabled = options.modelCapabilities?.enableThinking === false;
159
	const summaryConfig = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiReasoningSummary, expService);
160
	const shouldDisableReasoningSummary = endpoint.family === 'gpt-5.3-codex-spark-preview' || thinkingExplicitlyDisabled;
161
	const effortFromSetting = configService.getConfig(ConfigKey.Advanced.ReasoningEffortOverride);
162
	const effort = endpoint.supportsReasoningEffort?.length
163
		? (effortFromSetting || options.modelCapabilities?.reasoningEffort || 'medium')
164
		: undefined;
165
	const summary = summaryConfig === 'off' || shouldDisableReasoningSummary ? undefined : summaryConfig;
166
	if (effort || summary) {
167
		body.reasoning = {
168
			...(effort ? { effort } : {}),
169
			...(summary ? { summary } : {})
170
		};
171
	}
172

173
	body.include = ['reasoning.encrypted_content'];
174

175
	const promptCacheKeyEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiPromptCacheKeyEnabled, expService);
176
	if (promptCacheKeyEnabled && options.conversationId) {
177
		body.prompt_cache_key = `${options.conversationId}:${endpoint.family}`;
178
	}
179

180
	return body;
181
}
182

183
export function getResponsesApiCompactionThresholdFromBody(body: Pick<IEndpointBody, 'context_management'>): number | undefined {
184
	const contextManagement = body.context_management;
185
	if (!Array.isArray(contextManagement)) {
186
		return undefined;
187
	}
188

189
	for (const item of contextManagement) {
190
		if (item.type === openAIContextManagementCompactionType && typeof item.compact_threshold === 'number') {
191
			return item.compact_threshold;
192
		}
193
	}
194

195
	return undefined;
196
}
197

198
interface ResponseInputAssistantTextContentPart {
199
	type: 'output_text';
200
	text: string;
201
}
202

203
interface ResponseInputAssistantMessageWithPhase {
204
	type: 'message';
205
	role: 'assistant';
206
	content: ResponseInputAssistantTextContentPart[];
207
	phase?: string;
208
}
209

210
interface ResponseOutputItemWithPhase {
211
	phase?: string;
212
}
213

214
// ── Responses API tool search types ──────────────────────────────────
215
// These match the shapes from https://developers.openai.com/api/docs/guides/tools-tool-search
216

217
/** Client-executed tool_search tool definition for the Responses API */
218
interface ClientToolSearchTool {
219
	type: 'tool_search';
220
	execution: 'client';
221
	description: string;
222
	parameters: Record<string, unknown>;
223
}
224

225
interface ResponsesToolSearchCall {
226
	type: 'tool_search_call';
227
	id: string;
228
	execution: 'client';
229
	call_id: string | null;
230
	status: string;
231
	arguments?: Record<string, unknown>;
232
}
233

234
/** Input item shape for a client-executed tool_search_call in conversation history */
235
interface ResponsesToolSearchCallInput {
236
	type: 'tool_search_call';
237
	execution: 'client';
238
	call_id: string;
239
	status: string;
240
	arguments: Record<string, unknown>;
241
}
242

243
/** Input item shape for a client-executed tool_search_output in conversation history */
244
interface ResponsesToolSearchOutputInput {
245
	type: 'tool_search_output';
246
	execution: 'client';
247
	call_id: string;
248
	status: string;
249
	tools: ToolSearchLoadedTool[];
250
}
251

252
/** A tool definition returned in tool_search_output */
253
interface ToolSearchLoadedTool {
254
	type: 'function';
255
	name: string;
256
	description: string;
257
	defer_loading: true;
258
	parameters: object;
259
}
260

261
interface LatestCompactionOutput {
262
	readonly item: OpenAIContextManagementResponse;
263
	readonly outputIndex: number;
264
}
265

266
type CompactionResponseOutputItem = OpenAI.Responses.ResponseOutputItem & OpenAIContextManagementResponse;
267

268
interface CompactionItemInChunk {
269
	readonly item: OpenAIContextManagementResponse;
270
	readonly outputIndex: number | undefined;
271
}
272

273
interface ResponseStreamEventWithOutputItem {
274
	readonly item: unknown;
275
	readonly output_index: number;
276
}
277

278
interface ResponseStreamEventWithResponseOutput {
279
	readonly response: {
280
		readonly output: OpenAI.Responses.ResponseOutputItem[];
281
	};
282
}
283

284
function resolveWebSocketStatefulMarker(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions): string | undefined {
285
	if (options.ignoreStatefulMarker || !options.useWebSocket || !options.conversationId) {
286
		return undefined;
287
	}
288
	const wsManager = accessor.get(IChatWebSocketManager);
289
	// If client-side summarization state changed since the stateful marker
290
	// was stored (new summary, or rollback removing a summary), the server's
291
	// state no longer matches. Skip the marker so the full history is sent.
292
	const connSummarizedAt = wsManager.getSummarizedAtRoundId(options.conversationId);
293
	if (options.summarizedAtRoundId !== connSummarizedAt) {
294
		return undefined;
295
	}
296
	return wsManager.getStatefulMarker(options.conversationId);
297
}
298

299
interface RawMessagesToResponseAPIOptions {
300
	readonly toolsMap?: Map<string, OpenAiFunctionTool>;
301
	readonly shouldLoadToolFromToolSearch?: (name: string) => boolean;
302
	readonly modeChanged?: boolean;
303
}
304

305
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean, webSocketStatefulMarker: string | undefined, options: RawMessagesToResponseAPIOptions = {}): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
306
	const { toolsMap, shouldLoadToolFromToolSearch, modeChanged = false } = options;
307
	const latestCompactionMessageIndex = getLatestCompactionMessageIndex(messages);
308
	const latestCompactionMessage = latestCompactionMessageIndex !== undefined ? createCompactionRoundTripMessage(messages[latestCompactionMessageIndex]) : undefined;
309

310
	let previousResponseId: string | undefined;
311
	let markerIndex: number | undefined;
312

313
	if (webSocketStatefulMarker) {
314
		// WebSocket path: use the connection's current stateful marker if present in messages
315
		markerIndex = getIndexOfStatefulMarker(webSocketStatefulMarker, messages);
316
		if (markerIndex !== undefined) {
317
			previousResponseId = webSocketStatefulMarker;
318
		}
319
	} else if (!ignoreStatefulMarker) {
320
		// HTTP path: look up the latest marker for this model from messages
321
		const statefulMarkerAndIndex = getStatefulMarkerAndIndex(modelId, messages);
322
		if (statefulMarkerAndIndex) {
323
			previousResponseId = statefulMarkerAndIndex.statefulMarker;
324
			markerIndex = statefulMarkerAndIndex.index;
325
		}
326
	}
327

328
	if (modeChanged) {
329
		previousResponseId = undefined;
330
		markerIndex = undefined;
331
	}
332

333
	if (markerIndex !== undefined) {
334
		// Requests that resume from previous_response_id send only post-marker history,
335
		// but they still need the latest compaction item even when that item predates
336
		// the marker. This keeps both websocket and non-websocket traffic aligned.
337
		messages = messages.slice(markerIndex + 1);
338
		if (latestCompactionMessageIndex !== undefined) {
339
			if (latestCompactionMessageIndex > markerIndex) {
340
				messages = messages.slice(latestCompactionMessageIndex - (markerIndex + 1));
341
			} else if (latestCompactionMessage) {
342
				messages = [latestCompactionMessage, ...messages];
343
			}
344
		}
345
	} else if (latestCompactionMessageIndex !== undefined) {
346
		messages = messages.slice(latestCompactionMessageIndex);
347
	}
348

349
	// Track which call_ids are tool_search_calls (from client-executed tool search)
350
	const toolSearchCallIds = new Set<string>();
351
	// Track tool names loaded via tool_search_output — these need a namespace field on function_call
352
	const toolSearchLoadedTools = new Set<string>();
353

354
	const input: OpenAI.Responses.ResponseInputItem[] = [];
355
	for (const message of messages) {
356
		switch (message.role) {
357
			case Raw.ChatRole.Assistant:
358
				if (message.content.length) {
359
					input.push(...extractCompactionData(message.content));
360
					input.push(...extractThinkingData(message.content));
361
					const asstContent = message.content.map(rawContentToResponsesAssistantContent).filter(isDefined);
362
					if (asstContent.length) {
363
						const assistantMessage: ResponseInputAssistantMessageWithPhase = {
364
							role: 'assistant',
365
							content: asstContent,
366
							type: 'message',
367
							phase: extractPhaseData(message.content),
368
						};
369
						// The Responses API expects previous assistant message content as output_text/refusal,
370
						// but the SDK's ResponseOutputMessage type requires response-only id/status fields.
371
						input.push(assistantMessage as OpenAI.Responses.ResponseInputItem);
372
					}
373
				}
374
				if (message.toolCalls) {
375
					for (const toolCall of message.toolCalls) {
376
						if (toolCall.function.name === CUSTOM_TOOL_SEARCH_NAME) {
377
							// Client-executed tool search: emit as tool_search_call instead of function_call
378
							toolSearchCallIds.add(toolCall.id);
379
							let parsedArgs: Record<string, unknown> = {};
380
							try { parsedArgs = JSON.parse(toolCall.function.arguments || '{}'); } catch { }
381
							input.push({
382
								type: 'tool_search_call',
383
								execution: 'client',
384
								call_id: toolCall.id,
385
								status: 'completed',
386
								arguments: parsedArgs,
387
							} satisfies ResponsesToolSearchCallInput as unknown as OpenAI.Responses.ResponseInputItem);
388
						} else {
389
							// Tools loaded via tool_search need a namespace field to round-trip correctly
390
							const namespace = toolSearchLoadedTools.has(toolCall.function.name) ? toolCall.function.name : undefined;
391
							input.push({ type: 'function_call', name: toolCall.function.name, arguments: toolCall.function.arguments, call_id: toolCall.id, ...(namespace ? { namespace } : {}) });
392
						}
393
					}
394
				}
395
				break;
396
			case Raw.ChatRole.Tool:
397
				if (message.toolCallId) {
398
					if (toolSearchCallIds.has(message.toolCallId)) {
399
						// Client-executed tool search result: convert tool names to tool_search_output with full definitions
400
						const resultText = message.content
401
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Text)
402
							.map(c => c.text)
403
							.join('');
404
						const loadedTools = toolsMap ? buildToolSearchOutputTools(resultText, toolsMap, shouldLoadToolFromToolSearch) : [];
405
						for (const t of loadedTools) {
406
							toolSearchLoadedTools.add(t.name);
407
						}
408
						input.push({
409
							type: 'tool_search_output',
410
							execution: 'client',
411
							call_id: message.toolCallId,
412
							status: 'completed',
413
							tools: loadedTools,
414
						} satisfies ResponsesToolSearchOutputInput as unknown as OpenAI.Responses.ResponseInputItem);
415
					} else {
416
						const asText = message.content
417
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Text)
418
							.map(c => c.text)
419
							.join('');
420
						const asImages = message.content
421
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Image)
422
							.map((c): OpenAI.Responses.ResponseInputImage => ({
423
								type: 'input_image',
424
								detail: c.imageUrl.detail || 'auto',
425
								image_url: c.imageUrl.url,
426
							}));
427

428
						// todod@connor4312: hack while responses API only supports text output from tools
429
						input.push({ type: 'function_call_output', call_id: message.toolCallId, output: asText });
430
						if (asImages.length) {
431
							input.push({ role: 'user', content: [{ type: 'input_text', text: 'Image associated with the above tool call:' }, ...asImages] });
432
						}
433
					}
434
				}
435
				break;
436
			case Raw.ChatRole.User:
437
				input.push({ role: 'user', content: message.content.map(rawContentToResponsesContent).filter(isDefined) });
438
				break;
439
			case Raw.ChatRole.System:
440
				input.push({ role: 'system', content: message.content.map(rawContentToResponsesContent).filter(isDefined) });
441
				break;
442
		}
443
	}
444

445
	return { input, previous_response_id: previousResponseId };
446
}
447

448
/**
449
 * Converts a JSON array of tool names (from ToolSearchTool) into full tool definitions
450
 * for the tool_search_output. Falls back to an empty array on parse failure.
451
 */
452
function buildToolSearchOutputTools(resultText: string, toolsMap: Map<string, OpenAiFunctionTool>, shouldLoadToolFromToolSearch: ((name: string) => boolean) | undefined): ToolSearchLoadedTool[] {
453
	let toolNames: unknown;
454
	try { toolNames = JSON.parse(resultText); } catch { return []; }
455
	if (!Array.isArray(toolNames)) { return []; }
456

457
	return toolNames
458
		.filter((name): name is string => typeof name === 'string' && name !== CUSTOM_TOOL_SEARCH_NAME && toolsMap.has(name) && shouldLoadToolFromToolSearch?.(name) === true)
459
		.map(name => {
460
			const tool = toolsMap.get(name)!;
461
			return {
462
				type: 'function' as const,
463
				name: tool.function.name,
464
				description: tool.function.description || '',
465
				defer_loading: true as const,
466
				parameters: tool.function.parameters || { type: 'object', properties: {} },
467
			};
468
		});
469
}
470

471
function createCompactionRoundTripMessage(message: Raw.ChatMessage): Raw.ChatMessage | undefined {
472
	if (message.role !== Raw.ChatRole.Assistant) {
473
		return undefined;
474
	}
475

476
	const content = message.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part));
477
	if (!content.length) {
478
		return undefined;
479
	}
480

481
	return {
482
		role: Raw.ChatRole.Assistant,
483
		content,
484
	};
485
}
486

487
function getLatestCompactionMessageIndex(messages: readonly Raw.ChatMessage[]): number | undefined {
488
	for (let idx = messages.length - 1; idx >= 0; idx--) {
489
		const message = messages[idx];
490
		for (const part of message.content) {
491
			if (part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part)) {
492
				return idx;
493
			}
494
		}
495
	}
496

497
	return undefined;
498
}
499

500
function rawContentToResponsesContent(part: Raw.ChatCompletionContentPart): OpenAI.Responses.ResponseInputContent | undefined {
501
	switch (part.type) {
502
		case Raw.ChatCompletionContentPartKind.Text:
503
			return { type: 'input_text', text: part.text };
504
		case Raw.ChatCompletionContentPartKind.Image:
505
			return { type: 'input_image', detail: part.imageUrl.detail || 'auto', image_url: part.imageUrl.url };
506
		case Raw.ChatCompletionContentPartKind.Opaque: {
507
			const maybeCast = part.value as OpenAI.Responses.ResponseInputContent;
508
			if (maybeCast.type === 'input_text' || maybeCast.type === 'input_image' || maybeCast.type === 'input_file') {
509
				return maybeCast;
510
			}
511
		}
512
	}
513
}
514

515
function rawContentToResponsesAssistantContent(part: Raw.ChatCompletionContentPart): Pick<OpenAI.Responses.ResponseOutputText, 'type' | 'text'> | undefined {
516
	switch (part.type) {
517
		case Raw.ChatCompletionContentPartKind.Text:
518
			if (part.text.trim()) {
519
				return { type: 'output_text', text: part.text };
520
			}
521
	}
522
}
523

524
function extractThinkingData(content: Raw.ChatCompletionContentPart[]): OpenAI.Responses.ResponseReasoningItem[] {
525
	return coalesce(content.map(part => {
526
		if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
527
			const thinkingData = rawPartAsThinkingData(part);
528
			if (thinkingData) {
529
				return {
530
					type: 'reasoning',
531
					id: thinkingData.id,
532
					summary: [],
533
					encrypted_content: thinkingData.encrypted,
534
				} satisfies OpenAI.Responses.ResponseReasoningItem;
535
			}
536
		}
537
	}));
538
}
539

540
function extractPhaseData(content: Raw.ChatCompletionContentPart[]): string | undefined {
541
	for (const part of content) {
542
		if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
543
			const phase = rawPartAsPhaseData(part);
544
			if (phase) {
545
				return phase;
546
			}
547
		}
548
	}
549
	return undefined;
550
}
551

552
/**
553
 * Extracts compaction data from opaque content parts and converts them to
554
 * Responses API input items for round-tripping.
555
 */
556
function extractCompactionData(content: Raw.ChatCompletionContentPart[]): OpenAI.Responses.ResponseInputItem[] {
557
	return coalesce(content.map(part => {
558
		if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
559
			const compaction = rawPartAsCompactionData(part);
560
			if (compaction) {
561
				return {
562
					type: openAIContextManagementCompactionType,
563
					id: compaction.id,
564
					encrypted_content: compaction.encrypted_content,
565
				} as unknown as OpenAI.Responses.ResponseInputItem;
566
			}
567
		}
568
	}));
569
}
570

571
/**
572
 * This is an approximate responses input -> raw messages helper, should be used for logging only
573
 */
574
export function responseApiInputToRawMessagesForLogging(body: OpenAI.Responses.ResponseCreateParams): Raw.ChatMessage[] {
575
	const messages: Raw.ChatMessage[] = [];
576
	const pendingFunctionCalls: Raw.ChatMessageToolCall[] = [];
577

578
	const flushPendingFunctionCalls = () => {
579
		if (pendingFunctionCalls.length > 0) {
580
			messages.push({
581
				role: Raw.ChatRole.Assistant,
582
				content: [],
583
				toolCalls: pendingFunctionCalls.splice(0)
584
			});
585
		}
586
	};
587

588
	// Add system instructions if provided
589
	if (body.instructions) {
590
		messages.push({
591
			role: Raw.ChatRole.System,
592
			content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: body.instructions }]
593
		});
594
	}
595

596
	// Convert input to array format if it's a string
597
	const inputItems = typeof body.input === 'string' ? [{ role: 'user' as const, content: body.input, type: 'message' as const }] : (body.input ?? []);
598

599
	for (const item of inputItems) {
600
		// Handle message items with roles
601
		if ('role' in item) {
602
			switch (item.role) {
603
				case 'user':
604
					flushPendingFunctionCalls();
605
					messages.push({
606
						role: Raw.ChatRole.User,
607
						content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
608
					});
609
					break;
610
				case 'system':
611
				case 'developer':
612
					flushPendingFunctionCalls();
613
					messages.push({
614
						role: Raw.ChatRole.System,
615
						content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
616
					});
617
					break;
618
				case 'assistant':
619
					flushPendingFunctionCalls();
620
					if (isResponseOutputMessage(item)) {
621
						messages.push({
622
							role: Raw.ChatRole.Assistant,
623
							content: item.content.map(responseOutputToRawContent).filter(isDefined)
624
						});
625
					} else if (isResponseInputItemMessage(item)) {
626
						messages.push({
627
							role: Raw.ChatRole.Assistant,
628
							content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
629
						});
630
					}
631
					break;
632
			}
633
		} else if ('type' in item) {
634
			// Handle other item types without roles
635
			switch (item.type) {
636
				case 'function_call':
637
					// Collect function calls to be grouped with the next assistant message
638
					pendingFunctionCalls.push({
639
						id: item.call_id,
640
						type: 'function',
641
						function: {
642
							name: item.name,
643
							arguments: item.arguments
644
						}
645
					});
646
					break;
647
				case 'function_call_output': {
648
					flushPendingFunctionCalls();
649
					const content = responseFunctionOutputToRawContents(item.output);
650
					messages.push({
651
						role: Raw.ChatRole.Tool,
652
						content,
653
						toolCallId: item.call_id
654
					});
655
					break;
656
				}
657
				case 'reasoning':
658
					// We can't perfectly reconstruct the original thinking data
659
					// but we can add a placeholder for logging
660
					flushPendingFunctionCalls();
661
					messages.push({
662
						role: Raw.ChatRole.Assistant,
663
						content: [{
664
							type: Raw.ChatCompletionContentPartKind.Text,
665
							text: `Reasoning summary: ${item.summary.map(s => s.text).join('\n\n')}`
666
						}]
667
					});
668
					break;
669
				default: {
670
					// Client-executed tool search items (tool_search_call / tool_search_output)
671
					const tsItem = item as unknown as ResponsesToolSearchCallInput | ResponsesToolSearchOutputInput;
672
					if (tsItem.type === 'tool_search_call') {
673
						pendingFunctionCalls.push({
674
							id: tsItem.call_id,
675
							type: 'function',
676
							function: {
677
								name: CUSTOM_TOOL_SEARCH_NAME,
678
								arguments: JSON.stringify(tsItem.arguments ?? {}),
679
							}
680
						});
681
					} else if (tsItem.type === 'tool_search_output') {
682
						flushPendingFunctionCalls();
683
						const toolNames = tsItem.tools.map(t => t.name);
684
						messages.push({
685
							role: Raw.ChatRole.Tool,
686
							content: [{
687
								type: Raw.ChatCompletionContentPartKind.Text,
688
								text: JSON.stringify(toolNames),
689
							}],
690
							toolCallId: tsItem.call_id,
691
						});
692
					}
693
					break;
694
				}
695
			}
696
		}
697
	}
698

699
	// Flush any remaining function calls at the end
700
	if (pendingFunctionCalls.length > 0) {
701
		messages.push({
702
			role: Raw.ChatRole.Assistant,
703
			content: [],
704
			toolCalls: pendingFunctionCalls.splice(0)
705
		});
706
	}
707

708
	return messages;
709
}
710

711
function isResponseOutputMessage(item: OpenAI.Responses.ResponseInputItem): item is OpenAI.Responses.ResponseOutputMessage {
712
	return 'role' in item && item.role === 'assistant' && 'type' in item && item.type === 'message' && 'content' in item && Array.isArray(item.content);
713
}
714

715
function isResponseInputItemMessage(item: OpenAI.Responses.ResponseInputItem): item is OpenAI.Responses.ResponseInputItem.Message {
716
	return 'role' in item && item.role === 'assistant' && (!('type' in item) || item.type !== 'message');
717
}
718

719
function ensureContentArray(content: string | OpenAI.Responses.ResponseInputMessageContentList): OpenAI.Responses.ResponseInputMessageContentList {
720
	if (typeof content === 'string') {
721
		return [{ type: 'input_text', text: content }];
722
	}
723
	return content;
724
}
725

726
function responseContentToRawContent(part: OpenAI.Responses.ResponseInputContent | OpenAI.Responses.ResponseFunctionCallOutputItem): Raw.ChatCompletionContentPart | undefined {
727
	switch (part.type) {
728
		case 'input_text':
729
			return { type: Raw.ChatCompletionContentPartKind.Text, text: part.text };
730
		case 'input_image':
731
			return {
732
				type: Raw.ChatCompletionContentPartKind.Image,
733
				imageUrl: {
734
					url: part.image_url || '',
735
					detail: part.detail === 'auto' ?
736
						undefined :
737
						(part.detail ?? undefined)
738
				}
739
			};
740
		case 'input_file':
741
			// This is a rough approximation for logging
742
			return {
743
				type: Raw.ChatCompletionContentPartKind.Opaque,
744
				value: `[File Input - Filename: ${part.filename || 'unknown'}]`
745
			};
746
	}
747
}
748

749
function responseOutputToRawContent(part: OpenAI.Responses.ResponseOutputText | OpenAI.Responses.ResponseOutputRefusal): Raw.ChatCompletionContentPart | undefined {
750
	switch (part.type) {
751
		case 'output_text':
752
			return { type: Raw.ChatCompletionContentPartKind.Text, text: part.text };
753
		case 'refusal':
754
			return { type: Raw.ChatCompletionContentPartKind.Text, text: `[Refusal: ${part.refusal}]` };
755
	}
756
}
757

758
function responseFunctionOutputToRawContents(output: string | OpenAI.Responses.ResponseFunctionCallOutputItemList): Raw.ChatCompletionContentPart[] {
759
	if (typeof output === 'string') {
760
		return [{ type: Raw.ChatCompletionContentPartKind.Text, text: output }];
761
	}
762
	return coalesce(output.map(responseContentToRawContent));
763
}
764

765
function isCompactionItem(value: unknown): value is OpenAIContextManagementResponse {
766
	return typeof value === 'object' && value !== null && 'type' in value && String(value.type) === openAIContextManagementCompactionType;
767
}
768

769
function hasOutputItem(chunk: OpenAI.Responses.ResponseStreamEvent): chunk is OpenAI.Responses.ResponseStreamEvent & ResponseStreamEventWithOutputItem {
770
	return 'item' in chunk && 'output_index' in chunk && typeof chunk.output_index === 'number';
771
}
772

773
function hasResponseOutput(chunk: OpenAI.Responses.ResponseStreamEvent): chunk is OpenAI.Responses.ResponseStreamEvent & ResponseStreamEventWithResponseOutput {
774
	return 'response' in chunk && Array.isArray(chunk.response.output);
775
}
776

777
function getOutputItemIndex(chunk: ResponseStreamEventWithOutputItem): number {
778
	return chunk.output_index;
779
}
780

781
function isCompactionOutputItem(item: OpenAI.Responses.ResponseOutputItem): item is CompactionResponseOutputItem {
782
	return isCompactionItem(item);
783
}
784

785
function getLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): LatestCompactionOutput | undefined {
786
	let latestCompactionOutput: LatestCompactionOutput | undefined;
787
	for (let idx = output.length - 1; idx >= 0; idx--) {
788
		const item = output[idx];
789
		if (isCompactionOutputItem(item)) {
790
			latestCompactionOutput = { item, outputIndex: idx };
791
			break;
792
		}
793
	}
794

795
	if (preferredOutputIndex !== undefined) {
796
		const preferredItem = output[preferredOutputIndex];
797
		if (preferredItem && isCompactionOutputItem(preferredItem) && (!latestCompactionOutput || preferredOutputIndex >= latestCompactionOutput.outputIndex)) {
798
			return { item: preferredItem, outputIndex: preferredOutputIndex };
799
		}
800
	}
801

802
	return latestCompactionOutput;
803
}
804

805
function keepLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): OpenAI.Responses.ResponseOutputItem[] {
806
	const latestCompactionOutput = getLatestCompactionOutput(output, preferredOutputIndex);
807
	if (!latestCompactionOutput) {
808
		return output;
809
	}
810

811
	return output.filter((item, idx) => !isCompactionOutputItem(item) || idx === latestCompactionOutput.outputIndex);
812
}
813

814
export async function processResponseFromChatEndpoint(instantiationService: IInstantiationService, telemetryService: ITelemetryService, logService: ILogService, response: Response, expectedNumChoices: number, finishCallback: FinishedCallback, telemetryData: TelemetryData, compactionThreshold?: number): Promise<AsyncIterableObject<ChatCompletion>> {
815
	return new AsyncIterableObject<ChatCompletion>(async feed => {
816
		const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
817
		const ghRequestId = response.headers.get('x-github-request-id') ?? '';
818
		const { serverExperiments } = getRequestId(response.headers);
819
		const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, serverExperiments, compactionThreshold);
820
		const dumper = createResponsesStreamDumper(requestId, logService);
821
		const parser = new SSEParser((ev) => {
822
			try {
823
				logService.trace(`SSE: ${ev.data}`);
824
				const parsedData = JSON.parse(ev.data);
825
				const responseStreamEvent: OpenAI.Responses.ResponseStreamEvent = { type: ev.type, ...parsedData };
826
				dumper.logEvent(responseStreamEvent);
827
				const completion = processor.push(responseStreamEvent, finishCallback);
828
				if (completion) {
829
					sendCompletionOutputTelemetry(telemetryService, logService, completion, telemetryData);
830
					feed.emitOne(completion);
831
				}
832
			} catch (e) {
833
				feed.reject(e);
834
			}
835
		});
836

837
		for await (const chunk of response.body) {
838
			parser.feed(chunk);
839
		}
840
	}, async () => {
841
		await response.body.destroy();
842
	});
843
}
844

845
export function sendCompletionOutputTelemetry(telemetryService: ITelemetryService, logService: ILogService, completion: ChatCompletion, telemetryData: TelemetryData): void {
846
	const telemetryMessage = rawMessageToCAPI(completion.message);
847
	let telemetryDataWithUsage = telemetryData;
848
	if (completion.usage) {
849
		telemetryDataWithUsage = telemetryData.extendedBy({}, {
850
			promptTokens: completion.usage.prompt_tokens,
851
			completionTokens: completion.usage.completion_tokens,
852
			totalTokens: completion.usage.total_tokens,
853
			...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }),
854
			...(completion.usage.completion_tokens_details && {
855
				reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens,
856
				acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens,
857
				rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens,
858
			}),
859
		});
860
	}
861
	sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService);
862
}
863

864
interface CapiResponsesTextDeltaEvent extends Omit<OpenAI.Responses.ResponseTextDeltaEvent, 'logprobs'> {
865
	logprobs: Array<OpenAI.Responses.ResponseTextDeltaEvent.Logprob> | undefined;
866
}
867

868
export class OpenAIResponsesProcessor {
869
	private textAccumulator: string = '';
870
	private hasReceivedReasoningSummary = false;
871
	private sawCompactionMessage = false;
872
	private latestCompactionOutputIndex: number | undefined;
873
	private latestCompactionItem: OpenAIContextManagementResponse | undefined;
874
	/** Tracks the output_index of the last text delta to detect output item boundaries */
875
	private lastTextDeltaOutputIndex: number | undefined;
876
	/** Maps output_index to { name, callId, arguments } for streaming tool call updates */
877
	private readonly toolCallInfo = new Map<number, { name: string; callId: string; arguments: string }>();
878

879
	constructor(
880
		private readonly telemetryData: TelemetryData,
881
		private readonly telemetryService: ITelemetryService,
882
		private readonly requestId: string,
883
		private readonly ghRequestId: string,
884
		private readonly serverExperiments: string,
885
		private readonly compactionThreshold: number | undefined,
886
		@ILogService private readonly logService: ILogService,
887
	) { }
888

889
	private getCompactionItemsInChunk(chunk: OpenAI.Responses.ResponseStreamEvent): CompactionItemInChunk[] {
890
		const compactionItems: CompactionItemInChunk[] = [];
891

892
		if (hasOutputItem(chunk) && isCompactionItem(chunk.item)) {
893
			const outputIndex = getOutputItemIndex(chunk);
894
			compactionItems.push({ item: chunk.item, outputIndex });
895
		}
896

897
		if (hasResponseOutput(chunk)) {
898
			for (let idx = 0; idx < chunk.response.output.length; idx++) {
899
				const item = chunk.response.output[idx];
900
				if (isCompactionItem(item)) {
901
					compactionItems.push({ item, outputIndex: idx });
902
				}
903
			}
904
		}
905

906
		return compactionItems;
907
	}
908

909
	private captureCompactionItem(item: OpenAIContextManagementResponse, outputIndex: number | undefined, onProgress: (delta: IResponseDelta) => undefined): void {
910
		if (outputIndex !== undefined && this.latestCompactionOutputIndex !== undefined && outputIndex < this.latestCompactionOutputIndex) {
911
			return;
912
		}
913

914
		const previousCompactionItem = this.latestCompactionItem;
915
		this.sawCompactionMessage = true;
916
		this.latestCompactionOutputIndex = outputIndex ?? this.latestCompactionOutputIndex;
917
		this.latestCompactionItem = item;
918

919
		if (previousCompactionItem?.id === item.id && previousCompactionItem.encrypted_content === item.encrypted_content) {
920
			return;
921
		}
922

923
		onProgress({
924
			text: '',
925
			contextManagement: {
926
				type: openAIContextManagementCompactionType,
927
				id: item.id,
928
				encrypted_content: item.encrypted_content,
929
			}
930
		});
931
	}
932

933
	public push(chunk: OpenAI.Responses.ResponseStreamEvent, _onProgress: FinishedCallback): ChatCompletion | undefined {
934
		const onProgress = (delta: IResponseDelta): undefined => {
935
			this.textAccumulator += delta.text;
936
			_onProgress(this.textAccumulator, 0, delta);
937
		};
938
		const compactionItems = this.getCompactionItemsInChunk(chunk);
939
		if (chunk.type !== 'response.completed') {
940
			for (const { item, outputIndex } of compactionItems) {
941
				this.captureCompactionItem(item, outputIndex, onProgress);
942
			}
943
		}
944

945
		switch (chunk.type) {
946
			case 'error':
947
				return onProgress({ text: '', copilotErrors: [{ agent: 'openai', code: chunk.code || 'unknown', message: chunk.message, type: 'error', identifier: chunk.param || undefined }] });
948
			case 'response.output_text.delta': {
949
				const capiChunk: CapiResponsesTextDeltaEvent = chunk;
950
				// When text arrives from a new output item, emit a paragraph
951
				// separator so that e.g. commentary and final text don't fuse.
952
				if (this.lastTextDeltaOutputIndex !== undefined && capiChunk.output_index !== this.lastTextDeltaOutputIndex) {
953
					onProgress({ text: '\n\n' });
954
				}
955
				this.lastTextDeltaOutputIndex = capiChunk.output_index;
956
				const haystack = new Lazy(() => new TextEncoder().encode(capiChunk.delta));
957
				return onProgress({
958
					text: capiChunk.delta,
959
					logprobs: capiChunk.logprobs && {
960
						content: capiChunk.logprobs.map(lp => ({
961
							...mapLogProp(haystack, lp),
962
							top_logprobs: lp.top_logprobs?.map(l => mapLogProp(haystack, l)) || []
963
						}))
964
					},
965
				});
966
			}
967
			case 'response.output_item.added':
968
				if (chunk.item.type === 'function_call') {
969
					this.toolCallInfo.set(chunk.output_index, { name: chunk.item.name, callId: chunk.item.call_id, arguments: '' });
970
					onProgress({
971
						text: '',
972
						beginToolCalls: [{ name: chunk.item.name, id: chunk.item.call_id }]
973
					});
974
				} else if (chunk.item.type.toString() === 'tool_search_call') {
975
					const tsItem = chunk.item as unknown as ResponsesToolSearchCall;
976
					if (tsItem.execution === 'client' && tsItem.call_id) {
977
						// Client-executed tool search: treat as a regular tool call so our ToolSearchTool handles it.
978
						this.toolCallInfo.set(chunk.output_index, { name: CUSTOM_TOOL_SEARCH_NAME, callId: tsItem.call_id, arguments: '' });
979
						onProgress({
980
							text: '',
981
							beginToolCalls: [{ name: CUSTOM_TOOL_SEARCH_NAME, id: tsItem.call_id }]
982
						});
983
					}
984
				}
985
				return;
986
			case 'response.function_call_arguments.delta': {
987
				const info = this.toolCallInfo.get(chunk.output_index);
988
				if (info) {
989
					info.arguments += chunk.delta;
990
					onProgress({
991
						text: '',
992
						copilotToolCallStreamUpdates: [{
993
							id: info.callId,
994
							name: info.name,
995
							arguments: info.arguments,
996
						}],
997
					});
998
				}
999
				return;
1000
			}
1001
			case 'response.output_item.done':
1002
				if (chunk.item.type === 'function_call') {
1003
					this.toolCallInfo.delete(chunk.output_index);
1004
					onProgress({
1005
						text: '',
1006
						copilotToolCalls: [{
1007
							id: chunk.item.call_id,
1008
							name: chunk.item.name,
1009
							arguments: chunk.item.arguments,
1010
						}],
1011
						phase: (chunk.item as ResponseOutputItemWithPhase).phase
1012
					});
1013
				} else if (chunk.item.type.toString() === 'tool_search_call') {
1014
					const tsCall = chunk.item as unknown as ResponsesToolSearchCall;
1015
					if (tsCall.execution === 'client' && tsCall.call_id) {
1016
						// Client-executed tool search completed: emit as a completed copilotToolCall
1017
						this.toolCallInfo.delete(chunk.output_index);
1018
						onProgress({
1019
							text: '',
1020
							copilotToolCalls: [{
1021
								id: tsCall.call_id,
1022
								name: CUSTOM_TOOL_SEARCH_NAME,
1023
								arguments: JSON.stringify(tsCall.arguments ?? {}),
1024
							}],
1025
						});
1026
					}
1027
				} else if (chunk.item.type === 'reasoning') {
1028
					onProgress({
1029
						text: '',
1030
						thinking: chunk.item.encrypted_content ? {
1031
							id: chunk.item.id,
1032
							// CAPI models don't stream the reasoning summary for some reason, byok do, so don't duplicate it
1033
							text: this.hasReceivedReasoningSummary ?
1034
								undefined :
1035
								chunk.item.summary.map(s => s.text),
1036
							encrypted: chunk.item.encrypted_content,
1037
						} : undefined
1038
					});
1039
				} else if (chunk.item.type === 'message') {
1040
					onProgress({
1041
						text: '',
1042
						phase: (chunk.item as ResponseOutputItemWithPhase).phase
1043
					});
1044
				}
1045
				return;
1046
			case 'response.reasoning_summary_text.delta':
1047
				this.hasReceivedReasoningSummary = true;
1048
				return onProgress({
1049
					text: '',
1050
					thinking: {
1051
						id: chunk.item_id,
1052
						text: chunk.delta,
1053
					}
1054
				});
1055
			case 'response.reasoning_summary_part.done':
1056
				this.hasReceivedReasoningSummary = true;
1057
				return onProgress({
1058
					text: '',
1059
					thinking: {
1060
						id: chunk.item_id
1061
					}
1062
				});
1063
			case 'response.completed': {
1064
				const normalizedOutput = keepLatestCompactionOutput(chunk.response.output, this.latestCompactionOutputIndex);
1065
				const latestCompactionOutput = getLatestCompactionOutput(normalizedOutput, this.latestCompactionOutputIndex);
1066
				const latestCompactionItem = latestCompactionOutput?.item;
1067
				const previousCompactionItem = this.latestCompactionItem;
1068
				if (latestCompactionItem) {
1069
					this.sawCompactionMessage = true;
1070
					this.latestCompactionOutputIndex = latestCompactionOutput.outputIndex;
1071
				}
1072

1073
				const shouldEmitResolvedCompaction = latestCompactionItem && (
1074
					!previousCompactionItem ||
1075
					previousCompactionItem.id !== latestCompactionItem.id ||
1076
					previousCompactionItem.encrypted_content !== latestCompactionItem.encrypted_content
1077
				);
1078
				if (latestCompactionItem) {
1079
					this.latestCompactionItem = latestCompactionItem;
1080
				}
1081
				if (this.compactionThreshold !== undefined && this.sawCompactionMessage) {
1082
					const promptTokens = chunk.response.usage?.input_tokens ?? 0;
1083
					const totalTokens = chunk.response.usage?.total_tokens ?? 0;
1084
					sendResponsesApiCompactionTelemetry(this.telemetryService, {
1085
						outcome: 'compaction_returned',
1086
						headerRequestId: this.requestId,
1087
						gitHubRequestId: this.ghRequestId,
1088
						model: chunk.response.model,
1089
					}, {
1090
						compactThreshold: this.compactionThreshold,
1091
						promptTokens,
1092
						totalTokens,
1093
					});
1094
					this.logService.debug(`[responsesAPI_compaction] Compaction enabled. headerRequestId=${this.requestId}`);
1095
				} else if (this.compactionThreshold !== undefined && (chunk.response.usage?.input_tokens ?? 0) >= this.compactionThreshold) {
1096
					const promptTokens = chunk.response.usage?.input_tokens ?? 0;
1097
					const totalTokens = chunk.response.usage?.total_tokens ?? 0;
1098
					sendResponsesApiCompactionTelemetry(this.telemetryService, {
1099
						outcome: 'threshold_met_no_compaction',
1100
						headerRequestId: this.requestId,
1101
						gitHubRequestId: this.ghRequestId,
1102
						model: chunk.response.model,
1103
					}, {
1104
						compactThreshold: this.compactionThreshold,
1105
						promptTokens,
1106
						totalTokens,
1107
					});
1108
					this.logService.debug(`[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${this.requestId}, gitHubRequestId=${this.ghRequestId}, promptTokens=${promptTokens}, totalTokens=${totalTokens}`);
1109
				}
1110
				onProgress({
1111
					text: '',
1112
					statefulMarker: chunk.response.id,
1113
					contextManagement: shouldEmitResolvedCompaction ? latestCompactionItem : undefined,
1114
				});
1115
				return {
1116
					blockFinished: true,
1117
					choiceIndex: 0,
1118
					model: chunk.response.model,
1119
					tokens: [],
1120
					telemetryData: this.telemetryData,
1121
					requestId: { headerRequestId: this.requestId, gitHubRequestId: this.ghRequestId, completionId: chunk.response.id, created: chunk.response.created_at, deploymentId: '', serverExperiments: this.serverExperiments },
1122
					usage: {
1123
						prompt_tokens: chunk.response.usage?.input_tokens ?? 0,
1124
						completion_tokens: chunk.response.usage?.output_tokens ?? 0,
1125
						total_tokens: chunk.response.usage?.total_tokens ?? 0,
1126
						prompt_tokens_details: {
1127
							cached_tokens: chunk.response.usage?.input_tokens_details.cached_tokens ?? 0,
1128
						},
1129
						completion_tokens_details: {
1130
							reasoning_tokens: chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0,
1131
							accepted_prediction_tokens: 0,
1132
							rejected_prediction_tokens: 0,
1133
						},
1134
					},
1135
					finishReason: FinishedCompletionReason.Stop,
1136
					message: {
1137
						role: Raw.ChatRole.Assistant,
1138
						content: normalizedOutput.map((item): Raw.ChatCompletionContentPart | undefined => {
1139
							if (item.type === 'message') {
1140
								return { type: Raw.ChatCompletionContentPartKind.Text, text: item.content.map(c => c.type === 'output_text' ? c.text : c.refusal).join('') };
1141
							} else if (item.type === 'image_generation_call' && item.result) {
1142
								return { type: Raw.ChatCompletionContentPartKind.Image, imageUrl: { url: item.result } };
1143
							}
1144
						}).filter(isDefined),
1145
					}
1146
				};
1147
			}
1148
		}
1149
	}
1150
}
1151

1152
function mapLogProp(text: Lazy<Uint8Array>, lp: OpenAI.Responses.ResponseTextDeltaEvent.Logprob.TopLogprob): TokenLogProb {
1153
	let bytes: number[] = [];
1154
	if (lp.token) {
1155
		const needle = new TextEncoder().encode(lp.token);
1156
		const haystack = text.value;
1157
		const idx = binaryIndexOf(haystack, needle);
1158
		if (idx !== -1) {
1159
			bytes = [idx, idx + needle.length];
1160
		}
1161
	}
1162

1163
	return {
1164
		token: lp.token!,
1165
		bytes,
1166
		logprob: lp.logprob!,
1167
	};
1168
}
1169

1170
Product

Resources

Company