Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/responsesApi.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Raw } from '@vscode/prompt-tsx';
7
import type { OpenAI } from 'openai';
8
import { Response } from '../../../platform/networking/common/fetcherService';
9
import { coalesce } from '../../../util/vs/base/common/arrays';
10
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
11
import { binaryIndexOf } from '../../../util/vs/base/common/buffer';
12
import { Lazy } from '../../../util/vs/base/common/lazy';
13
import { SSEParser } from '../../../util/vs/base/common/sseParser';
14
import { isDefined } from '../../../util/vs/base/common/types';
15
import { generateUuid } from '../../../util/vs/base/common/uuid';
16
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';
17
import { ChatLocation } from '../../chat/common/commonTypes';
18
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
19
import { ILogService } from '../../log/common/logService';
20
import { CUSTOM_TOOL_SEARCH_NAME } from '../../networking/common/anthropic';
21
import { FinishedCallback, getRequestId, IResponseDelta, OpenAiFunctionTool, OpenAiResponsesFunctionTool, OpenAiToolSearchTool } from '../../networking/common/fetch';
22
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody } from '../../networking/common/networking';
23
import { ChatCompletion, FinishedCompletionReason, modelsWithoutResponsesContextManagement, openAIContextManagementCompactionType, OpenAIContextManagementResponse, rawMessageToCAPI, TokenLogProb } from '../../networking/common/openai';
24
import { IToolDeferralService } from '../../networking/common/toolDeferralService';
25
import { sendEngineMessagesTelemetry, sendResponsesApiCompactionTelemetry } from '../../networking/node/chatStream';
26
import { IChatWebSocketManager } from '../../networking/node/chatWebSocketManager';
27
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
28
import { ITelemetryService } from '../../telemetry/common/telemetry';
29
import { TelemetryData } from '../../telemetry/common/telemetryData';
30
import { getVerbosityForModelSync, isResponsesApiToolSearchEnabled } from '../common/chatModelCapabilities';
31
import { rawPartAsCompactionData } from '../common/compactionDataContainer';
32
import { rawPartAsPhaseData } from '../common/phaseDataContainer';
33
import { getIndexOfStatefulMarker, getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer';
34
import { rawPartAsThinkingData } from '../common/thinkingDataContainer';
35
import { createResponsesStreamDumper } from './responsesApiDebugDump';
36
37
export function getResponsesApiCompactionThreshold(configService: IConfigurationService, expService: IExperimentationService, endpoint: IChatEndpoint): number | undefined {
38
const contextManagementEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService) && !modelsWithoutResponsesContextManagement.has(endpoint.family);
39
if (!contextManagementEnabled) {
40
return undefined;
41
}
42
43
return endpoint.modelMaxPromptTokens > 0
44
? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
45
: 50000;
46
}
47
48
/**
 * Builds the Responses API request body for a chat request: converts raw
 * messages to Responses input items, assembles the tools array (including the
 * optional client-executed tool search entry with deferred tool loading), and
 * applies compaction, truncation, reasoning, and prompt-cache settings.
 */
export function createResponsesRequestBody(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions, model: string, endpoint: IChatEndpoint): IEndpointBody {
	const configService = accessor.get(IConfigurationService);
	const expService = accessor.get(IExperimentationService);
	const verbosity = getVerbosityForModelSync(endpoint);
	const compactThreshold = getResponsesApiCompactionThreshold(configService, expService, endpoint);
	// compaction supported for all the models but works well for codex models and any future models after 5.3

	const webSocketStatefulMarker = resolveWebSocketStatefulMarker(accessor, options);
	// When WebSocket is in use, always defer to the WebSocket marker (which may be
	// undefined if the connection is new or the summary state changed). Never fall
	// back to the HTTP marker lookup in that case.
	const ignoreStatefulMarker = !!options.ignoreStatefulMarker || !!options.useWebSocket;
	const modeChanged = !!options.modeChanged;

	// Tool search: when enabled, split tools into non-deferred (included in the request) and deferred
	// (excluded from the request entirely). Uses OpenAI's client-executed tool search protocol: we add
	// { type: 'tool_search', execution: 'client' }. The model emits tool_search_call, which we handle via
	// our ToolSearchTool embeddings search, then round-trip as tool_search_output in the next request.
	const toolSearchEnabled = isResponsesApiToolSearchEnabled(endpoint, configService, expService);
	const isAllowedConversationAgent = options.location === ChatLocation.Agent || options.location === ChatLocation.MessagesProxy;
	const isSubagent = options.telemetryProperties?.subType?.startsWith('subagent') ?? false;
	const toolSearchInRequest = !!options.requestOptions?.tools?.some(t => t.function.name === CUSTOM_TOOL_SEARCH_NAME);
	const shouldDeferTools = toolSearchEnabled && isAllowedConversationAgent && !isSubagent && toolSearchInRequest;
	const toolDeferralService = shouldDeferTools ? accessor.get(IToolDeferralService) : undefined;

	type ResponsesFunctionTool = OpenAI.Responses.FunctionTool & OpenAiResponsesFunctionTool;
	const functionTools: ResponsesFunctionTool[] = [];
	if (options.requestOptions?.tools) {
		for (const tool of options.requestOptions.tools) {
			// Tools without a name cannot be invoked; drop them.
			if (!tool.function.name || tool.function.name.length === 0) {
				continue;
			}
			// Always skip the tool_search function tool — 'tool_search' is a reserved namespace in the
			// Responses API. Client-executed tool search uses { type: 'tool_search', execution: 'client' } instead.
			if (tool.function.name === CUSTOM_TOOL_SEARCH_NAME) {
				continue;
			}
			const isDeferred = shouldDeferTools && !toolDeferralService!.isNonDeferredTool(tool.function.name);
			// Client-executed tool search: deferred tools are NOT sent in the request.
			// They are returned via tool_search_output when the model searches for them.
			if (isDeferred) {
				continue;
			}
			functionTools.push({
				...tool.function,
				type: 'function',
				strict: false,
				parameters: (tool.function.parameters || {}) as Record<string, unknown>,
			});
		}
	}

	// Build final tools array
	const finalTools: Array<ResponsesFunctionTool | OpenAiToolSearchTool | ClientToolSearchTool> = [...functionTools];
	if (shouldDeferTools) {
		// Client-executed tool search: the model emits tool_search_call, our ToolSearchTool
		// handles the embeddings search, and we return tool_search_output with full definitions.
		finalTools.unshift({
			type: 'tool_search',
			execution: 'client',
			description: 'Search for relevant tools by describing what you need. Returns tool definitions for tools matching your query.',
			parameters: {
				type: 'object',
				properties: {
					query: {
						type: 'string',
						description: 'Natural language description of what tool capability you are looking for.',
					},
				},
				required: ['query'],
			},
		} as ClientToolSearchTool);
	}

	// Lookup structures for message conversion: full tool definitions by name,
	// and a predicate for tools that round-trip via tool_search_output.
	const toolsMap = options.requestOptions?.tools
		? new Map(options.requestOptions.tools.map(t => [t.function.name, t]))
		: undefined;
	const shouldLoadToolFromToolSearch = shouldDeferTools ? (name: string) => !toolDeferralService!.isNonDeferredTool(name) : undefined;

	const body: IEndpointBody = {
		model,
		...rawMessagesToResponseAPI(model, options.messages, ignoreStatefulMarker, webSocketStatefulMarker, {
			toolsMap,
			shouldLoadToolFromToolSearch,
			modeChanged,
		}),
		stream: true,
		tools: finalTools.length > 0 ? finalTools : undefined,
		// Only a subset of completion post options are supported, and some
		// are renamed. Handle them manually:
		max_output_tokens: options.postOptions.max_tokens,
		tool_choice: typeof options.postOptions.tool_choice === 'object'
			? { type: 'function', name: options.postOptions.tool_choice.function.name }
			: options.postOptions.tool_choice,
		top_logprobs: options.postOptions.logprobs ? 3 : undefined,
		store: false,
		text: verbosity ? { verbosity } : undefined,
	};

	if (compactThreshold !== undefined) {
		body.context_management = [{
			'type': openAIContextManagementCompactionType,
			// Trigger compaction at 90% of the model max prompt context to keep headroom for active turns.
			'compact_threshold': compactThreshold
		}];
	}

	body.truncation = configService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation) ?
		'auto' :
		'disabled';
	const thinkingExplicitlyDisabled = options.modelCapabilities?.enableThinking === false;
	const summaryConfig = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiReasoningSummary, expService);
	const shouldDisableReasoningSummary = endpoint.family === 'gpt-5.3-codex-spark-preview' || thinkingExplicitlyDisabled;
	const effortFromSetting = configService.getConfig(ConfigKey.Advanced.ReasoningEffortOverride);
	// Reasoning effort applies only to endpoints advertising support; the
	// explicit user setting wins over model capabilities, defaulting to 'medium'.
	const effort = endpoint.supportsReasoningEffort?.length
		? (effortFromSetting || options.modelCapabilities?.reasoningEffort || 'medium')
		: undefined;
	const summary = summaryConfig === 'off' || shouldDisableReasoningSummary ? undefined : summaryConfig;
	if (effort || summary) {
		body.reasoning = {
			...(effort ? { effort } : {}),
			...(summary ? { summary } : {})
		};
	}

	// Round-trips encrypted reasoning across stateless (store: false) requests.
	body.include = ['reasoning.encrypted_content'];

	const promptCacheKeyEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiPromptCacheKeyEnabled, expService);
	if (promptCacheKeyEnabled && options.conversationId) {
		body.prompt_cache_key = `${options.conversationId}:${endpoint.family}`;
	}

	return body;
}
182
183
/**
 * Reads the compaction threshold back out of a request body's
 * `context_management` entries, or `undefined` when none was configured.
 */
export function getResponsesApiCompactionThresholdFromBody(body: Pick<IEndpointBody, 'context_management'>): number | undefined {
	const entries = body.context_management;
	if (!Array.isArray(entries)) {
		return undefined;
	}

	const compaction = entries.find(entry =>
		entry.type === openAIContextManagementCompactionType && typeof entry.compact_threshold === 'number');
	return compaction?.compact_threshold;
}
197
198
/** An `output_text` content part as accepted inside assistant input messages. */
interface ResponseInputAssistantTextContentPart {
	type: 'output_text';
	text: string;
}

/**
 * An assistant message input item carrying an optional `phase` field, which
 * the SDK's input message types do not model.
 */
interface ResponseInputAssistantMessageWithPhase {
	type: 'message';
	role: 'assistant';
	content: ResponseInputAssistantTextContentPart[];
	phase?: string;
}

/** A response output item that may carry a `phase` field. */
interface ResponseOutputItemWithPhase {
	phase?: string;
}

// ── Responses API tool search types ──────────────────────────────────
// These match the shapes from https://developers.openai.com/api/docs/guides/tools-tool-search

/** Client-executed tool_search tool definition for the Responses API */
interface ClientToolSearchTool {
	type: 'tool_search';
	execution: 'client';
	description: string;
	parameters: Record<string, unknown>;
}

/** A tool_search_call item as emitted by the model. */
interface ResponsesToolSearchCall {
	type: 'tool_search_call';
	id: string;
	execution: 'client';
	call_id: string | null;
	status: string;
	arguments?: Record<string, unknown>;
}

/** Input item shape for a client-executed tool_search_call in conversation history */
interface ResponsesToolSearchCallInput {
	type: 'tool_search_call';
	execution: 'client';
	call_id: string;
	status: string;
	arguments: Record<string, unknown>;
}

/** Input item shape for a client-executed tool_search_output in conversation history */
interface ResponsesToolSearchOutputInput {
	type: 'tool_search_output';
	execution: 'client';
	call_id: string;
	status: string;
	tools: ToolSearchLoadedTool[];
}

/** A tool definition returned in tool_search_output */
interface ToolSearchLoadedTool {
	type: 'function';
	name: string;
	description: string;
	defer_loading: true;
	parameters: object;
}

/** The most recent compaction item found in a response output, with its position. */
interface LatestCompactionOutput {
	readonly item: OpenAIContextManagementResponse;
	readonly outputIndex: number;
}

/** A response output item narrowed to the compaction shape. */
type CompactionResponseOutputItem = OpenAI.Responses.ResponseOutputItem & OpenAIContextManagementResponse;

/** A compaction item seen in a stream chunk, with its output index when known. */
interface CompactionItemInChunk {
	readonly item: OpenAIContextManagementResponse;
	readonly outputIndex: number | undefined;
}

/** Shape of stream events that carry a single output item and its index. */
interface ResponseStreamEventWithOutputItem {
	readonly item: unknown;
	readonly output_index: number;
}

/** Shape of stream events that carry a full response output array. */
interface ResponseStreamEventWithResponseOutput {
	readonly response: {
		readonly output: OpenAI.Responses.ResponseOutputItem[];
	};
}
283
284
/**
 * Resolves the stateful marker held by the WebSocket connection for this
 * conversation. Returns `undefined` when markers must not be used (explicitly
 * ignored, not on the WebSocket path, or no conversation id) or when
 * client-side summarization state diverged from what the server last saw.
 */
function resolveWebSocketStatefulMarker(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions): string | undefined {
	const { ignoreStatefulMarker, useWebSocket, conversationId, summarizedAtRoundId } = options;
	if (ignoreStatefulMarker || !useWebSocket || !conversationId) {
		return undefined;
	}

	const wsManager = accessor.get(IChatWebSocketManager);
	// If client-side summarization state changed since the stateful marker was
	// stored (new summary, or rollback removing a summary), the server's state
	// no longer matches. Skip the marker so the full history is sent.
	if (summarizedAtRoundId !== wsManager.getSummarizedAtRoundId(conversationId)) {
		return undefined;
	}
	return wsManager.getStatefulMarker(conversationId);
}
298
299
/** Optional behaviors for {@link rawMessagesToResponseAPI}. */
interface RawMessagesToResponseAPIOptions {
	// Full tool definitions keyed by name, used to expand tool_search results.
	readonly toolsMap?: Map<string, OpenAiFunctionTool>;
	// Predicate selecting tools that must be loaded via tool_search_output.
	readonly shouldLoadToolFromToolSearch?: (name: string) => boolean;
	// When true, any stateful marker is discarded and full history is sent.
	readonly modeChanged?: boolean;
}
304
305
/**
 * Converts raw prompt messages into Responses API input items, resolving a
 * `previous_response_id` stateful marker when available (WebSocket or HTTP
 * path) and trimming already-server-known history accordingly. Also translates
 * client-executed tool search calls/results and round-trips thinking,
 * compaction, and phase data carried in opaque content parts.
 */
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean, webSocketStatefulMarker: string | undefined, options: RawMessagesToResponseAPIOptions = {}): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
	const { toolsMap, shouldLoadToolFromToolSearch, modeChanged = false } = options;
	const latestCompactionMessageIndex = getLatestCompactionMessageIndex(messages);
	const latestCompactionMessage = latestCompactionMessageIndex !== undefined ? createCompactionRoundTripMessage(messages[latestCompactionMessageIndex]) : undefined;

	let previousResponseId: string | undefined;
	let markerIndex: number | undefined;

	if (webSocketStatefulMarker) {
		// WebSocket path: use the connection's current stateful marker if present in messages
		markerIndex = getIndexOfStatefulMarker(webSocketStatefulMarker, messages);
		if (markerIndex !== undefined) {
			previousResponseId = webSocketStatefulMarker;
		}
	} else if (!ignoreStatefulMarker) {
		// HTTP path: look up the latest marker for this model from messages
		const statefulMarkerAndIndex = getStatefulMarkerAndIndex(modelId, messages);
		if (statefulMarkerAndIndex) {
			previousResponseId = statefulMarkerAndIndex.statefulMarker;
			markerIndex = statefulMarkerAndIndex.index;
		}
	}

	// A mode switch invalidates server-side state; resend the full history.
	if (modeChanged) {
		previousResponseId = undefined;
		markerIndex = undefined;
	}

	if (markerIndex !== undefined) {
		// Requests that resume from previous_response_id send only post-marker history,
		// but they still need the latest compaction item even when that item predates
		// the marker. This keeps both websocket and non-websocket traffic aligned.
		messages = messages.slice(markerIndex + 1);
		if (latestCompactionMessageIndex !== undefined) {
			if (latestCompactionMessageIndex > markerIndex) {
				messages = messages.slice(latestCompactionMessageIndex - (markerIndex + 1));
			} else if (latestCompactionMessage) {
				messages = [latestCompactionMessage, ...messages];
			}
		}
	} else if (latestCompactionMessageIndex !== undefined) {
		// No marker: history before the latest compaction is already summarized.
		messages = messages.slice(latestCompactionMessageIndex);
	}

	// Track which call_ids are tool_search_calls (from client-executed tool search)
	const toolSearchCallIds = new Set<string>();
	// Track tool names loaded via tool_search_output — these need a namespace field on function_call
	const toolSearchLoadedTools = new Set<string>();

	const input: OpenAI.Responses.ResponseInputItem[] = [];
	for (const message of messages) {
		switch (message.role) {
			case Raw.ChatRole.Assistant:
				if (message.content.length) {
					input.push(...extractCompactionData(message.content));
					input.push(...extractThinkingData(message.content));
					const asstContent = message.content.map(rawContentToResponsesAssistantContent).filter(isDefined);
					if (asstContent.length) {
						const assistantMessage: ResponseInputAssistantMessageWithPhase = {
							role: 'assistant',
							content: asstContent,
							type: 'message',
							phase: extractPhaseData(message.content),
						};
						// The Responses API expects previous assistant message content as output_text/refusal,
						// but the SDK's ResponseOutputMessage type requires response-only id/status fields.
						input.push(assistantMessage as OpenAI.Responses.ResponseInputItem);
					}
				}
				if (message.toolCalls) {
					for (const toolCall of message.toolCalls) {
						if (toolCall.function.name === CUSTOM_TOOL_SEARCH_NAME) {
							// Client-executed tool search: emit as tool_search_call instead of function_call
							toolSearchCallIds.add(toolCall.id);
							let parsedArgs: Record<string, unknown> = {};
							// Best-effort parse; malformed arguments round-trip as {}.
							try { parsedArgs = JSON.parse(toolCall.function.arguments || '{}'); } catch { }
							input.push({
								type: 'tool_search_call',
								execution: 'client',
								call_id: toolCall.id,
								status: 'completed',
								arguments: parsedArgs,
							} satisfies ResponsesToolSearchCallInput as unknown as OpenAI.Responses.ResponseInputItem);
						} else {
							// Tools loaded via tool_search need a namespace field to round-trip correctly
							const namespace = toolSearchLoadedTools.has(toolCall.function.name) ? toolCall.function.name : undefined;
							input.push({ type: 'function_call', name: toolCall.function.name, arguments: toolCall.function.arguments, call_id: toolCall.id, ...(namespace ? { namespace } : {}) });
						}
					}
				}
				break;
			case Raw.ChatRole.Tool:
				if (message.toolCallId) {
					if (toolSearchCallIds.has(message.toolCallId)) {
						// Client-executed tool search result: convert tool names to tool_search_output with full definitions
						const resultText = message.content
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Text)
							.map(c => c.text)
							.join('');
						const loadedTools = toolsMap ? buildToolSearchOutputTools(resultText, toolsMap, shouldLoadToolFromToolSearch) : [];
						for (const t of loadedTools) {
							toolSearchLoadedTools.add(t.name);
						}
						input.push({
							type: 'tool_search_output',
							execution: 'client',
							call_id: message.toolCallId,
							status: 'completed',
							tools: loadedTools,
						} satisfies ResponsesToolSearchOutputInput as unknown as OpenAI.Responses.ResponseInputItem);
					} else {
						const asText = message.content
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Text)
							.map(c => c.text)
							.join('');
						const asImages = message.content
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Image)
							.map((c): OpenAI.Responses.ResponseInputImage => ({
								type: 'input_image',
								detail: c.imageUrl.detail || 'auto',
								image_url: c.imageUrl.url,
							}));

						// todo@connor4312: hack while responses API only supports text output from tools
						input.push({ type: 'function_call_output', call_id: message.toolCallId, output: asText });
						if (asImages.length) {
							input.push({ role: 'user', content: [{ type: 'input_text', text: 'Image associated with the above tool call:' }, ...asImages] });
						}
					}
				}
				break;
			case Raw.ChatRole.User:
				input.push({ role: 'user', content: message.content.map(rawContentToResponsesContent).filter(isDefined) });
				break;
			case Raw.ChatRole.System:
				input.push({ role: 'system', content: message.content.map(rawContentToResponsesContent).filter(isDefined) });
				break;
		}
	}

	return { input, previous_response_id: previousResponseId };
}
447
448
/**
449
* Converts a JSON array of tool names (from ToolSearchTool) into full tool definitions
450
* for the tool_search_output. Falls back to an empty array on parse failure.
451
*/
452
function buildToolSearchOutputTools(resultText: string, toolsMap: Map<string, OpenAiFunctionTool>, shouldLoadToolFromToolSearch: ((name: string) => boolean) | undefined): ToolSearchLoadedTool[] {
453
let toolNames: unknown;
454
try { toolNames = JSON.parse(resultText); } catch { return []; }
455
if (!Array.isArray(toolNames)) { return []; }
456
457
return toolNames
458
.filter((name): name is string => typeof name === 'string' && name !== CUSTOM_TOOL_SEARCH_NAME && toolsMap.has(name) && shouldLoadToolFromToolSearch?.(name) === true)
459
.map(name => {
460
const tool = toolsMap.get(name)!;
461
return {
462
type: 'function' as const,
463
name: tool.function.name,
464
description: tool.function.description || '',
465
defer_loading: true as const,
466
parameters: tool.function.parameters || { type: 'object', properties: {} },
467
};
468
});
469
}
470
471
function createCompactionRoundTripMessage(message: Raw.ChatMessage): Raw.ChatMessage | undefined {
472
if (message.role !== Raw.ChatRole.Assistant) {
473
return undefined;
474
}
475
476
const content = message.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part));
477
if (!content.length) {
478
return undefined;
479
}
480
481
return {
482
role: Raw.ChatRole.Assistant,
483
content,
484
};
485
}
486
487
/**
 * Finds the index of the most recent message containing a compaction opaque
 * part, scanning from the end, or `undefined` when none exists.
 */
function getLatestCompactionMessageIndex(messages: readonly Raw.ChatMessage[]): number | undefined {
	for (let idx = messages.length - 1; idx >= 0; idx--) {
		const hasCompaction = messages[idx].content.some(part =>
			part.type === Raw.ChatCompletionContentPartKind.Opaque && !!rawPartAsCompactionData(part));
		if (hasCompaction) {
			return idx;
		}
	}
	return undefined;
}
499
500
function rawContentToResponsesContent(part: Raw.ChatCompletionContentPart): OpenAI.Responses.ResponseInputContent | undefined {
501
switch (part.type) {
502
case Raw.ChatCompletionContentPartKind.Text:
503
return { type: 'input_text', text: part.text };
504
case Raw.ChatCompletionContentPartKind.Image:
505
return { type: 'input_image', detail: part.imageUrl.detail || 'auto', image_url: part.imageUrl.url };
506
case Raw.ChatCompletionContentPartKind.Opaque: {
507
const maybeCast = part.value as OpenAI.Responses.ResponseInputContent;
508
if (maybeCast.type === 'input_text' || maybeCast.type === 'input_image' || maybeCast.type === 'input_file') {
509
return maybeCast;
510
}
511
}
512
}
513
}
514
515
function rawContentToResponsesAssistantContent(part: Raw.ChatCompletionContentPart): Pick<OpenAI.Responses.ResponseOutputText, 'type' | 'text'> | undefined {
516
switch (part.type) {
517
case Raw.ChatCompletionContentPartKind.Text:
518
if (part.text.trim()) {
519
return { type: 'output_text', text: part.text };
520
}
521
}
522
}
523
524
function extractThinkingData(content: Raw.ChatCompletionContentPart[]): OpenAI.Responses.ResponseReasoningItem[] {
525
return coalesce(content.map(part => {
526
if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
527
const thinkingData = rawPartAsThinkingData(part);
528
if (thinkingData) {
529
return {
530
type: 'reasoning',
531
id: thinkingData.id,
532
summary: [],
533
encrypted_content: thinkingData.encrypted,
534
} satisfies OpenAI.Responses.ResponseReasoningItem;
535
}
536
}
537
}));
538
}
539
540
/** Returns the first phase value found in opaque phase-data parts, if any. */
function extractPhaseData(content: Raw.ChatCompletionContentPart[]): string | undefined {
	for (const part of content) {
		if (part.type !== Raw.ChatCompletionContentPartKind.Opaque) {
			continue;
		}
		const phase = rawPartAsPhaseData(part);
		if (phase) {
			return phase;
		}
	}
	return undefined;
}
551
552
/**
553
* Extracts compaction data from opaque content parts and converts them to
554
* Responses API input items for round-tripping.
555
*/
556
function extractCompactionData(content: Raw.ChatCompletionContentPart[]): OpenAI.Responses.ResponseInputItem[] {
557
return coalesce(content.map(part => {
558
if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
559
const compaction = rawPartAsCompactionData(part);
560
if (compaction) {
561
return {
562
type: openAIContextManagementCompactionType,
563
id: compaction.id,
564
encrypted_content: compaction.encrypted_content,
565
} as unknown as OpenAI.Responses.ResponseInputItem;
566
}
567
}
568
}));
569
}
570
571
/**
 * This is an approximate responses input -> raw messages helper, should be used for logging only.
 *
 * Lossy by design: reasoning items become text placeholders, file inputs
 * become opaque markers, and function calls are grouped onto synthetic
 * assistant messages.
 */
export function responseApiInputToRawMessagesForLogging(body: OpenAI.Responses.ResponseCreateParams): Raw.ChatMessage[] {
	const messages: Raw.ChatMessage[] = [];
	// function_call items arrive as standalone input items; buffer them and
	// attach the batch to a synthetic assistant message when flushed.
	const pendingFunctionCalls: Raw.ChatMessageToolCall[] = [];

	const flushPendingFunctionCalls = () => {
		if (pendingFunctionCalls.length > 0) {
			messages.push({
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: pendingFunctionCalls.splice(0)
			});
		}
	};

	// Add system instructions if provided
	if (body.instructions) {
		messages.push({
			role: Raw.ChatRole.System,
			content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: body.instructions }]
		});
	}

	// Convert input to array format if it's a string
	const inputItems = typeof body.input === 'string' ? [{ role: 'user' as const, content: body.input, type: 'message' as const }] : (body.input ?? []);

	for (const item of inputItems) {
		// Handle message items with roles
		if ('role' in item) {
			switch (item.role) {
				case 'user':
					flushPendingFunctionCalls();
					messages.push({
						role: Raw.ChatRole.User,
						content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
					});
					break;
				case 'system':
				case 'developer':
					flushPendingFunctionCalls();
					messages.push({
						role: Raw.ChatRole.System,
						content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
					});
					break;
				case 'assistant':
					flushPendingFunctionCalls();
					// Assistant items come in two shapes: output messages
					// (output_text/refusal parts) and plain input messages.
					if (isResponseOutputMessage(item)) {
						messages.push({
							role: Raw.ChatRole.Assistant,
							content: item.content.map(responseOutputToRawContent).filter(isDefined)
						});
					} else if (isResponseInputItemMessage(item)) {
						messages.push({
							role: Raw.ChatRole.Assistant,
							content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
						});
					}
					break;
			}
		} else if ('type' in item) {
			// Handle other item types without roles
			switch (item.type) {
				case 'function_call':
					// Collect function calls to be grouped with the next assistant message
					pendingFunctionCalls.push({
						id: item.call_id,
						type: 'function',
						function: {
							name: item.name,
							arguments: item.arguments
						}
					});
					break;
				case 'function_call_output': {
					flushPendingFunctionCalls();
					const content = responseFunctionOutputToRawContents(item.output);
					messages.push({
						role: Raw.ChatRole.Tool,
						content,
						toolCallId: item.call_id
					});
					break;
				}
				case 'reasoning':
					// We can't perfectly reconstruct the original thinking data
					// but we can add a placeholder for logging
					flushPendingFunctionCalls();
					messages.push({
						role: Raw.ChatRole.Assistant,
						content: [{
							type: Raw.ChatCompletionContentPartKind.Text,
							text: `Reasoning summary: ${item.summary.map(s => s.text).join('\n\n')}`
						}]
					});
					break;
				default: {
					// Client-executed tool search items (tool_search_call / tool_search_output)
					const tsItem = item as unknown as ResponsesToolSearchCallInput | ResponsesToolSearchOutputInput;
					if (tsItem.type === 'tool_search_call') {
						// Surface the search as a call to our tool_search function tool.
						pendingFunctionCalls.push({
							id: tsItem.call_id,
							type: 'function',
							function: {
								name: CUSTOM_TOOL_SEARCH_NAME,
								arguments: JSON.stringify(tsItem.arguments ?? {}),
							}
						});
					} else if (tsItem.type === 'tool_search_output') {
						flushPendingFunctionCalls();
						// Log only the loaded tool names, mirroring ToolSearchTool's result shape.
						const toolNames = tsItem.tools.map(t => t.name);
						messages.push({
							role: Raw.ChatRole.Tool,
							content: [{
								type: Raw.ChatCompletionContentPartKind.Text,
								text: JSON.stringify(toolNames),
							}],
							toolCallId: tsItem.call_id,
						});
					}
					break;
				}
			}
		}
	}

	// Flush any remaining function calls at the end
	if (pendingFunctionCalls.length > 0) {
		messages.push({
			role: Raw.ChatRole.Assistant,
			content: [],
			toolCalls: pendingFunctionCalls.splice(0)
		});
	}

	return messages;
}
710
711
/** Type guard: an assistant `message` item whose content is in response-output array form. */
function isResponseOutputMessage(item: OpenAI.Responses.ResponseInputItem): item is OpenAI.Responses.ResponseOutputMessage {
	if (!('role' in item) || item.role !== 'assistant') {
		return false;
	}
	return 'type' in item && item.type === 'message' && 'content' in item && Array.isArray(item.content);
}
714
715
/** Type guard: an assistant message in input-item form (no `type: 'message'` tag). */
function isResponseInputItemMessage(item: OpenAI.Responses.ResponseInputItem): item is OpenAI.Responses.ResponseInputItem.Message {
	const isAssistant = 'role' in item && item.role === 'assistant';
	return isAssistant && (!('type' in item) || item.type !== 'message');
}
718
719
/** Normalizes string content into a single-element `input_text` part list. */
function ensureContentArray(content: string | OpenAI.Responses.ResponseInputMessageContentList): OpenAI.Responses.ResponseInputMessageContentList {
	return typeof content === 'string'
		? [{ type: 'input_text', text: content }]
		: content;
}
725
726
function responseContentToRawContent(part: OpenAI.Responses.ResponseInputContent | OpenAI.Responses.ResponseFunctionCallOutputItem): Raw.ChatCompletionContentPart | undefined {
727
switch (part.type) {
728
case 'input_text':
729
return { type: Raw.ChatCompletionContentPartKind.Text, text: part.text };
730
case 'input_image':
731
return {
732
type: Raw.ChatCompletionContentPartKind.Image,
733
imageUrl: {
734
url: part.image_url || '',
735
detail: part.detail === 'auto' ?
736
undefined :
737
(part.detail ?? undefined)
738
}
739
};
740
case 'input_file':
741
// This is a rough approximation for logging
742
return {
743
type: Raw.ChatCompletionContentPartKind.Opaque,
744
value: `[File Input - Filename: ${part.filename || 'unknown'}]`
745
};
746
}
747
}
748
749
function responseOutputToRawContent(part: OpenAI.Responses.ResponseOutputText | OpenAI.Responses.ResponseOutputRefusal): Raw.ChatCompletionContentPart | undefined {
750
switch (part.type) {
751
case 'output_text':
752
return { type: Raw.ChatCompletionContentPartKind.Text, text: part.text };
753
case 'refusal':
754
return { type: Raw.ChatCompletionContentPartKind.Text, text: `[Refusal: ${part.refusal}]` };
755
}
756
}
757
758
/** Normalizes a function call output (string or item list) to raw content parts. */
function responseFunctionOutputToRawContents(output: string | OpenAI.Responses.ResponseFunctionCallOutputItemList): Raw.ChatCompletionContentPart[] {
	if (typeof output !== 'string') {
		return coalesce(output.map(responseContentToRawContent));
	}
	return [{ type: Raw.ChatCompletionContentPartKind.Text, text: output }];
}
764
765
/**
 * Type guard for server-side context-management (compaction) payloads: any
 * non-null object whose `type` field stringifies to the compaction type tag.
 */
function isCompactionItem(value: unknown): value is OpenAIContextManagementResponse {
	if (typeof value !== 'object' || value === null) {
		return false;
	}
	return 'type' in value && String(value.type) === openAIContextManagementCompactionType;
}
/**
 * Type guard for stream events that carry a single output item together with
 * a numeric `output_index` (e.g. output_item.added / output_item.done).
 */
function hasOutputItem(chunk: OpenAI.Responses.ResponseStreamEvent): chunk is OpenAI.Responses.ResponseStreamEvent & ResponseStreamEventWithOutputItem {
	if (!('item' in chunk) || !('output_index' in chunk)) {
		return false;
	}
	return typeof chunk.output_index === 'number';
}
/**
 * Type guard for stream events that carry a full response payload whose
 * `output` is an array (e.g. response.created / response.completed).
 */
function hasResponseOutput(chunk: OpenAI.Responses.ResponseStreamEvent): chunk is OpenAI.Responses.ResponseStreamEvent & ResponseStreamEventWithResponseOutput {
	// Optional chaining so a malformed event with `response: null`/undefined is
	// rejected instead of throwing inside a type guard (a throw here would
	// escape into the SSE parse callback and reject the whole stream).
	return 'response' in chunk && Array.isArray(chunk.response?.output);
}
/** Reads the position of a streamed output item within the response's output list. */
function getOutputItemIndex(chunk: ResponseStreamEventWithOutputItem): number {
	const { output_index } = chunk;
	return output_index;
}
/**
 * Narrows a response output item to the compaction variant. Delegates to
 * {@link isCompactionItem}; exists only to give the narrowed type the more
 * specific `CompactionResponseOutputItem` shape for output-list processing.
 */
function isCompactionOutputItem(item: OpenAI.Responses.ResponseOutputItem): item is CompactionResponseOutputItem {
	return isCompactionItem(item);
}
/**
 * Determines which compaction item in `output` should win.
 *
 * The compaction item with the highest output index is selected; when
 * `preferredOutputIndex` points at a compaction item whose index is at least
 * as large as that, the preferred item is returned instead. Returns
 * undefined when no compaction item exists.
 */
function getLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): LatestCompactionOutput | undefined {
	// Forward scan keeping the last compaction item seen — equivalent to the
	// reverse scan with early exit, since later items overwrite earlier ones.
	let scanned: LatestCompactionOutput | undefined;
	for (let i = 0; i < output.length; i++) {
		const candidate = output[i];
		if (isCompactionOutputItem(candidate)) {
			scanned = { item: candidate, outputIndex: i };
		}
	}

	if (preferredOutputIndex !== undefined) {
		const preferred = output[preferredOutputIndex];
		if (preferred && isCompactionOutputItem(preferred) && (!scanned || preferredOutputIndex >= scanned.outputIndex)) {
			return { item: preferred, outputIndex: preferredOutputIndex };
		}
	}

	return scanned;
}
function keepLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): OpenAI.Responses.ResponseOutputItem[] {
806
const latestCompactionOutput = getLatestCompactionOutput(output, preferredOutputIndex);
807
if (!latestCompactionOutput) {
808
return output;
809
}
810
811
return output.filter((item, idx) => !isCompactionOutputItem(item) || idx === latestCompactionOutput.outputIndex);
812
}
813
814
/**
 * Consumes a Responses-API SSE body and exposes it as an async iterable of
 * ChatCompletions.
 *
 * Each SSE event is parsed, logged to the stream dumper, and fed through an
 * {@link OpenAIResponsesProcessor}; completions it yields are reported to
 * telemetry and emitted to the iterable. Any parse/processing error rejects
 * the iterable. Disposing the iterable destroys the underlying body stream.
 */
export async function processResponseFromChatEndpoint(instantiationService: IInstantiationService, telemetryService: ITelemetryService, logService: ILogService, response: Response, expectedNumChoices: number, finishCallback: FinishedCallback, telemetryData: TelemetryData, compactionThreshold?: number): Promise<AsyncIterableObject<ChatCompletion>> {
	const disposeBody = async () => {
		await response.body.destroy();
	};

	return new AsyncIterableObject<ChatCompletion>(async feed => {
		const headerRequestId = response.headers.get('X-Request-ID') ?? generateUuid();
		const githubRequestId = response.headers.get('x-github-request-id') ?? '';
		const { serverExperiments } = getRequestId(response.headers);
		const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, headerRequestId, githubRequestId, serverExperiments, compactionThreshold);
		const dumper = createResponsesStreamDumper(headerRequestId, logService);

		// Parse, log, and process a single SSE event; may throw on bad JSON.
		const handleEvent = (ev: { type: string; data: string }): void => {
			logService.trace(`SSE: ${ev.data}`);
			const payload = JSON.parse(ev.data);
			// The event's SSE type is folded into the parsed payload so the
			// processor sees a self-describing stream event.
			const streamEvent: OpenAI.Responses.ResponseStreamEvent = { type: ev.type, ...payload };
			dumper.logEvent(streamEvent);
			const completion = processor.push(streamEvent, finishCallback);
			if (completion) {
				sendCompletionOutputTelemetry(telemetryService, logService, completion, telemetryData);
				feed.emitOne(completion);
			}
		};

		const parser = new SSEParser((ev) => {
			try {
				handleEvent(ev);
			} catch (e) {
				feed.reject(e);
			}
		});

		for await (const chunk of response.body) {
			parser.feed(chunk);
		}
	}, disposeBody);
}
export function sendCompletionOutputTelemetry(telemetryService: ITelemetryService, logService: ILogService, completion: ChatCompletion, telemetryData: TelemetryData): void {
846
const telemetryMessage = rawMessageToCAPI(completion.message);
847
let telemetryDataWithUsage = telemetryData;
848
if (completion.usage) {
849
telemetryDataWithUsage = telemetryData.extendedBy({}, {
850
promptTokens: completion.usage.prompt_tokens,
851
completionTokens: completion.usage.completion_tokens,
852
totalTokens: completion.usage.total_tokens,
853
...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }),
854
...(completion.usage.completion_tokens_details && {
855
reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens,
856
acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens,
857
rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens,
858
}),
859
});
860
}
861
sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService);
862
}
863
864
/**
 * CAPI's shape of the output-text delta event. It differs from the upstream
 * OpenAI type only in that `logprobs` may be absent, so the field is
 * re-declared as possibly undefined via Omit + override.
 */
interface CapiResponsesTextDeltaEvent extends Omit<OpenAI.Responses.ResponseTextDeltaEvent, 'logprobs'> {
	logprobs: Array<OpenAI.Responses.ResponseTextDeltaEvent.Logprob> | undefined;
}
/**
 * Stateful translator from OpenAI Responses-API stream events into the
 * ChatCompletion / IResponseDelta shapes used by the rest of this codebase.
 *
 * One instance handles a single streamed response: `push` is called with each
 * SSE event in arrival order, forwards incremental deltas through the
 * FinishedCallback, and returns the final ChatCompletion when the
 * 'response.completed' event arrives.
 */
export class OpenAIResponsesProcessor {
	// Running concatenation of all delta text emitted so far; passed to the
	// FinishedCallback as the accumulated text.
	private textAccumulator: string = '';
	// Set once a reasoning summary is streamed via delta events; used to avoid
	// duplicating the summary when the finished reasoning item arrives.
	private hasReceivedReasoningSummary = false;
	// True once any compaction (context-management) item has been observed.
	private sawCompactionMessage = false;
	// Output index of the most recently captured compaction item, if any.
	private latestCompactionOutputIndex: number | undefined;
	// Most recently captured compaction item; compared by id/encrypted_content
	// to suppress duplicate contextManagement emissions.
	private latestCompactionItem: OpenAIContextManagementResponse | undefined;
	/** Tracks the output_index of the last text delta to detect output item boundaries */
	private lastTextDeltaOutputIndex: number | undefined;
	/** Maps output_index to { name, callId, arguments } for streaming tool call updates */
	private readonly toolCallInfo = new Map<number, { name: string; callId: string; arguments: string }>();

	constructor(
		private readonly telemetryData: TelemetryData,
		private readonly telemetryService: ITelemetryService,
		private readonly requestId: string,
		private readonly ghRequestId: string,
		private readonly serverExperiments: string,
		private readonly compactionThreshold: number | undefined,
		@ILogService private readonly logService: ILogService,
	) { }

	/**
	 * Collects every compaction item present in the given stream event:
	 * one from a single streamed output item (with its output_index), plus
	 * any found inside a full response output list.
	 */
	private getCompactionItemsInChunk(chunk: OpenAI.Responses.ResponseStreamEvent): CompactionItemInChunk[] {
		const compactionItems: CompactionItemInChunk[] = [];

		if (hasOutputItem(chunk) && isCompactionItem(chunk.item)) {
			const outputIndex = getOutputItemIndex(chunk);
			compactionItems.push({ item: chunk.item, outputIndex });
		}

		if (hasResponseOutput(chunk)) {
			for (let idx = 0; idx < chunk.response.output.length; idx++) {
				const item = chunk.response.output[idx];
				if (isCompactionItem(item)) {
					compactionItems.push({ item, outputIndex: idx });
				}
			}
		}

		return compactionItems;
	}

	/**
	 * Records a compaction item and forwards it via onProgress unless it is
	 * stale (lower output index than one already captured) or a duplicate of
	 * the last captured item (same id and encrypted_content).
	 */
	private captureCompactionItem(item: OpenAIContextManagementResponse, outputIndex: number | undefined, onProgress: (delta: IResponseDelta) => undefined): void {
		// Ignore items that appear earlier in the output than the latest one
		// already captured.
		if (outputIndex !== undefined && this.latestCompactionOutputIndex !== undefined && outputIndex < this.latestCompactionOutputIndex) {
			return;
		}

		const previousCompactionItem = this.latestCompactionItem;
		this.sawCompactionMessage = true;
		this.latestCompactionOutputIndex = outputIndex ?? this.latestCompactionOutputIndex;
		this.latestCompactionItem = item;

		// State is updated above, but a byte-identical repeat is not re-emitted.
		if (previousCompactionItem?.id === item.id && previousCompactionItem.encrypted_content === item.encrypted_content) {
			return;
		}

		onProgress({
			text: '',
			contextManagement: {
				type: openAIContextManagementCompactionType,
				id: item.id,
				encrypted_content: item.encrypted_content,
			}
		});
	}

	/**
	 * Processes one stream event. Deltas (text, tool calls, reasoning,
	 * compaction) are forwarded through `_onProgress`; the final
	 * ChatCompletion is returned only for 'response.completed'.
	 */
	public push(chunk: OpenAI.Responses.ResponseStreamEvent, _onProgress: FinishedCallback): ChatCompletion | undefined {
		// Wrap the callback so the accumulated text is maintained in one place.
		const onProgress = (delta: IResponseDelta): undefined => {
			this.textAccumulator += delta.text;
			_onProgress(this.textAccumulator, 0, delta);
		};
		// Compaction items in 'response.completed' are handled specially in the
		// case below; all other events capture them eagerly here.
		const compactionItems = this.getCompactionItemsInChunk(chunk);
		if (chunk.type !== 'response.completed') {
			for (const { item, outputIndex } of compactionItems) {
				this.captureCompactionItem(item, outputIndex, onProgress);
			}
		}

		switch (chunk.type) {
			case 'error':
				return onProgress({ text: '', copilotErrors: [{ agent: 'openai', code: chunk.code || 'unknown', message: chunk.message, type: 'error', identifier: chunk.param || undefined }] });
			case 'response.output_text.delta': {
				const capiChunk: CapiResponsesTextDeltaEvent = chunk;
				// When text arrives from a new output item, emit a paragraph
				// separator so that e.g. commentary and final text don't fuse.
				if (this.lastTextDeltaOutputIndex !== undefined && capiChunk.output_index !== this.lastTextDeltaOutputIndex) {
					onProgress({ text: '\n\n' });
				}
				this.lastTextDeltaOutputIndex = capiChunk.output_index;
				// Lazily encode the delta once; mapLogProp searches it per token.
				const haystack = new Lazy(() => new TextEncoder().encode(capiChunk.delta));
				return onProgress({
					text: capiChunk.delta,
					logprobs: capiChunk.logprobs && {
						content: capiChunk.logprobs.map(lp => ({
							...mapLogProp(haystack, lp),
							top_logprobs: lp.top_logprobs?.map(l => mapLogProp(haystack, l)) || []
						}))
					},
				});
			}
			case 'response.output_item.added':
				if (chunk.item.type === 'function_call') {
					// Start tracking this tool call so argument deltas can be
					// attributed to it by output_index.
					this.toolCallInfo.set(chunk.output_index, { name: chunk.item.name, callId: chunk.item.call_id, arguments: '' });
					onProgress({
						text: '',
						beginToolCalls: [{ name: chunk.item.name, id: chunk.item.call_id }]
					});
				} else if (chunk.item.type.toString() === 'tool_search_call') {
					// 'tool_search_call' is not in the upstream type union, hence
					// the toString() comparison and the cast.
					const tsItem = chunk.item as unknown as ResponsesToolSearchCall;
					if (tsItem.execution === 'client' && tsItem.call_id) {
						// Client-executed tool search: treat as a regular tool call so our ToolSearchTool handles it.
						this.toolCallInfo.set(chunk.output_index, { name: CUSTOM_TOOL_SEARCH_NAME, callId: tsItem.call_id, arguments: '' });
						onProgress({
							text: '',
							beginToolCalls: [{ name: CUSTOM_TOOL_SEARCH_NAME, id: tsItem.call_id }]
						});
					}
				}
				return;
			case 'response.function_call_arguments.delta': {
				const info = this.toolCallInfo.get(chunk.output_index);
				if (info) {
					// Accumulate the partial JSON arguments and forward the
					// full-so-far string with each update.
					info.arguments += chunk.delta;
					onProgress({
						text: '',
						copilotToolCallStreamUpdates: [{
							id: info.callId,
							name: info.name,
							arguments: info.arguments,
						}],
					});
				}
				return;
			}
			case 'response.output_item.done':
				if (chunk.item.type === 'function_call') {
					// The done item carries the complete arguments, so the
					// streaming state can be dropped.
					this.toolCallInfo.delete(chunk.output_index);
					onProgress({
						text: '',
						copilotToolCalls: [{
							id: chunk.item.call_id,
							name: chunk.item.name,
							arguments: chunk.item.arguments,
						}],
						phase: (chunk.item as ResponseOutputItemWithPhase).phase
					});
				} else if (chunk.item.type.toString() === 'tool_search_call') {
					const tsCall = chunk.item as unknown as ResponsesToolSearchCall;
					if (tsCall.execution === 'client' && tsCall.call_id) {
						// Client-executed tool search completed: emit as a completed copilotToolCall
						this.toolCallInfo.delete(chunk.output_index);
						onProgress({
							text: '',
							copilotToolCalls: [{
								id: tsCall.call_id,
								name: CUSTOM_TOOL_SEARCH_NAME,
								arguments: JSON.stringify(tsCall.arguments ?? {}),
							}],
						});
					}
				} else if (chunk.item.type === 'reasoning') {
					onProgress({
						text: '',
						thinking: chunk.item.encrypted_content ? {
							id: chunk.item.id,
							// CAPI models don't stream the reasoning summary for some reason, byok do, so don't duplicate it
							text: this.hasReceivedReasoningSummary ?
								undefined :
								chunk.item.summary.map(s => s.text),
							encrypted: chunk.item.encrypted_content,
						} : undefined
					});
				} else if (chunk.item.type === 'message') {
					onProgress({
						text: '',
						phase: (chunk.item as ResponseOutputItemWithPhase).phase
					});
				}
				return;
			case 'response.reasoning_summary_text.delta':
				this.hasReceivedReasoningSummary = true;
				return onProgress({
					text: '',
					thinking: {
						id: chunk.item_id,
						text: chunk.delta,
					}
				});
			case 'response.reasoning_summary_part.done':
				this.hasReceivedReasoningSummary = true;
				return onProgress({
					text: '',
					thinking: {
						id: chunk.item_id
					}
				});
			case 'response.completed': {
				// Drop all but the winning compaction item, then resolve which
				// one that is relative to what was captured during streaming.
				const normalizedOutput = keepLatestCompactionOutput(chunk.response.output, this.latestCompactionOutputIndex);
				const latestCompactionOutput = getLatestCompactionOutput(normalizedOutput, this.latestCompactionOutputIndex);
				const latestCompactionItem = latestCompactionOutput?.item;
				const previousCompactionItem = this.latestCompactionItem;
				if (latestCompactionItem) {
					this.sawCompactionMessage = true;
					this.latestCompactionOutputIndex = latestCompactionOutput.outputIndex;
				}

				// Only emit the resolved compaction if it differs from what was
				// already emitted during streaming (by id or encrypted_content).
				const shouldEmitResolvedCompaction = latestCompactionItem && (
					!previousCompactionItem ||
					previousCompactionItem.id !== latestCompactionItem.id ||
					previousCompactionItem.encrypted_content !== latestCompactionItem.encrypted_content
				);
				if (latestCompactionItem) {
					this.latestCompactionItem = latestCompactionItem;
				}
				// Compaction telemetry: either compaction happened, or the token
				// threshold was crossed without the server compacting.
				if (this.compactionThreshold !== undefined && this.sawCompactionMessage) {
					const promptTokens = chunk.response.usage?.input_tokens ?? 0;
					const totalTokens = chunk.response.usage?.total_tokens ?? 0;
					sendResponsesApiCompactionTelemetry(this.telemetryService, {
						outcome: 'compaction_returned',
						headerRequestId: this.requestId,
						gitHubRequestId: this.ghRequestId,
						model: chunk.response.model,
					}, {
						compactThreshold: this.compactionThreshold,
						promptTokens,
						totalTokens,
					});
					this.logService.debug(`[responsesAPI_compaction] Compaction enabled. headerRequestId=${this.requestId}`);
				} else if (this.compactionThreshold !== undefined && (chunk.response.usage?.input_tokens ?? 0) >= this.compactionThreshold) {
					const promptTokens = chunk.response.usage?.input_tokens ?? 0;
					const totalTokens = chunk.response.usage?.total_tokens ?? 0;
					sendResponsesApiCompactionTelemetry(this.telemetryService, {
						outcome: 'threshold_met_no_compaction',
						headerRequestId: this.requestId,
						gitHubRequestId: this.ghRequestId,
						model: chunk.response.model,
					}, {
						compactThreshold: this.compactionThreshold,
						promptTokens,
						totalTokens,
					});
					this.logService.debug(`[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${this.requestId}, gitHubRequestId=${this.ghRequestId}, promptTokens=${promptTokens}, totalTokens=${totalTokens}`);
				}
				// Final delta: the response id acts as the stateful marker for
				// follow-up requests.
				onProgress({
					text: '',
					statefulMarker: chunk.response.id,
					contextManagement: shouldEmitResolvedCompaction ? latestCompactionItem : undefined,
				});
				return {
					blockFinished: true,
					choiceIndex: 0,
					model: chunk.response.model,
					tokens: [],
					telemetryData: this.telemetryData,
					requestId: { headerRequestId: this.requestId, gitHubRequestId: this.ghRequestId, completionId: chunk.response.id, created: chunk.response.created_at, deploymentId: '', serverExperiments: this.serverExperiments },
					usage: {
						prompt_tokens: chunk.response.usage?.input_tokens ?? 0,
						completion_tokens: chunk.response.usage?.output_tokens ?? 0,
						total_tokens: chunk.response.usage?.total_tokens ?? 0,
						prompt_tokens_details: {
							cached_tokens: chunk.response.usage?.input_tokens_details.cached_tokens ?? 0,
						},
						completion_tokens_details: {
							reasoning_tokens: chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0,
							accepted_prediction_tokens: 0,
							rejected_prediction_tokens: 0,
						},
					},
					finishReason: FinishedCompletionReason.Stop,
					message: {
						role: Raw.ChatRole.Assistant,
						// Rough text/image projection of the final output; tool
						// calls and reasoning items are intentionally omitted here.
						content: normalizedOutput.map((item): Raw.ChatCompletionContentPart | undefined => {
							if (item.type === 'message') {
								return { type: Raw.ChatCompletionContentPartKind.Text, text: item.content.map(c => c.type === 'output_text' ? c.text : c.refusal).join('') };
							} else if (item.type === 'image_generation_call' && item.result) {
								return { type: Raw.ChatCompletionContentPartKind.Image, imageUrl: { url: item.result } };
							}
						}).filter(isDefined),
					}
				};
			}
		}
	}
}
function mapLogProp(text: Lazy<Uint8Array>, lp: OpenAI.Responses.ResponseTextDeltaEvent.Logprob.TopLogprob): TokenLogProb {
1153
let bytes: number[] = [];
1154
if (lp.token) {
1155
const needle = new TextEncoder().encode(lp.token);
1156
const haystack = text.value;
1157
const idx = binaryIndexOf(haystack, needle);
1158
if (idx !== -1) {
1159
bytes = [idx, idx + needle.length];
1160
}
1161
}
1162
1163
return {
1164
token: lp.token!,
1165
bytes,
1166
logprob: lp.logprob!,
1167
};
1168
}
1169
1170