Path: blob/main/extensions/copilot/src/platform/endpoint/node/responsesApi.ts
13401 views
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import type { OpenAI } from 'openai';
import { Response } from '../../../platform/networking/common/fetcherService';
import { coalesce } from '../../../util/vs/base/common/arrays';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { binaryIndexOf } from '../../../util/vs/base/common/buffer';
import { Lazy } from '../../../util/vs/base/common/lazy';
import { SSEParser } from '../../../util/vs/base/common/sseParser';
import { isDefined } from '../../../util/vs/base/common/types';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';
import { ChatLocation } from '../../chat/common/commonTypes';
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
import { ILogService } from '../../log/common/logService';
import { CUSTOM_TOOL_SEARCH_NAME } from '../../networking/common/anthropic';
import { FinishedCallback, getRequestId, IResponseDelta, OpenAiFunctionTool, OpenAiResponsesFunctionTool, OpenAiToolSearchTool } from '../../networking/common/fetch';
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody } from '../../networking/common/networking';
import { ChatCompletion, FinishedCompletionReason, modelsWithoutResponsesContextManagement, openAIContextManagementCompactionType, OpenAIContextManagementResponse, rawMessageToCAPI, TokenLogProb } from '../../networking/common/openai';
import { IToolDeferralService } from '../../networking/common/toolDeferralService';
import { sendEngineMessagesTelemetry, sendResponsesApiCompactionTelemetry } from '../../networking/node/chatStream';
import { IChatWebSocketManager } from '../../networking/node/chatWebSocketManager';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { TelemetryData } from '../../telemetry/common/telemetryData';
import { getVerbosityForModelSync, isResponsesApiToolSearchEnabled } from '../common/chatModelCapabilities';
import { rawPartAsCompactionData } from '../common/compactionDataContainer';
import { rawPartAsPhaseData } from '../common/phaseDataContainer';
import { getIndexOfStatefulMarker, getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer';
import { rawPartAsThinkingData } from '../common/thinkingDataContainer';
import { createResponsesStreamDumper } from './responsesApiDebugDump';

/**
 * Computes the server-side compaction threshold (in tokens) for the Responses API
 * `context_management` feature, or `undefined` when context management is disabled
 * for this endpoint (by config/experiment, or because the model family is excluded).
 *
 * @returns 90% of the model's max prompt tokens when that limit is known (> 0),
 * otherwise a 50k-token fallback; `undefined` when compaction must not be requested.
 */
export function getResponsesApiCompactionThreshold(configService: IConfigurationService, expService: IExperimentationService, endpoint: IChatEndpoint): number | undefined {
	const contextManagementEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService) && !modelsWithoutResponsesContextManagement.has(endpoint.family);
	if (!contextManagementEnabled) {
		return undefined;
	}

	return endpoint.modelMaxPromptTokens > 0
		? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
		: 50000;
}

/**
 * Builds the HTTP request body for an OpenAI Responses API call from the generic
 * endpoint options: converts raw chat messages to Responses input items, assembles
 * the tools array (including client-executed tool search when enabled), and maps
 * the supported subset of completion post-options onto their Responses equivalents.
 *
 * @param accessor Service accessor used to resolve config/experimentation/deferral services.
 * @param options Generic request options (messages, tools, post options, location, markers).
 * @param model Model identifier to send in the body.
 * @param endpoint Endpoint metadata (family, token limits, reasoning capabilities).
 * @returns The fully-populated Responses API request body.
 */
export function createResponsesRequestBody(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions, model: string, endpoint: IChatEndpoint): IEndpointBody {
	const configService = accessor.get(IConfigurationService);
	const expService = accessor.get(IExperimentationService);
	const verbosity = getVerbosityForModelSync(endpoint);
	const compactThreshold = getResponsesApiCompactionThreshold(configService, expService, endpoint);
	// compaction supported for all the models but works well for codex models and any future models after 5.

	const webSocketStatefulMarker = resolveWebSocketStatefulMarker(accessor, options);
	// When WebSocket is in use, always defer to the WebSocket marker (which may be
	// undefined if the connection is new or the summary state changed). Never fall
	// back to the HTTP marker lookup in that case.
	const ignoreStatefulMarker = !!options.ignoreStatefulMarker || !!options.useWebSocket;
	const modeChanged = !!options.modeChanged;

	// Tool search: when enabled, split tools into non-deferred (included in the request) and deferred
	// (excluded from the request entirely). Uses OpenAI's client-executed tool search protocol: we add
	// { type: 'tool_search', execution: 'client' }. The model emits tool_search_call, which we handle via
	// our ToolSearchTool embeddings search, then round-trip as tool_search_output in the next request.
	const toolSearchEnabled = isResponsesApiToolSearchEnabled(endpoint, configService, expService);
	const isAllowedConversationAgent = options.location === ChatLocation.Agent || options.location === ChatLocation.MessagesProxy;
	const isSubagent = options.telemetryProperties?.subType?.startsWith('subagent') ?? false;
	const toolSearchInRequest = !!options.requestOptions?.tools?.some(t => t.function.name === CUSTOM_TOOL_SEARCH_NAME);
	const shouldDeferTools = toolSearchEnabled && isAllowedConversationAgent && !isSubagent && toolSearchInRequest;
	// The deferral service is only resolved when tool deferral is actually in play.
	const toolDeferralService = shouldDeferTools ? accessor.get(IToolDeferralService) : undefined;

	type ResponsesFunctionTool = OpenAI.Responses.FunctionTool & OpenAiResponsesFunctionTool;
	const functionTools: ResponsesFunctionTool[] = [];
	if (options.requestOptions?.tools) {
		for (const tool of options.requestOptions.tools) {
			// Skip unnamed tools — the Responses API requires a function name.
			if (!tool.function.name || tool.function.name.length === 0) {
				continue;
			}
			// Always skip the tool_search function tool — 'tool_search' is a reserved namespace in the
			// Responses API. Client-executed tool search uses { type: 'tool_search', execution: 'client' } instead.
			if (tool.function.name === CUSTOM_TOOL_SEARCH_NAME) {
				continue;
			}
			const isDeferred = shouldDeferTools && !toolDeferralService!.isNonDeferredTool(tool.function.name);
			// Client-executed tool search: deferred tools are NOT sent in the request.
			// They are returned via tool_search_output when the model searches for them.
			if (isDeferred) {
				continue;
			}
			functionTools.push({
				...tool.function,
				type: 'function',
				strict: false,
				parameters: (tool.function.parameters || {}) as Record<string, unknown>,
			});
		}
	}

	// Build final tools array
	const finalTools: Array<ResponsesFunctionTool | OpenAiToolSearchTool | ClientToolSearchTool> = [...functionTools];
	if (shouldDeferTools) {
		// Client-executed tool search: the model emits tool_search_call, our ToolSearchTool
		// handles the embeddings search, and we return tool_search_output with full definitions.
		finalTools.unshift({
			type: 'tool_search',
			execution: 'client',
			description: 'Search for relevant tools by describing what you need. Returns tool definitions for tools matching your query.',
			parameters: {
				type: 'object',
				properties: {
					query: {
						type: 'string',
						description: 'Natural language description of what tool capability you are looking for.',
					},
				},
				required: ['query'],
			},
		} as ClientToolSearchTool);
	}

	// Name -> tool lookup used when round-tripping tool_search_output items.
	const toolsMap = options.requestOptions?.tools
		? new Map(options.requestOptions.tools.map(t => [t.function.name, t]))
		: undefined;
	const shouldLoadToolFromToolSearch = shouldDeferTools ? (name: string) => !toolDeferralService!.isNonDeferredTool(name) : undefined;

	const body: IEndpointBody = {
		model,
		...rawMessagesToResponseAPI(model, options.messages, ignoreStatefulMarker, webSocketStatefulMarker, {
			toolsMap,
			shouldLoadToolFromToolSearch,
			modeChanged,
		}),
		stream: true,
		tools: finalTools.length > 0 ? finalTools : undefined,
		// Only a subset of completion post options are supported, and some
		// are renamed. Handle them manually:
		max_output_tokens: options.postOptions.max_tokens,
		tool_choice: typeof options.postOptions.tool_choice === 'object'
			? { type: 'function', name: options.postOptions.tool_choice.function.name }
			: options.postOptions.tool_choice,
		top_logprobs: options.postOptions.logprobs ? 3 : undefined,
		store: false,
		text: verbosity ? { verbosity } : undefined,
	};

	if (compactThreshold !== undefined) {
		body.context_management = [{
			'type': openAIContextManagementCompactionType,
			// Trigger compaction at 90% of the model max prompt context to keep headroom for active turns.
			'compact_threshold': compactThreshold
		}];
	}

	body.truncation = configService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation) ?
		'auto' :
		'disabled';
	const thinkingExplicitlyDisabled = options.modelCapabilities?.enableThinking === false;
	const summaryConfig = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiReasoningSummary, expService);
	const shouldDisableReasoningSummary = endpoint.family === 'gpt-5.3-codex-spark-preview' || thinkingExplicitlyDisabled;
	const effortFromSetting = configService.getConfig(ConfigKey.Advanced.ReasoningEffortOverride);
	// User override wins, then model capability, then 'medium' — but only for
	// endpoints that advertise reasoning-effort support.
	const effort = endpoint.supportsReasoningEffort?.length
		? (effortFromSetting || options.modelCapabilities?.reasoningEffort || 'medium')
		: undefined;
	const summary = summaryConfig === 'off' || shouldDisableReasoningSummary ? undefined : summaryConfig;
	if (effort || summary) {
		body.reasoning = {
			...(effort ? { effort } : {}),
			...(summary ? { summary } : {})
		};
	}

	// Required to round-trip reasoning items across stateless (store: false) requests.
	body.include = ['reasoning.encrypted_content'];

	const promptCacheKeyEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiPromptCacheKeyEnabled, expService);
	if (promptCacheKeyEnabled && options.conversationId) {
		body.prompt_cache_key = `${options.conversationId}:${endpoint.family}`;
	}

	return body;
}

/**
 * Reads the compaction threshold back out of a previously-built request body.
 * Returns `undefined` when the body carries no compaction context-management entry.
 */
export function getResponsesApiCompactionThresholdFromBody(body: Pick<IEndpointBody, 'context_management'>): number | undefined {
	const contextManagement = body.context_management;
	if (!Array.isArray(contextManagement)) {
		return undefined;
	}

	for (const item of contextManagement) {
		if (item.type === openAIContextManagementCompactionType && typeof item.compact_threshold === 'number') {
			return item.compact_threshold;
		}
	}

	return undefined;
}

/** Assistant `output_text` content part as sent back in conversation history. */
interface ResponseInputAssistantTextContentPart {
	type: 'output_text';
	text: string;
}

/** Assistant history message with an optional `phase` marker round-tripped to the service. */
interface ResponseInputAssistantMessageWithPhase {
	type: 'message';
	role: 'assistant';
	content: ResponseInputAssistantTextContentPart[];
	phase?: string;
}

/** Output item that may carry a `phase` marker. */
interface ResponseOutputItemWithPhase {
	phase?: string;
}

// ── Responses API tool search types ──────────────────────────────────
// These match the shapes from https://developers.openai.com/api/docs/guides/tools-tool-search

/** Client-executed tool_search tool definition for the Responses API */
interface ClientToolSearchTool {
	type: 'tool_search';
	execution: 'client';
	description: string;
	parameters: Record<string, unknown>;
}

/** tool_search_call item as emitted by the model in the response stream. */
interface ResponsesToolSearchCall {
	type: 'tool_search_call';
	id: string;
	execution: 'client';
	call_id: string | null;
	status: string;
	arguments?: Record<string, unknown>;
}

/** Input item shape for a client-executed tool_search_call in conversation history */
interface ResponsesToolSearchCallInput {
	type: 'tool_search_call';
	execution: 'client';
	call_id: string;
	status: string;
	arguments: Record<string, unknown>;
}
/** Input item shape for a client-executed tool_search_output in conversation history */
interface ResponsesToolSearchOutputInput {
	type: 'tool_search_output';
	execution: 'client';
	call_id: string;
	status: string;
	tools: ToolSearchLoadedTool[];
}

/** A tool definition returned in tool_search_output */
interface ToolSearchLoadedTool {
	type: 'function';
	name: string;
	description: string;
	defer_loading: true;
	parameters: object;
}

/** The most recent compaction item found in a response output, with its position. */
interface LatestCompactionOutput {
	readonly item: OpenAIContextManagementResponse;
	readonly outputIndex: number;
}

type CompactionResponseOutputItem = OpenAI.Responses.ResponseOutputItem & OpenAIContextManagementResponse;

/** A compaction item observed in a streamed chunk; outputIndex is absent for response-level items. */
interface CompactionItemInChunk {
	readonly item: OpenAIContextManagementResponse;
	readonly outputIndex: number | undefined;
}

/** Narrowed shape for stream events that carry a single output item. */
interface ResponseStreamEventWithOutputItem {
	readonly item: unknown;
	readonly output_index: number;
}

/** Narrowed shape for stream events that carry the full response output array. */
interface ResponseStreamEventWithResponseOutput {
	readonly response: {
		readonly output: OpenAI.Responses.ResponseOutputItem[];
	};
}

/**
 * Resolves the stateful marker held by the WebSocket connection for this
 * conversation, or `undefined` when the marker must not be used (no WebSocket,
 * marker explicitly ignored, missing conversation id, or summarization state drift).
 */
function resolveWebSocketStatefulMarker(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions): string | undefined {
	if (options.ignoreStatefulMarker || !options.useWebSocket || !options.conversationId) {
		return undefined;
	}
	const wsManager = accessor.get(IChatWebSocketManager);
	// If client-side summarization state changed since the stateful marker
	// was stored (new summary, or rollback removing a summary), the server's
	// state no longer matches. Skip the marker so the full history is sent.
	const connSummarizedAt = wsManager.getSummarizedAtRoundId(options.conversationId);
	if (options.summarizedAtRoundId !== connSummarizedAt) {
		return undefined;
	}
	return wsManager.getStatefulMarker(options.conversationId);
}

/** Optional knobs for {@link rawMessagesToResponseAPI}. */
interface RawMessagesToResponseAPIOptions {
	readonly toolsMap?: Map<string, OpenAiFunctionTool>;
	readonly shouldLoadToolFromToolSearch?: (name: string) => boolean;
	readonly modeChanged?: boolean;
}

/**
 * Converts raw prompt-tsx chat messages into Responses API input items.
 *
 * Handles: stateful-marker resumption (`previous_response_id` + history slicing),
 * compaction item round-tripping, reasoning/thinking item round-tripping, and the
 * client-executed tool-search protocol (tool_search_call / tool_search_output).
 *
 * @param modelId Model id used to look up the HTTP-path stateful marker.
 * @param messages Full conversation history; may be sliced when resuming from a marker.
 * @param ignoreStatefulMarker When true, never resume from an HTTP-path marker.
 * @param webSocketStatefulMarker Marker from the WebSocket connection; takes precedence when set.
 * @returns The input items plus an optional `previous_response_id` to resume from.
 */
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean, webSocketStatefulMarker: string | undefined, options: RawMessagesToResponseAPIOptions = {}): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
	const { toolsMap, shouldLoadToolFromToolSearch, modeChanged = false } = options;
	const latestCompactionMessageIndex = getLatestCompactionMessageIndex(messages);
	const latestCompactionMessage = latestCompactionMessageIndex !== undefined ? createCompactionRoundTripMessage(messages[latestCompactionMessageIndex]) : undefined;

	let previousResponseId: string | undefined;
	let markerIndex: number | undefined;

	if (webSocketStatefulMarker) {
		// WebSocket path: use the connection's current stateful marker if present in messages
		markerIndex = getIndexOfStatefulMarker(webSocketStatefulMarker, messages);
		if (markerIndex !== undefined) {
			previousResponseId = webSocketStatefulMarker;
		}
	} else if (!ignoreStatefulMarker) {
		// HTTP path: look up the latest marker for this model from messages
		const statefulMarkerAndIndex = getStatefulMarkerAndIndex(modelId, messages);
		if (statefulMarkerAndIndex) {
			previousResponseId = statefulMarkerAndIndex.statefulMarker;
			markerIndex = statefulMarkerAndIndex.index;
		}
	}

	// A mode change invalidates any server-side state; send full history.
	if (modeChanged) {
		previousResponseId = undefined;
		markerIndex = undefined;
	}

	if (markerIndex !== undefined) {
		// Requests that resume from previous_response_id send only post-marker history,
		// but they still need the latest compaction item even when that item predates
		// the marker. This keeps both websocket and non-websocket traffic aligned.
		messages = messages.slice(markerIndex + 1);
		if (latestCompactionMessageIndex !== undefined) {
			if (latestCompactionMessageIndex > markerIndex) {
				// Compaction item is within the post-marker slice: drop everything before it.
				messages = messages.slice(latestCompactionMessageIndex - (markerIndex + 1));
			} else if (latestCompactionMessage) {
				// Compaction item predates the marker: prepend a round-trip copy of it.
				messages = [latestCompactionMessage, ...messages];
			}
		}
	} else if (latestCompactionMessageIndex !== undefined) {
		// No marker: history before the latest compaction is already summarized — drop it.
		messages = messages.slice(latestCompactionMessageIndex);
	}

	// Track which call_ids are tool_search_calls (from client-executed tool search)
	const toolSearchCallIds = new Set<string>();
	// Track tool names loaded via tool_search_output — these need a namespace field on function_call
	const toolSearchLoadedTools = new Set<string>();

	const input: OpenAI.Responses.ResponseInputItem[] = [];
	for (const message of messages) {
		switch (message.role) {
			case Raw.ChatRole.Assistant:
				if (message.content.length) {
					input.push(...extractCompactionData(message.content));
					input.push(...extractThinkingData(message.content));
					const asstContent = message.content.map(rawContentToResponsesAssistantContent).filter(isDefined);
					if (asstContent.length) {
						const assistantMessage: ResponseInputAssistantMessageWithPhase = {
							role: 'assistant',
							content: asstContent,
							type: 'message',
							phase: extractPhaseData(message.content),
						};
						// The Responses API expects previous assistant message content as output_text/refusal,
						// but the SDK's ResponseOutputMessage type requires response-only id/status fields.
						input.push(assistantMessage as OpenAI.Responses.ResponseInputItem);
					}
				}
				if (message.toolCalls) {
					for (const toolCall of message.toolCalls) {
						if (toolCall.function.name === CUSTOM_TOOL_SEARCH_NAME) {
							// Client-executed tool search: emit as tool_search_call instead of function_call
							toolSearchCallIds.add(toolCall.id);
							let parsedArgs: Record<string, unknown> = {};
							// Best-effort parse; malformed arguments round-trip as an empty object.
							try { parsedArgs = JSON.parse(toolCall.function.arguments || '{}'); } catch { }
							input.push({
								type: 'tool_search_call',
								execution: 'client',
								call_id: toolCall.id,
								status: 'completed',
								arguments: parsedArgs,
							} satisfies ResponsesToolSearchCallInput as unknown as OpenAI.Responses.ResponseInputItem);
						} else {
							// Tools loaded via tool_search need a namespace field to round-trip correctly
							const namespace = toolSearchLoadedTools.has(toolCall.function.name) ? toolCall.function.name : undefined;
							input.push({ type: 'function_call', name: toolCall.function.name, arguments: toolCall.function.arguments, call_id: toolCall.id, ...(namespace ? { namespace } : {}) });
						}
					}
				}
				break;
			case Raw.ChatRole.Tool:
				if (message.toolCallId) {
					if (toolSearchCallIds.has(message.toolCallId)) {
						// Client-executed tool search result: convert tool names to tool_search_output with full definitions
						const resultText = message.content
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Text)
							.map(c => c.text)
							.join('');
						const loadedTools = toolsMap ? buildToolSearchOutputTools(resultText, toolsMap, shouldLoadToolFromToolSearch) : [];
						for (const t of loadedTools) {
							toolSearchLoadedTools.add(t.name);
						}
						input.push({
							type: 'tool_search_output',
							execution: 'client',
							call_id: message.toolCallId,
							status: 'completed',
							tools: loadedTools,
						} satisfies ResponsesToolSearchOutputInput as unknown as OpenAI.Responses.ResponseInputItem);
					} else {
						const asText = message.content
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Text)
							.map(c => c.text)
							.join('');
						const asImages = message.content
							.filter(c => c.type === Raw.ChatCompletionContentPartKind.Image)
							.map((c): OpenAI.Responses.ResponseInputImage => ({
								type: 'input_image',
								detail: c.imageUrl.detail || 'auto',
								image_url: c.imageUrl.url,
							}));

						// todo@connor4312: hack while responses API only supports text output from tools
						input.push({ type: 'function_call_output', call_id: message.toolCallId, output: asText });
						if (asImages.length) {
							input.push({ role: 'user', content: [{ type: 'input_text', text: 'Image associated with the above tool call:' }, ...asImages] });
						}
					}
				}
				break;
			case Raw.ChatRole.User:
				input.push({ role: 'user', content: message.content.map(rawContentToResponsesContent).filter(isDefined) });
				break;
			case Raw.ChatRole.System:
				input.push({ role: 'system', content: message.content.map(rawContentToResponsesContent).filter(isDefined) });
				break;
		}
	}

	return { input, previous_response_id: previousResponseId };
}
/**
 * Converts a JSON array of tool names (from ToolSearchTool) into full tool definitions
 * for the tool_search_output. Falls back to an empty array on parse failure.
 */
function buildToolSearchOutputTools(resultText: string, toolsMap: Map<string, OpenAiFunctionTool>, shouldLoadToolFromToolSearch: ((name: string) => boolean) | undefined): ToolSearchLoadedTool[] {
	let toolNames: unknown;
	try { toolNames = JSON.parse(resultText); } catch { return []; }
	if (!Array.isArray(toolNames)) { return []; }

	return toolNames
		// Keep only known, deferrable tool names; never surface tool_search itself.
		.filter((name): name is string => typeof name === 'string' && name !== CUSTOM_TOOL_SEARCH_NAME && toolsMap.has(name) && shouldLoadToolFromToolSearch?.(name) === true)
		.map(name => {
			const tool = toolsMap.get(name)!;
			return {
				type: 'function' as const,
				name: tool.function.name,
				description: tool.function.description || '',
				defer_loading: true as const,
				parameters: tool.function.parameters || { type: 'object', properties: {} },
			};
		});
}

/**
 * Builds an assistant message containing only the compaction opaque parts of the
 * given message, for prepending when the compaction item predates the stateful
 * marker. Returns `undefined` when the message is not an assistant message or
 * carries no compaction data.
 */
function createCompactionRoundTripMessage(message: Raw.ChatMessage): Raw.ChatMessage | undefined {
	if (message.role !== Raw.ChatRole.Assistant) {
		return undefined;
	}

	const content = message.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part));
	if (!content.length) {
		return undefined;
	}

	return {
		role: Raw.ChatRole.Assistant,
		content,
	};
}

/**
 * Finds the index of the last message containing a compaction opaque part,
 * scanning from the end of the history. Returns `undefined` when none exists.
 */
function getLatestCompactionMessageIndex(messages: readonly Raw.ChatMessage[]): number | undefined {
	for (let idx = messages.length - 1; idx >= 0; idx--) {
		const message = messages[idx];
		for (const part of message.content) {
			if (part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part)) {
				return idx;
			}
		}
	}

	return undefined;
}

/**
 * Maps a raw user/system content part to a Responses API input content part.
 * Opaque parts pass through only when they already have a valid input shape;
 * anything else maps to `undefined` (callers filter those out).
 */
function rawContentToResponsesContent(part: Raw.ChatCompletionContentPart): OpenAI.Responses.ResponseInputContent | undefined {
	switch (part.type) {
		case Raw.ChatCompletionContentPartKind.Text:
			return { type: 'input_text', text: part.text };
		case Raw.ChatCompletionContentPartKind.Image:
			return { type: 'input_image', detail: part.imageUrl.detail || 'auto', image_url: part.imageUrl.url };
		case Raw.ChatCompletionContentPartKind.Opaque: {
			const maybeCast = part.value as OpenAI.Responses.ResponseInputContent;
			if (maybeCast.type === 'input_text' || maybeCast.type === 'input_image' || maybeCast.type === 'input_file') {
				return maybeCast;
			}
		}
	}
}

/**
 * Maps a raw assistant content part to an `output_text` history part.
 * Whitespace-only text is dropped (returns `undefined`).
 */
function rawContentToResponsesAssistantContent(part: Raw.ChatCompletionContentPart): Pick<OpenAI.Responses.ResponseOutputText, 'type' | 'text'> | undefined {
	switch (part.type) {
		case Raw.ChatCompletionContentPartKind.Text:
			if (part.text.trim()) {
				return { type: 'output_text', text: part.text };
			}
	}
}

/**
 * Extracts stored thinking/reasoning data from opaque content parts and
 * converts each into a `reasoning` input item with its encrypted content,
 * so reasoning survives the stateless (store: false) round trip.
 */
function extractThinkingData(content: Raw.ChatCompletionContentPart[]): OpenAI.Responses.ResponseReasoningItem[] {
	return coalesce(content.map(part => {
		if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
			const thinkingData = rawPartAsThinkingData(part);
			if (thinkingData) {
				return {
					type: 'reasoning',
					id: thinkingData.id,
					summary: [],
					encrypted_content: thinkingData.encrypted,
				} satisfies OpenAI.Responses.ResponseReasoningItem;
			}
		}
	}));
}

/** Returns the first phase marker found in the opaque content parts, if any. */
function extractPhaseData(content: Raw.ChatCompletionContentPart[]): string | undefined {
	for (const part of content) {
		if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
			const phase = rawPartAsPhaseData(part);
			if (phase) {
				return phase;
			}
		}
	}
	return undefined;
}

/**
 * Extracts compaction data from opaque content parts and converts them to
 * Responses API input items for round-tripping.
 */
function extractCompactionData(content: Raw.ChatCompletionContentPart[]): OpenAI.Responses.ResponseInputItem[] {
	return coalesce(content.map(part => {
		if (part.type === Raw.ChatCompletionContentPartKind.Opaque) {
			const compaction = rawPartAsCompactionData(part);
			if (compaction) {
				return {
					type: openAIContextManagementCompactionType,
					id: compaction.id,
					encrypted_content: compaction.encrypted_content,
				} as unknown as OpenAI.Responses.ResponseInputItem;
			}
		}
	}));
}
/**
 * This is an approximate responses input -> raw messages helper, should be used for logging only
 */
export function responseApiInputToRawMessagesForLogging(body: OpenAI.Responses.ResponseCreateParams): Raw.ChatMessage[] {
	const messages: Raw.ChatMessage[] = [];
	// function_call items are buffered here and attached to a single synthetic
	// assistant message when the next non-call item is seen.
	const pendingFunctionCalls: Raw.ChatMessageToolCall[] = [];

	const flushPendingFunctionCalls = () => {
		if (pendingFunctionCalls.length > 0) {
			messages.push({
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: pendingFunctionCalls.splice(0)
			});
		}
	};

	// Add system instructions if provided
	if (body.instructions) {
		messages.push({
			role: Raw.ChatRole.System,
			content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: body.instructions }]
		});
	}

	// Convert input to array format if it's a string
	const inputItems = typeof body.input === 'string' ? [{ role: 'user' as const, content: body.input, type: 'message' as const }] : (body.input ?? []);

	for (const item of inputItems) {
		// Handle message items with roles
		if ('role' in item) {
			switch (item.role) {
				case 'user':
					flushPendingFunctionCalls();
					messages.push({
						role: Raw.ChatRole.User,
						content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
					});
					break;
				case 'system':
				case 'developer':
					flushPendingFunctionCalls();
					messages.push({
						role: Raw.ChatRole.System,
						content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
					});
					break;
				case 'assistant':
					flushPendingFunctionCalls();
					if (isResponseOutputMessage(item)) {
						messages.push({
							role: Raw.ChatRole.Assistant,
							content: item.content.map(responseOutputToRawContent).filter(isDefined)
						});
					} else if (isResponseInputItemMessage(item)) {
						messages.push({
							role: Raw.ChatRole.Assistant,
							content: ensureContentArray(item.content).map(responseContentToRawContent).filter(isDefined)
						});
					}
					break;
			}
		} else if ('type' in item) {
			// Handle other item types without roles
			switch (item.type) {
				case 'function_call':
					// Collect function calls to be grouped with the next assistant message
					pendingFunctionCalls.push({
						id: item.call_id,
						type: 'function',
						function: {
							name: item.name,
							arguments: item.arguments
						}
					});
					break;
				case 'function_call_output': {
					flushPendingFunctionCalls();
					const content = responseFunctionOutputToRawContents(item.output);
					messages.push({
						role: Raw.ChatRole.Tool,
						content,
						toolCallId: item.call_id
					});
					break;
				}
				case 'reasoning':
					// We can't perfectly reconstruct the original thinking data
					// but we can add a placeholder for logging
					flushPendingFunctionCalls();
					messages.push({
						role: Raw.ChatRole.Assistant,
						content: [{
							type: Raw.ChatCompletionContentPartKind.Text,
							text: `Reasoning summary: ${item.summary.map(s => s.text).join('\n\n')}`
						}]
					});
					break;
				default: {
					// Client-executed tool search items (tool_search_call / tool_search_output)
					const tsItem = item as unknown as ResponsesToolSearchCallInput | ResponsesToolSearchOutputInput;
					if (tsItem.type === 'tool_search_call') {
						pendingFunctionCalls.push({
							id: tsItem.call_id,
							type: 'function',
							function: {
								name: CUSTOM_TOOL_SEARCH_NAME,
								arguments: JSON.stringify(tsItem.arguments ?? {}),
							}
						});
					} else if (tsItem.type === 'tool_search_output') {
						flushPendingFunctionCalls();
						const toolNames = tsItem.tools.map(t => t.name);
						messages.push({
							role: Raw.ChatRole.Tool,
							content: [{
								type: Raw.ChatCompletionContentPartKind.Text,
								text: JSON.stringify(toolNames),
							}],
							toolCallId: tsItem.call_id,
						});
					}
					break;
				}
			}
		}
	}

	// Flush any remaining function calls at the end
	if (pendingFunctionCalls.length > 0) {
		messages.push({
			role: Raw.ChatRole.Assistant,
			content: [],
			toolCalls: pendingFunctionCalls.splice(0)
		});
	}

	return messages;
}

/** Type guard: assistant message in response-output shape (array content with type 'message'). */
function isResponseOutputMessage(item: OpenAI.Responses.ResponseInputItem): item is OpenAI.Responses.ResponseOutputMessage {
	return 'role' in item && item.role === 'assistant' && 'type' in item && item.type === 'message' && 'content' in item && Array.isArray(item.content);
}

/** Type guard: assistant message in input-item shape (no 'message' type tag). */
function isResponseInputItemMessage(item: OpenAI.Responses.ResponseInputItem): item is OpenAI.Responses.ResponseInputItem.Message {
	return 'role' in item && item.role === 'assistant' && (!('type' in item) || item.type !== 'message');
}

/** Normalizes string content into a single-element input_text content list. */
function ensureContentArray(content: string | OpenAI.Responses.ResponseInputMessageContentList): OpenAI.Responses.ResponseInputMessageContentList {
	if (typeof content === 'string') {
		return [{ type: 'input_text', text: content }];
	}
	return content;
}

/**
 * Maps a Responses input content part back to a raw content part, for logging.
 * Unknown part types map to `undefined` (callers filter those out).
 */
function responseContentToRawContent(part: OpenAI.Responses.ResponseInputContent | OpenAI.Responses.ResponseFunctionCallOutputItem): Raw.ChatCompletionContentPart | undefined {
	switch (part.type) {
		case 'input_text':
			return { type: Raw.ChatCompletionContentPartKind.Text, text: part.text };
		case 'input_image':
			return {
				type: Raw.ChatCompletionContentPartKind.Image,
				imageUrl: {
					url: part.image_url || '',
					detail: part.detail === 'auto' ?
						undefined :
						(part.detail ?? undefined)
				}
			};
		case 'input_file':
			// This is a rough approximation for logging
			return {
				type: Raw.ChatCompletionContentPartKind.Opaque,
				value: `[File Input - Filename: ${part.filename || 'unknown'}]`
			};
	}
}
/** Maps an assistant output part (text or refusal) back to a raw text part, for logging. */
function responseOutputToRawContent(part: OpenAI.Responses.ResponseOutputText | OpenAI.Responses.ResponseOutputRefusal): Raw.ChatCompletionContentPart | undefined {
	switch (part.type) {
		case 'output_text':
			return { type: Raw.ChatCompletionContentPartKind.Text, text: part.text };
		case 'refusal':
			return { type: Raw.ChatCompletionContentPartKind.Text, text: `[Refusal: ${part.refusal}]` };
	}
}

/** Converts a function_call_output payload (string or item list) into raw content parts. */
function responseFunctionOutputToRawContents(output: string | OpenAI.Responses.ResponseFunctionCallOutputItemList): Raw.ChatCompletionContentPart[] {
	if (typeof output === 'string') {
		return [{ type: Raw.ChatCompletionContentPartKind.Text, text: output }];
	}
	return coalesce(output.map(responseContentToRawContent));
}

/** True when the value looks like a context-management compaction item. */
function isCompactionItem(value: unknown): value is OpenAIContextManagementResponse {
	return typeof value === 'object' && value !== null && 'type' in value && String(value.type) === openAIContextManagementCompactionType;
}

/** Narrows a stream event to one carrying an `item` with a numeric `output_index`. */
function hasOutputItem(chunk: OpenAI.Responses.ResponseStreamEvent): chunk is OpenAI.Responses.ResponseStreamEvent & ResponseStreamEventWithOutputItem {
	return 'item' in chunk && 'output_index' in chunk && typeof chunk.output_index === 'number';
}

/** Narrows a stream event to one carrying a full `response.output` array. */
function hasResponseOutput(chunk: OpenAI.Responses.ResponseStreamEvent): chunk is OpenAI.Responses.ResponseStreamEvent & ResponseStreamEventWithResponseOutput {
	return 'response' in chunk && Array.isArray(chunk.response.output);
}

/** Accessor kept for symmetry with the guards above. */
function getOutputItemIndex(chunk: ResponseStreamEventWithOutputItem): number {
	return chunk.output_index;
}

/** True when the output item is a compaction item. */
function isCompactionOutputItem(item: OpenAI.Responses.ResponseOutputItem): item is CompactionResponseOutputItem {
	return isCompactionItem(item);
}

/**
 * Finds the compaction item to keep from a response output array: the last
 * compaction item by position, unless `preferredOutputIndex` points at a
 * compaction item at the same or a later position — then that one wins.
 */
function getLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): LatestCompactionOutput | undefined {
	let latestCompactionOutput: LatestCompactionOutput | undefined;
	for (let idx = output.length - 1; idx >= 0; idx--) {
		const item = output[idx];
		if (isCompactionOutputItem(item)) {
			latestCompactionOutput = { item, outputIndex: idx };
			break;
		}
	}

	if (preferredOutputIndex !== undefined) {
		const preferredItem = output[preferredOutputIndex];
		if (preferredItem && isCompactionOutputItem(preferredItem) && (!latestCompactionOutput || preferredOutputIndex >= latestCompactionOutput.outputIndex)) {
			return { item: preferredItem, outputIndex: preferredOutputIndex };
		}
	}

	return latestCompactionOutput;
}

/**
 * Filters a response output array so only the latest compaction item remains;
 * non-compaction items are always kept. Returns the input unchanged when no
 * compaction item is present.
 */
function keepLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): OpenAI.Responses.ResponseOutputItem[] {
	const latestCompactionOutput = getLatestCompactionOutput(output, preferredOutputIndex);
	if (!latestCompactionOutput) {
		return output;
	}

	return output.filter((item, idx) => !isCompactionOutputItem(item) || idx === latestCompactionOutput.outputIndex);
}

/**
 * Wraps a streaming Responses API HTTP response into an async iterable of
 * {@link ChatCompletion}s: parses the SSE body, feeds each event through an
 * {@link OpenAIResponsesProcessor}, and emits completions as they finish.
 * Parse/processing errors reject the iterable; disposal destroys the body stream.
 *
 * @param expectedNumChoices Accepted for interface parity; not used by this implementation.
 * @param compactionThreshold Optional threshold forwarded to the processor for compaction telemetry.
 */
export async function processResponseFromChatEndpoint(instantiationService: IInstantiationService, telemetryService: ITelemetryService, logService: ILogService, response: Response, expectedNumChoices: number, finishCallback: FinishedCallback, telemetryData: TelemetryData, compactionThreshold?: number): Promise<AsyncIterableObject<ChatCompletion>> {
	return new AsyncIterableObject<ChatCompletion>(async feed => {
		const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
		const ghRequestId = response.headers.get('x-github-request-id') ?? '';
		const { serverExperiments } = getRequestId(response.headers);
		const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, serverExperiments, compactionThreshold);
		const dumper = createResponsesStreamDumper(requestId, logService);
		const parser = new SSEParser((ev) => {
			try {
				logService.trace(`SSE: ${ev.data}`);
				const parsedData = JSON.parse(ev.data);
				// The SSE event name carries the stream-event type; merge it with the payload.
				const responseStreamEvent: OpenAI.Responses.ResponseStreamEvent = { type: ev.type, ...parsedData };
				dumper.logEvent(responseStreamEvent);
				const completion = processor.push(responseStreamEvent, finishCallback);
				if (completion) {
					sendCompletionOutputTelemetry(telemetryService, logService, completion, telemetryData);
					feed.emitOne(completion);
				}
			} catch (e) {
				feed.reject(e);
			}
		});

		for await (const chunk of response.body) {
			parser.feed(chunk);
		}
	}, async () => {
		await response.body.destroy();
	});
}

/**
 * Sends engine-message telemetry for a finished completion, extending the
 * telemetry data with token-usage counts (prompt/completion/total, cached,
 * reasoning, and prediction tokens) when usage information is present.
 */
export function sendCompletionOutputTelemetry(telemetryService: ITelemetryService, logService: ILogService, completion: ChatCompletion, telemetryData: TelemetryData): void {
	const telemetryMessage = rawMessageToCAPI(completion.message);
	let telemetryDataWithUsage = telemetryData;
	if (completion.usage) {
		telemetryDataWithUsage = telemetryData.extendedBy({}, {
			promptTokens: completion.usage.prompt_tokens,
			completionTokens: completion.usage.completion_tokens,
			totalTokens: completion.usage.total_tokens,
			...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }),
			...(completion.usage.completion_tokens_details && {
				reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens,
				acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens,
				rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens,
			}),
		});
	}
	sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService);
}
completion.usage.completion_tokens_details.rejected_prediction_tokens,857}),858});859}860sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService);861}862863interface CapiResponsesTextDeltaEvent extends Omit<OpenAI.Responses.ResponseTextDeltaEvent, 'logprobs'> {864logprobs: Array<OpenAI.Responses.ResponseTextDeltaEvent.Logprob> | undefined;865}866867export class OpenAIResponsesProcessor {868private textAccumulator: string = '';869private hasReceivedReasoningSummary = false;870private sawCompactionMessage = false;871private latestCompactionOutputIndex: number | undefined;872private latestCompactionItem: OpenAIContextManagementResponse | undefined;873/** Tracks the output_index of the last text delta to detect output item boundaries */874private lastTextDeltaOutputIndex: number | undefined;875/** Maps output_index to { name, callId, arguments } for streaming tool call updates */876private readonly toolCallInfo = new Map<number, { name: string; callId: string; arguments: string }>();877878constructor(879private readonly telemetryData: TelemetryData,880private readonly telemetryService: ITelemetryService,881private readonly requestId: string,882private readonly ghRequestId: string,883private readonly serverExperiments: string,884private readonly compactionThreshold: number | undefined,885@ILogService private readonly logService: ILogService,886) { }887888private getCompactionItemsInChunk(chunk: OpenAI.Responses.ResponseStreamEvent): CompactionItemInChunk[] {889const compactionItems: CompactionItemInChunk[] = [];890891if (hasOutputItem(chunk) && isCompactionItem(chunk.item)) {892const outputIndex = getOutputItemIndex(chunk);893compactionItems.push({ item: chunk.item, outputIndex });894}895896if (hasResponseOutput(chunk)) {897for (let idx = 0; idx < chunk.response.output.length; idx++) {898const item = chunk.response.output[idx];899if (isCompactionItem(item)) {900compactionItems.push({ item, outputIndex: idx 
});901}902}903}904905return compactionItems;906}907908private captureCompactionItem(item: OpenAIContextManagementResponse, outputIndex: number | undefined, onProgress: (delta: IResponseDelta) => undefined): void {909if (outputIndex !== undefined && this.latestCompactionOutputIndex !== undefined && outputIndex < this.latestCompactionOutputIndex) {910return;911}912913const previousCompactionItem = this.latestCompactionItem;914this.sawCompactionMessage = true;915this.latestCompactionOutputIndex = outputIndex ?? this.latestCompactionOutputIndex;916this.latestCompactionItem = item;917918if (previousCompactionItem?.id === item.id && previousCompactionItem.encrypted_content === item.encrypted_content) {919return;920}921922onProgress({923text: '',924contextManagement: {925type: openAIContextManagementCompactionType,926id: item.id,927encrypted_content: item.encrypted_content,928}929});930}931932public push(chunk: OpenAI.Responses.ResponseStreamEvent, _onProgress: FinishedCallback): ChatCompletion | undefined {933const onProgress = (delta: IResponseDelta): undefined => {934this.textAccumulator += delta.text;935_onProgress(this.textAccumulator, 0, delta);936};937const compactionItems = this.getCompactionItemsInChunk(chunk);938if (chunk.type !== 'response.completed') {939for (const { item, outputIndex } of compactionItems) {940this.captureCompactionItem(item, outputIndex, onProgress);941}942}943944switch (chunk.type) {945case 'error':946return onProgress({ text: '', copilotErrors: [{ agent: 'openai', code: chunk.code || 'unknown', message: chunk.message, type: 'error', identifier: chunk.param || undefined }] });947case 'response.output_text.delta': {948const capiChunk: CapiResponsesTextDeltaEvent = chunk;949// When text arrives from a new output item, emit a paragraph950// separator so that e.g. 
commentary and final text don't fuse.951if (this.lastTextDeltaOutputIndex !== undefined && capiChunk.output_index !== this.lastTextDeltaOutputIndex) {952onProgress({ text: '\n\n' });953}954this.lastTextDeltaOutputIndex = capiChunk.output_index;955const haystack = new Lazy(() => new TextEncoder().encode(capiChunk.delta));956return onProgress({957text: capiChunk.delta,958logprobs: capiChunk.logprobs && {959content: capiChunk.logprobs.map(lp => ({960...mapLogProp(haystack, lp),961top_logprobs: lp.top_logprobs?.map(l => mapLogProp(haystack, l)) || []962}))963},964});965}966case 'response.output_item.added':967if (chunk.item.type === 'function_call') {968this.toolCallInfo.set(chunk.output_index, { name: chunk.item.name, callId: chunk.item.call_id, arguments: '' });969onProgress({970text: '',971beginToolCalls: [{ name: chunk.item.name, id: chunk.item.call_id }]972});973} else if (chunk.item.type.toString() === 'tool_search_call') {974const tsItem = chunk.item as unknown as ResponsesToolSearchCall;975if (tsItem.execution === 'client' && tsItem.call_id) {976// Client-executed tool search: treat as a regular tool call so our ToolSearchTool handles it.977this.toolCallInfo.set(chunk.output_index, { name: CUSTOM_TOOL_SEARCH_NAME, callId: tsItem.call_id, arguments: '' });978onProgress({979text: '',980beginToolCalls: [{ name: CUSTOM_TOOL_SEARCH_NAME, id: tsItem.call_id }]981});982}983}984return;985case 'response.function_call_arguments.delta': {986const info = this.toolCallInfo.get(chunk.output_index);987if (info) {988info.arguments += chunk.delta;989onProgress({990text: '',991copilotToolCallStreamUpdates: [{992id: info.callId,993name: info.name,994arguments: info.arguments,995}],996});997}998return;999}1000case 'response.output_item.done':1001if (chunk.item.type === 'function_call') {1002this.toolCallInfo.delete(chunk.output_index);1003onProgress({1004text: '',1005copilotToolCalls: [{1006id: chunk.item.call_id,1007name: chunk.item.name,1008arguments: 
chunk.item.arguments,1009}],1010phase: (chunk.item as ResponseOutputItemWithPhase).phase1011});1012} else if (chunk.item.type.toString() === 'tool_search_call') {1013const tsCall = chunk.item as unknown as ResponsesToolSearchCall;1014if (tsCall.execution === 'client' && tsCall.call_id) {1015// Client-executed tool search completed: emit as a completed copilotToolCall1016this.toolCallInfo.delete(chunk.output_index);1017onProgress({1018text: '',1019copilotToolCalls: [{1020id: tsCall.call_id,1021name: CUSTOM_TOOL_SEARCH_NAME,1022arguments: JSON.stringify(tsCall.arguments ?? {}),1023}],1024});1025}1026} else if (chunk.item.type === 'reasoning') {1027onProgress({1028text: '',1029thinking: chunk.item.encrypted_content ? {1030id: chunk.item.id,1031// CAPI models don't stream the reasoning summary for some reason, byok do, so don't duplicate it1032text: this.hasReceivedReasoningSummary ?1033undefined :1034chunk.item.summary.map(s => s.text),1035encrypted: chunk.item.encrypted_content,1036} : undefined1037});1038} else if (chunk.item.type === 'message') {1039onProgress({1040text: '',1041phase: (chunk.item as ResponseOutputItemWithPhase).phase1042});1043}1044return;1045case 'response.reasoning_summary_text.delta':1046this.hasReceivedReasoningSummary = true;1047return onProgress({1048text: '',1049thinking: {1050id: chunk.item_id,1051text: chunk.delta,1052}1053});1054case 'response.reasoning_summary_part.done':1055this.hasReceivedReasoningSummary = true;1056return onProgress({1057text: '',1058thinking: {1059id: chunk.item_id1060}1061});1062case 'response.completed': {1063const normalizedOutput = keepLatestCompactionOutput(chunk.response.output, this.latestCompactionOutputIndex);1064const latestCompactionOutput = getLatestCompactionOutput(normalizedOutput, this.latestCompactionOutputIndex);1065const latestCompactionItem = latestCompactionOutput?.item;1066const previousCompactionItem = this.latestCompactionItem;1067if (latestCompactionItem) {1068this.sawCompactionMessage = 
true;1069this.latestCompactionOutputIndex = latestCompactionOutput.outputIndex;1070}10711072const shouldEmitResolvedCompaction = latestCompactionItem && (1073!previousCompactionItem ||1074previousCompactionItem.id !== latestCompactionItem.id ||1075previousCompactionItem.encrypted_content !== latestCompactionItem.encrypted_content1076);1077if (latestCompactionItem) {1078this.latestCompactionItem = latestCompactionItem;1079}1080if (this.compactionThreshold !== undefined && this.sawCompactionMessage) {1081const promptTokens = chunk.response.usage?.input_tokens ?? 0;1082const totalTokens = chunk.response.usage?.total_tokens ?? 0;1083sendResponsesApiCompactionTelemetry(this.telemetryService, {1084outcome: 'compaction_returned',1085headerRequestId: this.requestId,1086gitHubRequestId: this.ghRequestId,1087model: chunk.response.model,1088}, {1089compactThreshold: this.compactionThreshold,1090promptTokens,1091totalTokens,1092});1093this.logService.debug(`[responsesAPI_compaction] Compaction enabled. headerRequestId=${this.requestId}`);1094} else if (this.compactionThreshold !== undefined && (chunk.response.usage?.input_tokens ?? 0) >= this.compactionThreshold) {1095const promptTokens = chunk.response.usage?.input_tokens ?? 0;1096const totalTokens = chunk.response.usage?.total_tokens ?? 0;1097sendResponsesApiCompactionTelemetry(this.telemetryService, {1098outcome: 'threshold_met_no_compaction',1099headerRequestId: this.requestId,1100gitHubRequestId: this.ghRequestId,1101model: chunk.response.model,1102}, {1103compactThreshold: this.compactionThreshold,1104promptTokens,1105totalTokens,1106});1107this.logService.debug(`[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${this.requestId}, gitHubRequestId=${this.ghRequestId}, promptTokens=${promptTokens}, totalTokens=${totalTokens}`);1108}1109onProgress({1110text: '',1111statefulMarker: chunk.response.id,1112contextManagement: shouldEmitResolvedCompaction ? 
latestCompactionItem : undefined,1113});1114return {1115blockFinished: true,1116choiceIndex: 0,1117model: chunk.response.model,1118tokens: [],1119telemetryData: this.telemetryData,1120requestId: { headerRequestId: this.requestId, gitHubRequestId: this.ghRequestId, completionId: chunk.response.id, created: chunk.response.created_at, deploymentId: '', serverExperiments: this.serverExperiments },1121usage: {1122prompt_tokens: chunk.response.usage?.input_tokens ?? 0,1123completion_tokens: chunk.response.usage?.output_tokens ?? 0,1124total_tokens: chunk.response.usage?.total_tokens ?? 0,1125prompt_tokens_details: {1126cached_tokens: chunk.response.usage?.input_tokens_details.cached_tokens ?? 0,1127},1128completion_tokens_details: {1129reasoning_tokens: chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0,1130accepted_prediction_tokens: 0,1131rejected_prediction_tokens: 0,1132},1133},1134finishReason: FinishedCompletionReason.Stop,1135message: {1136role: Raw.ChatRole.Assistant,1137content: normalizedOutput.map((item): Raw.ChatCompletionContentPart | undefined => {1138if (item.type === 'message') {1139return { type: Raw.ChatCompletionContentPartKind.Text, text: item.content.map(c => c.type === 'output_text' ? c.text : c.refusal).join('') };1140} else if (item.type === 'image_generation_call' && item.result) {1141return { type: Raw.ChatCompletionContentPartKind.Image, imageUrl: { url: item.result } };1142}1143}).filter(isDefined),1144}1145};1146}1147}1148}1149}11501151function mapLogProp(text: Lazy<Uint8Array>, lp: OpenAI.Responses.ResponseTextDeltaEvent.Logprob.TopLogprob): TokenLogProb {1152let bytes: number[] = [];1153if (lp.token) {1154const needle = new TextEncoder().encode(lp.token);1155const haystack = text.value;1156const idx = binaryIndexOf(haystack, needle);1157if (idx !== -1) {1158bytes = [idx, idx + needle.length];1159}1160}11611162return {1163token: lp.token!,1164bytes,1165logprob: lp.logprob!,1166};1167}116811691170