// Source: extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { Raw } from '@vscode/prompt-tsx';6import type { OpenAI } from 'openai';7import type { CancellationToken } from 'vscode';8import { IAuthenticationService } from '../../../platform/authentication/common/authentication';9import { CopilotToken } from '../../../platform/authentication/common/copilotToken';10import { FetchStreamRecorder, IChatMLFetcher, IFetchMLOptions, Source } from '../../../platform/chat/common/chatMLFetcher';11import { IChatQuotaService } from '../../../platform/chat/common/chatQuotaService';12import { ChatFetchError, ChatFetchResponseType, ChatFetchRetriableError, ChatLocation, ChatResponse, ChatResponses, RESPONSE_CONTAINED_NO_CHOICES } from '../../../platform/chat/common/commonTypes';13import { IConversationOptions } from '../../../platform/chat/common/conversationOptions';14import { getTextPart, toTextParts } from '../../../platform/chat/common/globalStringUtils';15import { IInteractionService } from '../../../platform/chat/common/interactionService';16import { ConfigKey, HARD_TOOL_LIMIT, IConfigurationService } from '../../../platform/configuration/common/configurationService';17import { ICAPIClientService } from '../../../platform/endpoint/common/capiClient';18import { isAutoModel } from '../../../platform/endpoint/node/autoChatEndpoint';19import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi';20import { collectSingleLineErrorMessage, ILogService } from '../../../platform/log/common/logService';21import { FinishedCallback, getRequestId, 
IResponseDelta, OptionalChatRequestParams, RequestId } from '../../../platform/networking/common/fetch';22import { FetcherId, IFetcherService, Response } from '../../../platform/networking/common/fetcherService';23import { IBackgroundRequestOptions, IChatEndpoint, IEndpointBody, ISubagentRequestOptions, postRequest, stringifyUrlOrRequestMetadata } from '../../../platform/networking/common/networking';24import { CAPIChatMessage, ChatCompletion, FilterReason, FinishedCompletionReason, rawMessageToCAPI } from '../../../platform/networking/common/openai';25import { sendEngineMessagesTelemetry } from '../../../platform/networking/node/chatStream';26import { CAPIWebSocketErrorEvent, IChatWebSocketManager, isCAPIWebSocketError } from '../../../platform/networking/node/chatWebSocketManager';27import { sendCommunicationErrorTelemetry } from '../../../platform/networking/node/stream';28import { ChatFailKind, ChatRequestCanceled, ChatRequestFailed, ChatResults, FetchResponseKind } from '../../../platform/openai/node/fetch';29import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, normalizeProviderMessages, StdAttr, toSystemInstructions, toToolDefinitions, truncateForOTel } from '../../../platform/otel/common/index';30import { IOTelService, ISpanHandle, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';31import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';32import { getCurrentCapturingToken } from '../../../platform/requestLogger/node/requestLogger';33import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';34import { ITelemetryService, TelemetryProperties } from '../../../platform/telemetry/common/telemetry';35import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';36import { isEncryptedThinkingDelta } from '../../../platform/thinking/common/thinking';37import { 
calculateLineRepetitionStats, isRepetitive } from '../../../util/common/anomalyDetection';
import { ErrorUtils } from '../../../util/common/errors';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { isCancellationError } from '../../../util/vs/base/common/errors';
import { Emitter } from '../../../util/vs/base/common/event';
import { Disposable } from '../../../util/vs/base/common/lifecycle';
import { escapeRegExpCharacters } from '../../../util/vs/base/common/strings';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { isBYOKModel } from '../../byok/node/openAIEndpoint';
import { EXTENSION_ID } from '../../common/constants';
import { IPowerService } from '../../power/common/powerService';
import { ChatMLFetcherTelemetrySender as Telemetry } from './chatMLFetcherTelemetry';

/** Event payload fired after a ChatML request has been dispatched to the endpoint. */
export interface IMadeChatRequestEvent {
	readonly messages: Raw.ChatMessage[];
	readonly model: string;
	readonly source?: Source;
	readonly tokenCount?: number;
}

/**
 * Shared base for ChatML fetchers: holds the conversation-level sampling options,
 * forces streamed responses, and provides the single-choice convenience wrapper.
 */
export abstract class AbstractChatMLFetcher extends Disposable implements IChatMLFetcher {

	declare _serviceBrand: undefined;

	constructor(
		protected readonly options: IConversationOptions,
	) {
		super();
	}

	protected preparePostOptions(requestOptions: OptionalChatRequestParams): OptionalChatRequestParams {
		return {
			temperature: this.options.temperature,
			top_p: this.options.topP,
			// we disallow `stream=false` because we don't support non-streamed response
			...requestOptions,
			stream: true
		};
	}

	protected readonly _onDidMakeChatMLRequest = this._register(new Emitter<IMadeChatRequestEvent>());
	readonly onDidMakeChatMLRequest = this._onDidMakeChatMLRequest.event;

	public async fetchOne(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponse> {
		// Force a single completion (`n: 1`) and unwrap the one-element result array.
		const resp = await this.fetchMany({
			...opts,
			requestOptions: { ...opts.requestOptions, n: 1 }
		}, token);
		if (resp.type === ChatFetchResponseType.Success) {
			return { ...resp, value: resp.value[0] };
		}
		return resp;
	}

	/**
	 * Note: the returned array of strings may be less than `n` (e.g., in case there were errors during streaming)
	 */
	public abstract fetchMany(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponses>;
}

export class ChatMLFetcherImpl extends AbstractChatMLFetcher {

	private static readonly _maxConsecutiveWebSocketFallbacks = 3;

	/**
	 * Delays (in ms) between connectivity check attempts before retrying a failed request.
	 * Configurable for testing purposes.
	 */
	public connectivityCheckDelays = [1000, 10000, 10000];

	/**
	 * Tracks consecutive WebSocket request failures where the HTTP retry succeeded.
	 * After {@link _maxConsecutiveWebSocketFallbacks} such failures, WebSocket requests are disabled entirely.
	 */
	private _consecutiveWebSocketRetryFallbacks = 0;

	constructor(
		@IFetcherService private readonly _fetcherService: IFetcherService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
		@ILogService private readonly _logService: ILogService,
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IInteractionService private readonly _interactionService: IInteractionService,
		@IChatQuotaService private readonly _chatQuotaService: IChatQuotaService,
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IConversationOptions options: IConversationOptions,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IExperimentationService private readonly _experimentationService: IExperimentationService,
		@IPowerService private readonly _powerService: IPowerService,
		@IInstantiationService private readonly _instantiationService:
IInstantiationService,128@IChatWebSocketManager private readonly _webSocketManager: IChatWebSocketManager,129@IOTelService private readonly _otelService: IOTelService,130) {131super(options);132}133134/**135* Note: the returned array of strings may be less than `n` (e.g., in case there were errors during streaming)136*/137public async fetchMany(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponses> {138let { debugName, endpoint: chatEndpoint, finishedCb, location, messages, requestOptions, source, telemetryProperties, userInitiatedRequest, requestKindOptions, conversationId, turnId, useWebSocket, ignoreStatefulMarker } = opts;139if (useWebSocket && this._consecutiveWebSocketRetryFallbacks >= ChatMLFetcherImpl._maxConsecutiveWebSocketFallbacks) {140this._logService.debug(`[ChatWebSocketManager] Disabling WebSocket for request due to ${this._consecutiveWebSocketRetryFallbacks} consecutive WebSocket failures with successful HTTP fallback.`);141useWebSocket = false;142ignoreStatefulMarker = true;143}144if (!telemetryProperties) {145telemetryProperties = {};146}147148if (!telemetryProperties.messageSource) {149telemetryProperties.messageSource = debugName;150}151152const transport = useWebSocket ? 'websocket' : 'http';153154// TODO @lramos15 telemetry should not drive request ids155const ourRequestId = telemetryProperties.requestId ?? telemetryProperties.messageId ?? 
generateUuid();156157const maxResponseTokens = chatEndpoint.maxOutputTokens;158if (!requestOptions?.prediction) {159requestOptions = { max_tokens: maxResponseTokens, ...requestOptions };160}161// Avoid sending a prediction with no content as this will yield a 400 Bad Request162if (!requestOptions.prediction?.content) {163delete requestOptions['prediction'];164}165166const postOptions = this.preparePostOptions(requestOptions);167const requestBody = chatEndpoint.createRequestBody({168...opts,169ignoreStatefulMarker,170requestId: ourRequestId,171postOptions172});173174175const baseTelemetry = TelemetryData.createAndMarkAsIssued({176...telemetryProperties,177...(conversationId ? { conversationId } : {}),178headerRequestId: ourRequestId,179baseModel: chatEndpoint.model,180uiKind: ChatLocation.toString(location)181});182183const pendingLoggedChatRequest = this._requestLogger.logChatRequest(debugName, chatEndpoint, {184messages: opts.messages,185model: chatEndpoint.model,186ourRequestId,187location: opts.location,188body: requestBody,189ignoreStatefulMarker,190isConversationRequest: opts.isConversationRequest,191customMetadata: opts.customMetadata192});193let tokenCount = -1;194const streamRecorder = new FetchStreamRecorder(finishedCb);195const enableRetryOnError = opts.enableRetryOnError ?? opts.enableRetryOnFilter;196const canRetryOnce = opts.canRetryOnceWithoutRollback ?? 
!(opts.enableRetryOnFilter || opts.enableRetryOnError);197let usernameToScrub: string | undefined;198let actualFetcher: FetcherId | undefined;199let actualBytesReceived: number | undefined;200let actualStatusCode: number | undefined;201let suspendEventSeen: boolean | undefined;202let resumeEventSeen: boolean | undefined;203let otelInferenceSpan: ISpanHandle | undefined;204try {205let response: ChatResults | ChatRequestFailed | ChatRequestCanceled;206const payloadValidationResult = isValidChatPayload(opts.messages, postOptions, chatEndpoint, this._configurationService, this._experimentationService);207if (!payloadValidationResult.isValid) {208response = {209type: FetchResponseKind.Failed,210modelRequestId: undefined,211failKind: ChatFailKind.ValidationFailed,212reason: payloadValidationResult.reason,213};214} else {215let tokenCountPromise: Promise<number> | undefined;216const countTokens = () => tokenCountPromise ??= chatEndpoint.acquireTokenizer().countMessagesTokens(messages);217const copilotToken = await this._authenticationService.getCopilotToken();218usernameToScrub = copilotToken.username;219220const fetchResult = await this._fetchAndStreamChat(221chatEndpoint,222requestBody,223baseTelemetry,224streamRecorder.callback,225requestOptions.secretKey,226copilotToken,227opts.location,228ourRequestId,229postOptions.n,230token,231countTokens,232userInitiatedRequest,233useWebSocket,234turnId,235conversationId,236telemetryProperties,237opts.useFetcher,238canRetryOnce,239requestKindOptions,240opts.summarizedAtRoundId,241opts.modeChanged,242);243response = fetchResult.result;244actualFetcher = fetchResult.fetcher;245actualBytesReceived = fetchResult.bytesReceived;246actualStatusCode = fetchResult.statusCode;247suspendEventSeen = fetchResult.suspendEventSeen;248resumeEventSeen = fetchResult.resumeEventSeen;249otelInferenceSpan = fetchResult.otelSpan;250// Tag span with debug name so orphaned spans (title, progressMessages, etc.) 
are identifiable251otelInferenceSpan?.setAttribute(GenAiAttr.AGENT_NAME, debugName);252253// Extract and set structured prompt sections for the debug panel254if (otelInferenceSpan) {255// Support both Chat Completions API (messages) and Responses API (input) formats256const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<{ role?: string; content?: string | unknown[] }> | undefined;257// User request: last user-role message258const userMessages = capiMessages?.filter(m => m.role === 'user');259const lastUserMsg = userMessages?.[userMessages.length - 1];260if (lastUserMsg?.content) {261const userContent = typeof lastUserMsg.content === 'string'262? lastUserMsg.content263: JSON.stringify(lastUserMsg.content);264otelInferenceSpan.setAttribute(CopilotChatAttr.USER_REQUEST, truncateForOTel(userContent));265}266// System instructions — check messages array, top-level system (Anthropic), or instructions (Responses API)267const systemMsg = capiMessages?.find(m => m.role === 'system');268const systemContent = systemMsg?.content269?? (requestBody as Record<string, unknown>).system270?? (requestBody as Record<string, unknown>).instructions;271if (systemContent) {272let systemText: string;273if (typeof systemContent === 'string') {274systemText = systemContent;275} else if (Array.isArray(systemContent)) {276// Anthropic format: array of content blocks — extract text only,277// dropping metadata like cache_control so the value is stable across turns.278systemText = (systemContent as Array<{ text?: string }>)279.map(b => b.text ?? 
'')280.join('\n');281} else {282systemText = JSON.stringify(systemContent);283}284// Format as OTel GenAI system instruction JSON schema285const systemInstructions = toSystemInstructions(systemText);286if (systemInstructions) {287otelInferenceSpan.setAttribute(GenAiAttr.SYSTEM_INSTRUCTIONS, JSON.stringify(systemInstructions));288}289}290}291292// Always capture full request content for the debug panel293if (otelInferenceSpan) {294const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<Record<string, unknown>> | undefined;295if (capiMessages) {296// Normalize provider-specific content (Anthropic tool_use/tool_result, OpenAI tool messages) to OTel schema297otelInferenceSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(normalizeProviderMessages(capiMessages))));298}299// Tool definitions: emit on every chat span so trace viewers can render the300// tool catalog per LLM call (issue #299934). Includes `parameters` per301// OTel GenAI semantic conventions (issue #300318).302const toolDefs = toToolDefinitions(requestBody.tools);303if (toolDefs) {304otelInferenceSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));305}306}307tokenCount = await countTokens();308const extensionId = source?.extensionId ?? 
EXTENSION_ID;309this._onDidMakeChatMLRequest.fire({310messages,311model: chatEndpoint.model,312source: { extensionId },313tokenCount314});315}316const timeToFirstToken = Date.now() - baseTelemetry.issuedTime;317pendingLoggedChatRequest?.markTimeToFirstToken(timeToFirstToken);318switch (response.type) {319case FetchResponseKind.Success: {320const result = await this.processSuccessfulResponse(response, messages, requestBody, ourRequestId, maxResponseTokens, tokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpoint, userInitiatedRequest, transport, actualFetcher, actualBytesReceived, suspendEventSeen, resumeEventSeen);321322// Handle FilteredRetry case with augmented messages323if (result.type === ChatFetchResponseType.FilteredRetry) {324325if (opts.enableRetryOnFilter) {326streamRecorder.callback('', 0, { text: '', retryReason: result.category });327328const filteredContent = result.value[0];329if (filteredContent) {330const retryMessage = (result.category === FilterReason.Copyright) ?331`The previous response (copied below) was filtered due to being too similar to existing public code. Please suggest something similar in function that does not match public code. Here's the previous response: ${filteredContent}\n\n` :332`The previous response (copied below) was filtered due to triggering our content safety filters, which looks for hateful, self-harm, sexual, or violent content. Please suggest something similar in content that does not trigger these filters. 
Here's the previous response: ${filteredContent}\n\n`;333const augmentedMessages: Raw.ChatMessage[] = [334...messages,335{336role: Raw.ChatRole.User,337content: toTextParts(retryMessage)338}339];340341// Retry with augmented messages342const retryResult = await this.fetchMany({343...opts,344debugName: 'retry-' + debugName,345messages: augmentedMessages,346finishedCb,347location,348endpoint: chatEndpoint,349source,350requestOptions,351userInitiatedRequest: false, // do not mark the retry as user initiated352telemetryProperties: { ...telemetryProperties, retryAfterFilterCategory: result.category ?? 'uncategorized' },353enableRetryOnFilter: false,354canRetryOnceWithoutRollback: false,355enableRetryOnError,356}, token);357358pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);359if (retryResult.type === ChatFetchResponseType.Success) {360return retryResult;361}362}363}364365return {366type: ChatFetchResponseType.Filtered,367category: result.category,368reason: 'Response got filtered.',369requestId: result.requestId,370serverRequestId: result.serverRequestId371};372}373374pendingLoggedChatRequest?.resolve(result, streamRecorder.deltas);375376// Record OTel token usage metrics if available377if (result.type === ChatFetchResponseType.Success && result.usage) {378const metricAttrs = {379operationName: GenAiOperationName.CHAT,380providerName: GenAiProviderName.GITHUB,381requestModel: chatEndpoint.model,382responseModel: result.resolvedModel,383};384if (result.usage.prompt_tokens) {385GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs);386}387if (result.usage.completion_tokens) {388GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs);389}390391// Set token usage and response details on the chat span before ending it392otelInferenceSpan?.setAttributes({393[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 
0,394[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,395[GenAiAttr.RESPONSE_MODEL]: result.resolvedModel ?? chatEndpoint.model,396[GenAiAttr.RESPONSE_ID]: result.requestId,397[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],398...(result.usage.prompt_tokens_details?.cached_tokens399? { [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }400: {}),401...(result.usage.prompt_tokens_details?.cache_creation_input_tokens402? { [GenAiAttr.USAGE_CACHE_CREATION_INPUT_TOKENS]: result.usage.prompt_tokens_details.cache_creation_input_tokens }403: {}),404[CopilotChatAttr.TIME_TO_FIRST_TOKEN]: timeToFirstToken,405...(result.serverRequestId ? { [CopilotChatAttr.SERVER_REQUEST_ID]: result.serverRequestId } : {}),406...(result.usage.completion_tokens_details?.reasoning_tokens407? { [GenAiAttr.USAGE_REASONING_TOKENS]: result.usage.completion_tokens_details.reasoning_tokens }408: {}),409});410}411// Always capture response content for the debug panel412if (otelInferenceSpan && result.type === ChatFetchResponseType.Success) {413const responseText = streamRecorder.deltas.map(d => d.text).join('');414const toolCalls = streamRecorder.deltas415.filter(d => d.copilotToolCalls?.length)416.flatMap(d => d.copilotToolCalls!.map(tc => ({417type: 'tool_call' as const, id: tc.id, name: tc.name, arguments: tc.arguments418})));419const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];420if (responseText) {421parts.push({ type: 'text', content: responseText });422}423parts.push(...toolCalls);424if (parts.length > 0) {425otelInferenceSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));426}427// Capture reasoning/thinking text if present428const hasThinking = streamRecorder.deltas.some(d => d.thinking);429if (hasThinking) {430const thinkingTexts = streamRecorder.deltas431.filter(d => d.thinking && 
!isEncryptedThinkingDelta(d.thinking) && d.thinking.text)432.map(d => {433const t = d.thinking!;434if ('encrypted' in t) { return ''; }435return Array.isArray(t.text) ? t.text.join('') : (t.text ?? '');436});437const reasoningText = thinkingTexts.join('');438otelInferenceSpan.setAttribute(CopilotChatAttr.REASONING_CONTENT, truncateForOTel(reasoningText || '[encrypted]'));439}440}441442// Emit OTel inference details event BEFORE ending the span443// so the log record inherits the active trace context444emitInferenceDetailsEvent(445this._otelService,446{447model: chatEndpoint.model,448temperature: requestOptions?.temperature,449maxTokens: requestOptions?.max_tokens,450},451result.type === ChatFetchResponseType.Success ? {452id: result.requestId,453model: result.resolvedModel,454finishReasons: ['stop'],455inputTokens: result.usage?.prompt_tokens,456outputTokens: result.usage?.completion_tokens,457} : undefined,458);459460otelInferenceSpan?.end();461otelInferenceSpan = undefined;462463// Record OTel time-to-first-token metric464if (timeToFirstToken > 0) {465GenAiMetrics.recordTimeToFirstToken(this._otelService, chatEndpoint.model, timeToFirstToken / 1000);466}467468if (useWebSocket && result.type === ChatFetchResponseType.Success) {469this._consecutiveWebSocketRetryFallbacks = 0;470}471472return result;473}474case FetchResponseKind.Canceled:475Telemetry.sendCancellationTelemetry(476this._telemetryService,477{478source: telemetryProperties.messageSource ?? 
'unknown',479requestId: ourRequestId,480model: chatEndpoint.model,481apiType: chatEndpoint.apiType,482transport,483associatedRequestId: telemetryProperties.associatedRequestId,484retryAfterError: telemetryProperties.retryAfterError,485retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,486connectivityTestError: telemetryProperties.connectivityTestError,487connectivityTestErrorGitHubRequestId: telemetryProperties.connectivityTestErrorGitHubRequestId,488retryAfterFilterCategory: telemetryProperties.retryAfterFilterCategory,489fetcher: actualFetcher,490suspendEventSeen,491resumeEventSeen,492},493{494totalTokenMax: chatEndpoint.modelMaxPromptTokens ?? -1,495promptTokenCount: tokenCount,496tokenCountMax: maxResponseTokens,497timeToFirstToken,498timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,499timeToCancelled: Date.now() - baseTelemetry.issuedTime,500isVisionRequest: this.filterImageMessages(messages) ? 
1 : -1,501isBYOK: isBYOKModel(chatEndpoint),502isAuto: isAutoModel(chatEndpoint),503bytesReceived: actualBytesReceived,504issuedTime: baseTelemetry.issuedTime,505});506pendingLoggedChatRequest?.resolveWithCancelation();507// Set canceled status on OTel span508otelInferenceSpan?.setAttributes({509[GenAiAttr.RESPONSE_FINISH_REASONS]: ['cancelled'],510[CopilotChatAttr.CANCELED]: true,511});512otelInferenceSpan?.end();513otelInferenceSpan = undefined;514return this.processCanceledResponse(response, ourRequestId, streamRecorder, telemetryProperties);515case FetchResponseKind.Failed: {516const processed = this.processFailedResponse(response, ourRequestId, isAutoModel(chatEndpoint) === 1);517// Retry on server errors based on configured status codes518const retryServerErrorStatusCodes = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.RetryServerErrorStatusCodes, this._experimentationService);519const statusCodesToRetry = retryServerErrorStatusCodes520.split(',')521.map(s => parseInt(s.trim(), 10));522const retryAfterServerError = enableRetryOnError && actualStatusCode !== undefined && statusCodesToRetry.includes(actualStatusCode);523const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (response.failKind === ChatFailKind.ServerError || response.failKind === ChatFailKind.Unknown);524if (retryAfterServerError || retryWithoutWebSocket) {525const { retryResult } = await this._retryAfterError({526opts,527processed,528telemetryProperties,529requestBody,530tokenCount,531maxResponseTokens,532timeToError: timeToFirstToken,533transport,534actualFetcher,535bytesReceived: actualBytesReceived,536baseTelemetry,537streamRecorder,538retryReason: 'server_error',539debugNamePrefix: 'retry-server-error-',540pendingLoggedChatRequest,541token,542usernameToScrub,543suspendEventSeen,544resumeEventSeen,545});546if (retryResult) {547return retryResult;548}549}550Telemetry.sendResponseErrorTelemetry(this._telemetryService, 
{551processed,552telemetryProperties,553chatEndpointInfo: chatEndpoint,554requestBody,555tokenCount,556maxResponseTokens,557timeToFirstToken,558isVisionRequest: this.filterImageMessages(messages),559transport,560fetcher: actualFetcher,561bytesReceived: actualBytesReceived,562issuedTime: baseTelemetry.issuedTime,563wasRetried: false,564suspendEventSeen,565resumeEventSeen,566});567pendingLoggedChatRequest?.resolve(processed);568return processed;569}570}571} catch (err) {572// End OTel inference span on error if not already ended573if (otelInferenceSpan) {574otelInferenceSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));575otelInferenceSpan.setAttribute(StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');576otelInferenceSpan.setAttribute(GenAiAttr.RESPONSE_FINISH_REASONS, ['error']);577otelInferenceSpan.recordException(err);578otelInferenceSpan.end();579}580const timeToError = Date.now() - baseTelemetry.issuedTime;581if (err.fetcherId) {582actualFetcher = err.fetcherId;583}584if (err.suspendEventSeen) {585suspendEventSeen = true;586}587if (err.resumeEventSeen) {588resumeEventSeen = true;589}590const processed = this.processError(err, ourRequestId, err.gitHubRequestId, usernameToScrub, isAutoModel(chatEndpoint) === 1);591const retryNetworkError = enableRetryOnError && processed.type === ChatFetchResponseType.NetworkError && this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.RetryNetworkErrors, this._experimentationService);592const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (processed.type === ChatFetchResponseType.NetworkError || processed.type === ChatFetchResponseType.Failed);593if (retryNetworkError || retryWithoutWebSocket) {594const { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId } = await 
this._retryAfterError({595opts,596processed,597telemetryProperties,598requestBody,599tokenCount,600maxResponseTokens,601timeToError,602transport,603actualFetcher,604bytesReceived: err.bytesReceived,605baseTelemetry,606streamRecorder,607retryReason: 'network_error',608debugNamePrefix: 'retry-error-',609pendingLoggedChatRequest,610token,611usernameToScrub,612suspendEventSeen,613resumeEventSeen,614});615if (retryResult) {616return retryResult;617}618telemetryProperties = { ...telemetryProperties, connectivityTestError, connectivityTestErrorGitHubRequestId };619}620if (processed.type === ChatFetchResponseType.Canceled) {621Telemetry.sendCancellationTelemetry(622this._telemetryService,623{624source: telemetryProperties.messageSource ?? 'unknown',625requestId: ourRequestId,626model: chatEndpoint.model,627apiType: chatEndpoint.apiType,628transport,629associatedRequestId: telemetryProperties.associatedRequestId,630retryAfterError: telemetryProperties.retryAfterError,631retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,632connectivityTestError: telemetryProperties.connectivityTestError,633connectivityTestErrorGitHubRequestId: telemetryProperties.connectivityTestErrorGitHubRequestId,634retryAfterFilterCategory: telemetryProperties.retryAfterFilterCategory,635fetcher: actualFetcher,636suspendEventSeen,637resumeEventSeen,638},639{640totalTokenMax: chatEndpoint.modelMaxPromptTokens ?? -1,641promptTokenCount: tokenCount,642tokenCountMax: maxResponseTokens,643timeToFirstToken: undefined,644timeToCancelled: timeToError,645isVisionRequest: this.filterImageMessages(messages) ? 
1 : -1,646isBYOK: isBYOKModel(chatEndpoint),647isAuto: isAutoModel(chatEndpoint),648bytesReceived: err.bytesReceived,649issuedTime: baseTelemetry.issuedTime,650}651);652} else {653Telemetry.sendResponseErrorTelemetry(this._telemetryService, {654processed,655telemetryProperties,656chatEndpointInfo: chatEndpoint,657requestBody,658tokenCount,659maxResponseTokens,660timeToFirstToken: timeToError,661isVisionRequest: this.filterImageMessages(messages),662transport,663fetcher: actualFetcher,664bytesReceived: err.bytesReceived,665issuedTime: baseTelemetry.issuedTime,666wasRetried: false,667suspendEventSeen,668resumeEventSeen,669});670}671pendingLoggedChatRequest?.resolve(processed);672return processed;673}674}675676private async _checkNetworkConnectivity(useFetcher?: FetcherId): Promise<{ retryRequest: boolean; connectivityTestError?: string; connectivityTestErrorGitHubRequestId?: string }> {677// Ping CAPI to check network connectivity before retrying678const delays = this.connectivityCheckDelays;679let connectivityTestError: string | undefined = undefined;680let connectivityTestErrorGitHubRequestId: string | undefined = undefined;681for (const delay of delays) {682this._logService.info(`Waiting ${delay}ms before pinging CAPI to check network connectivity...`);683await new Promise(resolve => setTimeout(resolve, delay));684try {685const isGHEnterprise = this._capiClientService.dotcomAPIURL !== 'https://api.github.com';686const url = this._capiClientService.capiPingURL;687const headers = await this._getAuthHeaders(isGHEnterprise, url);688const res = await this._fetcherService.fetch(url, {689headers,690useFetcher,691callSite: 'capi-ping',692});693if (res.status >= 200 && res.status < 300) {694this._logService.info(`CAPI ping successful, proceeding with chat request retry...`);695return { retryRequest: true, connectivityTestError, connectivityTestErrorGitHubRequestId };696} else {697connectivityTestError = `Status ${res.status}: 
${res.statusText}`;
					connectivityTestErrorGitHubRequestId = res.headers.get('x-github-request-id') ?? '';
					this._logService.info(`CAPI ping returned status ${res.status}, retrying ping...`);
				}
			} catch (err) {
				connectivityTestError = collectSingleLineErrorMessage(err, true);
				connectivityTestErrorGitHubRequestId = undefined; // no response headers yet
				this._logService.info(`CAPI ping failed with error, retrying ping: ${connectivityTestError}`);
			}
		}
		return { retryRequest: false, connectivityTestError, connectivityTestErrorGitHubRequestId };
	}

	private async _getAuthHeaders(isGHEnterprise: boolean, url: string) {
		// On GHE, the ping endpoint requires a bearer token; dotcom pings go unauthenticated.
		const authHeaders: Record<string, string> = {};
		if (isGHEnterprise) {
			let token = '';
			if (url === this._capiClientService.dotcomAPIURL) {
				token = this._authenticationService.anyGitHubSession?.accessToken || '';
			} else {
				try {
					token = (await this._authenticationService.getCopilotToken()).token;
				} catch (_err) {
					// Ignore error
					token = '';
				}
			}
			authHeaders['Authorization'] = `Bearer ${token}`;
		}
		return authHeaders;
	}

	private async _retryAfterError(params: {
		opts: IFetchMLOptions;
		processed: ChatFetchError;
		telemetryProperties: TelemetryProperties;
		requestBody: IEndpointBody;
		tokenCount: number;
		maxResponseTokens: number;
		timeToError: number;
		transport: string;
		actualFetcher: FetcherId | undefined;
		bytesReceived: number | undefined;
		baseTelemetry: TelemetryData;
		streamRecorder: FetchStreamRecorder;
		retryReason: 'network_error' | 'server_error';
		debugNamePrefix: string;
		pendingLoggedChatRequest: ReturnType<IRequestLogger['logChatRequest']>;
		token: CancellationToken;
		usernameToScrub: string | undefined;
		suspendEventSeen: boolean | undefined;
		resumeEventSeen: boolean | undefined;
	}): Promise<{ retryResult?: ChatResponses; connectivityTestError?: string; connectivityTestErrorGitHubRequestId?: string }> {
		const 
{751opts,752processed,753telemetryProperties,754requestBody,755tokenCount,756maxResponseTokens,757timeToError,758transport,759actualFetcher,760bytesReceived,761baseTelemetry,762streamRecorder,763retryReason,764debugNamePrefix,765pendingLoggedChatRequest,766token,767usernameToScrub,768suspendEventSeen,769resumeEventSeen,770} = params;771772// net::ERR_NETWORK_CHANGED: https://github.com/microsoft/vscode/issues/260297773const isNetworkChangedError = ['darwin', 'linux'].includes(process.platform) && processed.reason.indexOf('net::ERR_NETWORK_CHANGED') !== -1;774// When Electron's network process crashes, all requests through it fail permanently.775// Fall back to node-fetch which bypasses Electron's network stack entirely.776const fallbackEnabled = this._configurationService.getExperimentBasedConfig(777ConfigKey.TeamInternal.FallbackNodeFetchOnNetworkProcessCrash, this._experimentationService);778const isNetworkProcessCrash = processed.type === ChatFetchResponseType.NetworkError779&& processed.isNetworkProcessCrash === true780&& fallbackEnabled;781const useFetcher = (isNetworkChangedError || isNetworkProcessCrash) ? 'node-fetch' : opts.useFetcher;782this._logService.info(`Retrying chat request with ${useFetcher || 'default'} fetcher after: ${processed.reasonDetail || processed.reason}`);783const connectivity = await this._checkNetworkConnectivity(useFetcher);784const connectivityTestError = connectivity.connectivityTestError ? 
this.scrubErrorDetail(connectivity.connectivityTestError, usernameToScrub) : undefined;785const connectivityTestErrorGitHubRequestId = connectivity.connectivityTestErrorGitHubRequestId;786if (!connectivity.retryRequest) {787this._logService.info(`Not retrying chat request as network connectivity could not be re-established.`);788return { connectivityTestError, connectivityTestErrorGitHubRequestId };789}790791Telemetry.sendResponseErrorTelemetry(792this._telemetryService,793{794processed,795telemetryProperties,796chatEndpointInfo: opts.endpoint,797requestBody,798tokenCount,799maxResponseTokens,800timeToFirstToken: timeToError,801isVisionRequest: this.filterImageMessages(opts.messages),802transport,803fetcher: actualFetcher,804bytesReceived,805issuedTime: baseTelemetry.issuedTime,806wasRetried: true,807suspendEventSeen,808resumeEventSeen,809},810);811812streamRecorder.callback('', 0, { text: '', retryReason });813814const retryResult = await this.fetchMany({815...opts,816useWebSocket: false,817ignoreStatefulMarker: opts.useWebSocket || opts.ignoreStatefulMarker,818debugName: debugNamePrefix + opts.debugName,819userInitiatedRequest: false, // do not mark the retry as user initiated820telemetryProperties: {821...telemetryProperties,822retryAfterError: processed.reasonDetail || processed.reason,823retryAfterErrorGitHubRequestId: processed.serverRequestId,824connectivityTestError,825connectivityTestErrorGitHubRequestId,826},827enableRetryOnError: false,828useFetcher,829}, token);830831pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);832if (opts.useWebSocket && retryResult.type === ChatFetchResponseType.Success) {833this._consecutiveWebSocketRetryFallbacks++;834this._logService.info(`[ChatWebSocketManager] WebSocket request failed with successful HTTP fallback (${this._consecutiveWebSocketRetryFallbacks} consecutive).`);835if (opts.conversationId) {836// Closing here because the retry is 
transparent.837this._webSocketManager.closeConnection(opts.conversationId);838}839}840return { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId };841}842843private async _fetchAndStreamChat(844chatEndpointInfo: IChatEndpoint,845request: IEndpointBody,846baseTelemetryData: TelemetryData,847finishedCb: FinishedCallback,848secretKey: string | undefined,849copilotToken: CopilotToken,850location: ChatLocation,851ourRequestId: string,852nChoices: number | undefined,853cancellationToken: CancellationToken,854countTokens: () => Promise<number>,855userInitiatedRequest?: boolean,856useWebSocket?: boolean,857turnId?: string,858conversationId?: string,859telemetryProperties?: TelemetryProperties | undefined,860useFetcher?: FetcherId,861canRetryOnce?: boolean,862requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,863summarizedAtRoundId?: string,864modeChanged?: boolean,865): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number; suspendEventSeen?: boolean; resumeEventSeen?: boolean; otelSpan?: ISpanHandle }> {866const isPowerSaveBlockerEnabled = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.ChatRequestPowerSaveBlocker, this._experimentationService);867const blockerHandle = isPowerSaveBlockerEnabled && location !== ChatLocation.Other ? 
this._powerService.acquirePowerSaveBlocker() : undefined;868869let suspendEventSeen = false;870let resumeEventSeen = false;871872const suspendListener = this._powerService.onDidSuspend(() => {873suspendEventSeen = true;874this._logService.info(`System suspended during streaming request ${ourRequestId} (${ChatLocation.toString(location)})`);875});876877const resumeListener = this._powerService.onDidResume(() => {878resumeEventSeen = true;879this._logService.info(`System resumed during streaming request ${ourRequestId} (${ChatLocation.toString(location)})`);880});881882try {883const fetchResult = await this._doFetchAndStreamChat(884chatEndpointInfo,885request,886baseTelemetryData,887finishedCb,888secretKey,889copilotToken,890location,891ourRequestId,892nChoices,893cancellationToken,894countTokens,895userInitiatedRequest,896useWebSocket,897turnId,898conversationId,899telemetryProperties,900useFetcher,901canRetryOnce,902requestKindOptions,903summarizedAtRoundId,904modeChanged,905);906return { ...fetchResult, suspendEventSeen: suspendEventSeen || undefined, resumeEventSeen: resumeEventSeen || undefined };907} catch (err) {908if (suspendEventSeen) {909err.suspendEventSeen = true;910}911if (resumeEventSeen) {912err.resumeEventSeen = true;913}914throw err;915} finally {916suspendListener.dispose();917resumeListener.dispose();918blockerHandle?.dispose();919}920}921922private async _doFetchAndStreamChat(923chatEndpointInfo: IChatEndpoint,924request: IEndpointBody,925baseTelemetryData: TelemetryData,926finishedCb: FinishedCallback,927secretKey: string | undefined,928copilotToken: CopilotToken,929location: ChatLocation,930ourRequestId: string,931nChoices: number | undefined,932cancellationToken: CancellationToken,933countTokens: () => Promise<number>,934userInitiatedRequest?: boolean,935useWebSocket?: boolean,936turnId?: string,937conversationId?: string,938telemetryProperties?: TelemetryProperties | undefined,939useFetcher?: FetcherId,940canRetryOnce?: 
boolean,941requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,942summarizedAtRoundId?: string,943modeChanged?: boolean,944): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number; otelSpan?: ISpanHandle }> {945946if (cancellationToken.isCancellationRequested) {947return { result: { type: FetchResponseKind.Canceled, reason: 'before fetch request' } };948}949950// OTel inference span for this LLM call951const serverAddress = typeof chatEndpointInfo.urlOrRequestMetadata === 'string'952? (() => { try { return new URL(chatEndpointInfo.urlOrRequestMetadata).hostname; } catch { return undefined; } })()953: undefined;954const chatSessionId = getCurrentCapturingToken()?.chatSessionId;955const parentChatSessionId = getCurrentCapturingToken()?.parentChatSessionId;956const debugLogLabel = getCurrentCapturingToken()?.debugLogLabel;957const otelSpan = this._otelService.startSpan(`chat ${chatEndpointInfo.model}`, {958kind: SpanKind.CLIENT,959attributes: {960[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,961[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GITHUB,962[GenAiAttr.REQUEST_MODEL]: chatEndpointInfo.model,963[GenAiAttr.CONVERSATION_ID]: telemetryProperties?.requestId ?? ourRequestId,964[GenAiAttr.REQUEST_MAX_TOKENS]: request.max_tokens ?? request.max_output_tokens ?? request.max_completion_tokens ?? 2048,965...(request.temperature !== undefined ? { [GenAiAttr.REQUEST_TEMPERATURE]: request.temperature } : {}),966...(request.top_p !== undefined ? { [GenAiAttr.REQUEST_TOP_P]: request.top_p } : {}),967[CopilotChatAttr.MAX_PROMPT_TOKENS]: chatEndpointInfo.modelMaxPromptTokens,968...(serverAddress ? { [StdAttr.SERVER_ADDRESS]: serverAddress } : {}),969...(conversationId ? { [CopilotChatAttr.SESSION_ID]: conversationId } : {}),970...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}),971...(parentChatSessionId ? 
{ [CopilotChatAttr.PARENT_CHAT_SESSION_ID]: parentChatSessionId } : {}),972...(debugLogLabel ? { [CopilotChatAttr.DEBUG_LOG_LABEL]: debugLogLabel } : {}),973},974});975const otelStartTime = Date.now();976977try {978979this._logService.debug(`modelMaxPromptTokens ${chatEndpointInfo.modelMaxPromptTokens}`);980this._logService.debug(`modelMaxResponseTokens ${request.max_tokens ?? 2048}`);981this._logService.debug(`chat model ${chatEndpointInfo.model}`);982983secretKey ??= copilotToken.token;984if (!secretKey) {985// If no key is set we error986const urlOrRequestMetadata = stringifyUrlOrRequestMetadata(chatEndpointInfo.urlOrRequestMetadata);987this._logService.error(`Failed to send request to ${urlOrRequestMetadata} due to missing key`);988sendCommunicationErrorTelemetry(this._telemetryService, `Failed to send request to ${urlOrRequestMetadata} due to missing key`);989return {990result: {991type: FetchResponseKind.Failed,992modelRequestId: undefined,993failKind: ChatFailKind.TokenExpiredOrInvalid,994reason: 'key is missing'995}996};997}998999// WebSocket path: use persistent WebSocket connection for Responses API endpoints1000if (useWebSocket && turnId && conversationId) {1001const wsResult = await this._doFetchViaWebSocket(1002chatEndpointInfo,1003request,1004baseTelemetryData,1005finishedCb,1006secretKey,1007location,1008ourRequestId,1009turnId,1010conversationId,1011cancellationToken,1012countTokens,1013userInitiatedRequest,1014telemetryProperties,1015requestKindOptions,1016summarizedAtRoundId,1017modeChanged,1018);1019return { ...wsResult, otelSpan };1020}10211022const httpResult = await this._doFetchViaHttp(1023chatEndpointInfo,1024request,1025baseTelemetryData,1026finishedCb,1027secretKey,1028location,1029ourRequestId,1030nChoices,1031cancellationToken,1032userInitiatedRequest,1033telemetryProperties,1034useFetcher,1035canRetryOnce,1036requestKindOptions,1037);1038return { ...httpResult, otelSpan };10391040} catch (err) 
{1041otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));1042otelSpan.setAttribute(StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');1043otelSpan.recordException(err);1044throw err;1045} finally {1046const durationSec = (Date.now() - otelStartTime) / 1000;1047GenAiMetrics.recordOperationDuration(this._otelService, durationSec, {1048operationName: GenAiOperationName.CHAT,1049providerName: GenAiProviderName.GITHUB,1050requestModel: chatEndpointInfo.model,1051});1052// Span is NOT ended here — caller (fetchMany) will set token attributes and end it1053}1054}10551056/**1057* Sends a chat request via a persistent WebSocket connection instead of HTTP POST.1058* Events are the same Responses API streaming events, processed by OpenAIResponsesProcessor.1059*/1060private async _doFetchViaWebSocket(1061chatEndpointInfo: IChatEndpoint,1062request: IEndpointBody,1063baseTelemetryData: TelemetryData,1064finishedCb: FinishedCallback,1065secretKey: string,1066location: ChatLocation,1067ourRequestId: string,1068turnId: string,1069conversationId: string,1070cancellationToken: CancellationToken,1071countTokens: () => Promise<number>,1072userInitiatedRequest: boolean | undefined,1073telemetryProperties: TelemetryProperties | undefined,1074requestKindOptions: IBackgroundRequestOptions | ISubagentRequestOptions | undefined,1075summarizedAtRoundId: string | undefined,1076modeChanged: boolean | undefined,1077): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled }> {1078const intent = locationToIntent(location);1079const agentInteractionType = requestKindOptions?.kind === 'subagent' ?1080'conversation-subagent' :1081requestKindOptions?.kind === 'background' ?1082'conversation-background' :1083intent === 'conversation-agent' ? 
intent : undefined;1084const additionalHeaders: Record<string, string> = {1085'Authorization': `Bearer ${secretKey}`,1086'X-Request-Id': ourRequestId,1087'OpenAI-Intent': intent,1088'X-GitHub-Api-Version': '2025-05-01',1089'X-Interaction-Id': this._interactionService.interactionId,1090...(chatEndpointInfo.getExtraHeaders ? chatEndpointInfo.getExtraHeaders(location) : {}),1091};1092if (agentInteractionType) {1093additionalHeaders['X-Interaction-Type'] = agentInteractionType;1094additionalHeaders['X-Agent-Task-Id'] = ourRequestId;1095}1096if (request.messages?.some((m: CAPIChatMessage) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpointInfo.supportsVision) {1097additionalHeaders['Copilot-Vision-Request'] = 'true';1098}1099const connection = this._webSocketManager.getOrCreateConnection(conversationId, additionalHeaders, ourRequestId);1100try {1101await connection.connect();1102} catch (err) {1103(err as any).gitHubRequestId = connection.gitHubRequestId;1104throw err;1105}11061107// Generate unique ID to link input and output messages1108const modelCallId = generateUuid();11091110const telemetryData = TelemetryData.createAndMarkAsIssued({1111endpoint: 'completions',1112engineName: 'chat',1113uiKind: ChatLocation.toString(location),1114transport: 'websocket',1115...{ ...telemetryProperties, modelCallId },1116}, {1117maxTokenWindow: chatEndpointInfo.modelMaxPromptTokens1118});11191120const modelRequestId = getRequestId(connection.responseHeaders);1121// Request id changes over the lifetime of the connection.1122modelRequestId.headerRequestId = ourRequestId;1123telemetryData.extendWithRequestId(modelRequestId);1124if (modelRequestId.serverExperiments) {1125this._telemetryService.setSharedProperty('capi.assignmentcontext', modelRequestId.serverExperiments);1126}11271128for (const [key, value] of Object.entries(request)) {1129if (key === 'messages' || key === 'input') {1130continue;1131} // Skip messages 
(PII)1132telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 'undefined';1133}1134this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);11351136const requestStart = Date.now();1137const handle = connection.sendRequest(request, { userInitiated: !!userInitiatedRequest, turnId, requestId: ourRequestId, model: chatEndpointInfo.model, countTokens, tokenCountMax: chatEndpointInfo.maxOutputTokens, modelMaxPromptTokens: chatEndpointInfo.modelMaxPromptTokens, summarizedAtRoundId, modeChanged }, cancellationToken);11381139const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });1140const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, this._telemetryService, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId, modelRequestId.serverExperiments, getResponsesApiCompactionThresholdFromBody(request));11411142// Set up streaming first so event listeners are registered before we1143// await the first event — AsyncIterableObject runs its executor eagerly.1144const chatCompletions = new AsyncIterableObject<ChatCompletion>(async emitter => {1145try {1146await new Promise<void>((resolve, reject) => {1147handle.onEvent(event => {1148const completion = processor.push(event, finishedCb);1149if (completion) {1150sendCompletionOutputTelemetry(this._telemetryService, this._logService, completion, extendedBaseTelemetryData);1151emitter.emitOne(completion);1152}11531154if (event.type === 'response.completed') {1155const snapshots = (event as any).copilot_quota_snapshots;1156if (snapshots && typeof snapshots === 'object') {1157this._chatQuotaService.processQuotaSnapshots(snapshots);1158}1159}1160});11611162handle.onCAPIError(event => {1163// Mid-stream CAPI error — throw so the caller can handle it1164const error = new Error(`${event.error.message} (${event.error.code})`);1165(error as any).gitHubRequestId = 
modelRequestId.gitHubRequestId;1166(error as any).capiWebSocketError = event;1167reject(error);1168});11691170handle.onError(error => {1171(error as any).gitHubRequestId = modelRequestId.gitHubRequestId;1172if (isCancellationError(error)) {1173reject(error);1174return;1175}11761177const warningTelemetry = telemetryData.extendedBy({ error: error.message });1178this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);11791180const totalTimeMs = Date.now() - requestStart;1181telemetryData.measurements.totalTimeMs = totalTimeMs;1182telemetryData.properties.error = error.message;11831184this._logService.debug(`request.error: [websocket], took ${totalTimeMs} ms`);1185this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);11861187reject(error);1188});11891190handle.done.then(resolve, reject);1191});11921193const totalTimeMs = Date.now() - requestStart;1194telemetryData.measurements.totalTimeMs = totalTimeMs;1195this._logService.debug(`request.response: [websocket], took ${totalTimeMs} ms`);1196this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);1197} finally {1198let messagesToLog = request.messages;1199if ((!messagesToLog || messagesToLog.length === 0) && (request as OpenAI.Responses.ResponseCreateParams).input) {1200try {1201const rawMessages = responseApiInputToRawMessagesForLogging(request as OpenAI.Responses.ResponseCreateParams);1202messagesToLog = rawMessageToCAPI(rawMessages);1203} catch (e) {1204this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);1205messagesToLog = [];1206}1207}1208sendEngineMessagesTelemetry(this._telemetryService, messagesToLog ?? 
[], telemetryData, false, this._logService);1209}1210});12111212// Wait for the first event to determine the response type,1213// analogous to checking HTTP status code before streaming the body.1214const firstEvent = await handle.firstEvent;12151216if (cancellationToken.isCancellationRequested) {1217return { result: { type: FetchResponseKind.Canceled, reason: 'after first WebSocket event' } };1218}12191220// CAPI error before any stream events — return Failed like HTTP non-2001221if (isCAPIWebSocketError(firstEvent)) {1222const totalTimeMs = Date.now() - requestStart;1223telemetryData.measurements.totalTimeMs = totalTimeMs;1224telemetryData.properties.error = `${firstEvent.error.message} (${firstEvent.error.code})`;1225this._logService.debug(`request.error: [websocket capi error], took ${totalTimeMs} ms`);1226this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);1227return { result: await this._handleWebSocketCAPIError(firstEvent, modelRequestId) };1228}12291230// Clear stale quota-exceeded state if the server accepted the request.1231if (this._authenticationService.copilotToken?.isFreeUser && this._authenticationService.copilotToken?.isChatQuotaExceeded) {1232this._authenticationService.resetCopilotToken();1233}12341235return {1236result: {1237type: FetchResponseKind.Success,1238chatCompletions,1239}1240};1241}12421243private async _doFetchViaHttp(1244chatEndpointInfo: IChatEndpoint,1245request: IEndpointBody,1246baseTelemetryData: TelemetryData,1247finishedCb: FinishedCallback,1248secretKey: string,1249location: ChatLocation,1250ourRequestId: string,1251nChoices: number | undefined,1252cancellationToken: CancellationToken,1253userInitiatedRequest: boolean | undefined,1254telemetryProperties: TelemetryProperties | undefined,1255useFetcher: FetcherId | undefined,1256canRetryOnce: boolean | undefined,1257requestKindOptions: IBackgroundRequestOptions | ISubagentRequestOptions | undefined,1258): Promise<{ 
result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number }> {1259// Generate unique ID to link input and output messages1260const modelCallId = generateUuid();12611262const response = await this._fetchWithInstrumentation(1263chatEndpointInfo,1264ourRequestId,1265request,1266secretKey,1267location,1268cancellationToken,1269userInitiatedRequest,1270{ ...telemetryProperties, modelCallId },1271useFetcher,1272canRetryOnce,1273requestKindOptions,1274);12751276if (cancellationToken.isCancellationRequested) {1277try {1278// Destroy the stream so that the server is hopefully notified we don't want any more data1279// and can cancel/forget about the request itself.1280await response!.body.destroy();1281} catch (e) {1282this._logService.error(e, `Error destroying stream`);1283this._telemetryService.sendGHTelemetryException(e, 'Error destroying stream');1284}1285return {1286result: { type: FetchResponseKind.Canceled, reason: 'after fetch request' },1287fetcher: response.fetcher,1288bytesReceived: response.bytesReceived1289};1290}12911292if (response.status === 200 && this._authenticationService.copilotToken?.isFreeUser && this._authenticationService.copilotToken?.isChatQuotaExceeded) {1293this._authenticationService.resetCopilotToken();1294}12951296if (response.status !== 200) {1297const telemetryData = createTelemetryData(chatEndpointInfo, location, ourRequestId);1298this._logService.info('Request ID for failed request: ' + ourRequestId);1299return {1300result: await this._handleError(telemetryData, response, ourRequestId),1301fetcher: response.fetcher,1302bytesReceived: response.bytesReceived,1303statusCode: response.status1304};1305}13061307// Extend baseTelemetryData with modelCallId for output messages1308const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });13091310let chatCompletions;1311const gitHubRequestId = response.headers.get('x-github-request-id') ?? 
'';1312try {1313const completions = await chatEndpointInfo.processResponseFromChatEndpoint(1314this._telemetryService,1315this._logService,1316response,1317nChoices ?? /* OpenAI's default */ 1,1318finishedCb,1319extendedBaseTelemetryData,1320cancellationToken,1321location,1322);1323chatCompletions = new AsyncIterableObject<ChatCompletion>(async emitter => {1324try {1325for await (const completion of completions) {1326emitter.emitOne(completion);1327}1328} catch (err) {1329err.fetcherId = response.fetcher;1330err.gitHubRequestId = gitHubRequestId;1331err.bytesReceived = response.bytesReceived;1332throw err;1333}1334});1335} catch (err) {1336err.fetcherId = response.fetcher;1337err.gitHubRequestId = gitHubRequestId;1338err.bytesReceived = response.bytesReceived;1339throw err;1340}13411342// CAPI will return us a Copilot Edits Session Header which is our token to using the speculative decoding endpoint1343// We should store this in the auth service for easy use later1344if (response.headers.get('Copilot-Edits-Session')) {1345this._authenticationService.speculativeDecodingEndpointToken = response.headers.get('Copilot-Edits-Session') ?? 
undefined;1346}13471348this._chatQuotaService.processQuotaHeaders(response.headers);13491350return {1351result: {1352type: FetchResponseKind.Success,1353chatCompletions,1354},1355fetcher: response.fetcher,1356bytesReceived: response.bytesReceived1357};1358}13591360private async _fetchWithInstrumentation(1361chatEndpoint: IChatEndpoint,1362ourRequestId: string,1363request: IEndpointBody,1364secretKey: string,1365location: ChatLocation,1366cancellationToken: CancellationToken,1367userInitiatedRequest?: boolean,1368telemetryProperties?: TelemetryProperties,1369useFetcher?: FetcherId,1370canRetryOnce?: boolean,1371requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,1372): Promise<Response> {13731374// If request contains an image, we include this header.1375const additionalHeaders: Record<string, string> = {1376'X-Interaction-Id': this._interactionService.interactionId,1377'X-Initiator': userInitiatedRequest ? 'user' : 'agent', // Agent = a system request / not the primary user query.1378};1379if (request.messages?.some((m: CAPIChatMessage) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpoint.supportsVision) {1380additionalHeaders['Copilot-Vision-Request'] = 'true';1381}1382const telemetryData = TelemetryData.createAndMarkAsIssued({1383endpoint: 'completions',1384engineName: 'chat',1385uiKind: ChatLocation.toString(location),1386transport: 'http',1387...telemetryProperties // This includes the modelCallId from fetchAndStreamChat1388}, {1389maxTokenWindow: chatEndpoint.modelMaxPromptTokens1390});13911392for (const [key, value] of Object.entries(request)) {1393if (key === 'messages' || key === 'input') {1394continue;1395} // Skip messages (PII)1396telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 
'undefined';1397}13981399// The request ID we are passed in is sent in the request to the proxy, and included in our pre-request telemetry.1400// We hope (but do not rely on) that the model will use the same ID in the response, allowing us to correlate1401// the request and response.1402telemetryData.properties['headerRequestId'] = ourRequestId;14031404this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);14051406const requestStart = Date.now();1407const intent = locationToIntent(location);14081409// Wrap the Promise with success/error callbacks so we can log/measure it1410return this._instantiationService.invokeFunction(postRequest, {1411endpointOrUrl: chatEndpoint,1412secretKey,1413intent,1414requestId: ourRequestId,1415body: request,1416additionalHeaders,1417cancelToken: cancellationToken,1418useFetcher,1419canRetryOnce,1420location,1421requestKindOptions,1422}).then(response => {1423const apim = response.headers.get('apim-request-id');1424if (apim) {1425this._logService.debug(`APIM request id: ${apim}`);1426}1427const ghRequestId = response.headers.get('x-github-request-id');1428if (ghRequestId) {1429this._logService.debug(`GH request id: ${ghRequestId}`);1430}1431// This ID is hopefully the one the same as ourRequestId, but it is not guaranteed.1432// If they are different then we will override the original one we set in telemetryData above.1433const modelRequestId = getRequestId(response.headers);1434// Preserve ourRequestId as headerRequestId if the server didn't echo x-request-id1435modelRequestId.headerRequestId = modelRequestId.headerRequestId || ourRequestId;1436telemetryData.extendWithRequestId(modelRequestId);1437if (modelRequestId.serverExperiments) {1438this._telemetryService.setSharedProperty('capi.assignmentcontext', modelRequestId.serverExperiments);1439}14401441// TODO: Add response length (requires parsing)1442const totalTimeMs = Date.now() - 
requestStart;1443telemetryData.measurements.totalTimeMs = totalTimeMs;14441445this._logService.debug(`request.response: [${stringifyUrlOrRequestMetadata(chatEndpoint.urlOrRequestMetadata)}], took ${totalTimeMs} ms`);14461447this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);14481449return response;1450})1451.catch(error => {1452if (this._fetcherService.isAbortError(error)) {1453// If we cancelled a network request, we don't want to log a `request.error`1454throw error;1455}14561457const warningTelemetry = telemetryData.extendedBy({ error: 'Network exception' });1458this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);14591460telemetryData.properties.code = String(error.code ?? '');1461telemetryData.properties.errno = String(error.errno ?? '');1462telemetryData.properties.message = String(error.message ?? '');1463telemetryData.properties.type = String(error.type ?? 
'');14641465const totalTimeMs = Date.now() - requestStart;1466telemetryData.measurements.totalTimeMs = totalTimeMs;14671468this._logService.debug(`request.response: [${stringifyUrlOrRequestMetadata(chatEndpoint.urlOrRequestMetadata)}] took ${totalTimeMs} ms`);14691470this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);14711472throw error;1473})1474.finally(() => {1475let messagesToLog = request.messages;14761477// For Response API (has input but no messages), convert input to messages for logging1478if ((!messagesToLog || messagesToLog.length === 0) && (request as OpenAI.Responses.ResponseCreateParams).input) {1479try {1480const rawMessages = responseApiInputToRawMessagesForLogging(request as OpenAI.Responses.ResponseCreateParams);1481messagesToLog = rawMessageToCAPI(rawMessages);1482} catch (e) {1483this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);1484messagesToLog = [];1485}1486}14871488sendEngineMessagesTelemetry(this._telemetryService, messagesToLog ?? [], telemetryData, false, this._logService);1489});1490}14911492private async _handleError(1493telemetryData: TelemetryData,1494response: Response,1495requestId: string1496): Promise<ChatRequestFailed> {1497const modelRequestIdObj = getRequestId(response.headers);1498requestId = modelRequestIdObj.headerRequestId || requestId;1499modelRequestIdObj.headerRequestId = requestId;15001501telemetryData.properties.error = `Response status was ${response.status}`;1502telemetryData.properties.status = String(response.status);1503this._telemetryService.sendGHTelemetryEvent('request.shownWarning', telemetryData.properties, telemetryData.measurements);15041505const text = await response.text();1506let jsonData: Record<string, any> | undefined;1507try {1508jsonData = JSON.parse(text);1509jsonData = jsonData?.error ?? 
jsonData; // Extract nested error object if it exists1510} catch {1511// JSON parsing failed, it's not json content.1512}15131514const reasonNoText = `Server error: ${response.status}`;1515const reason = `${reasonNoText} ${text}`;1516this._logService.error(reason);15171518if (400 <= response.status && response.status < 500) {15191520if (response.status === 400 && text.includes('off_topic')) {1521return {1522type: FetchResponseKind.Failed,1523modelRequestId: modelRequestIdObj,1524failKind: ChatFailKind.OffTopic,1525reason: 'filtered as off_topic by intent classifier: message was not programming related',1526};1527}15281529if (response.status === 401 && text.includes('authorize_url') && jsonData?.authorize_url) {1530return {1531type: FetchResponseKind.Failed,1532modelRequestId: modelRequestIdObj,1533failKind: ChatFailKind.AgentUnauthorized,1534reason: response.statusText || response.statusText,1535data: jsonData1536};1537}15381539if (response.status === 400 && jsonData?.code === 'previous_response_not_found') {1540return {1541type: FetchResponseKind.Failed,1542modelRequestId: modelRequestIdObj,1543failKind: ChatFailKind.InvalidPreviousResponseId,1544reason: jsonData.message || 'Invalid previous response ID',1545data: jsonData,1546};1547}15481549if (response.status === 401 || response.status === 403) {1550// Token has expired or invalid, fetch a new one on next request1551// TODO(drifkin): these actions should probably happen in vsc specific code1552this._authenticationService.resetCopilotToken(response.status);1553return {1554type: FetchResponseKind.Failed,1555modelRequestId: modelRequestIdObj,1556failKind: ChatFailKind.TokenExpiredOrInvalid,1557reason: jsonData?.message || `token expired or invalid: ${response.status}`,1558};1559}15601561if (response.status === 402) {1562// When we receive a 402, we have exceed a quota1563// This is stored on the token so let's refresh it1564if (!this._authenticationService.copilotToken?.isChatQuotaExceeded) 
{1565this._authenticationService.resetCopilotToken(response.status);1566await this._authenticationService.getCopilotToken();1567}156815691570const retryAfter = response.headers.get('retry-after');15711572const convertToDate = (retryAfterString: string | null): Date | undefined => {1573if (!retryAfterString) {1574return undefined;1575}15761577// Try treating it as a date1578const retryAfterDate = new Date(retryAfterString);1579if (!isNaN(retryAfterDate.getDate())) {1580return retryAfterDate;1581}15821583// It is not a date, try treating it as a duration from the current date1584const retryAfterDuration = parseInt(retryAfterString, 10);1585if (isNaN(retryAfterDuration)) {1586return undefined;1587}15881589return new Date(Date.now() + retryAfterDuration * 1000);1590};15911592const retryAfterDate = convertToDate(retryAfter);15931594return {1595type: FetchResponseKind.Failed,1596modelRequestId: modelRequestIdObj,1597failKind: ChatFailKind.QuotaExceeded,1598reason: jsonData?.message ?? 'Free tier quota exceeded',1599data: {1600capiError: jsonData,1601retryAfter: retryAfterDate1602}1603};1604}16051606if (response.status === 404) {1607let errorReason: string;16081609// Check if response body is valid JSON1610if (!jsonData) {1611errorReason = text;1612} else {1613errorReason = JSON.stringify(jsonData);1614}16151616return {1617type: FetchResponseKind.Failed,1618modelRequestId: modelRequestIdObj,1619failKind: ChatFailKind.NotFound,1620reason: errorReason1621};1622}16231624if (response.status === 422) {1625return {1626type: FetchResponseKind.Failed,1627modelRequestId: modelRequestIdObj,1628failKind: ChatFailKind.ContentFilter,1629reason: 'Filtered by Responsible AI Service\n\n' + text,1630};1631}16321633if (response.status === 424) {1634return {1635type: FetchResponseKind.Failed,1636modelRequestId: modelRequestIdObj,1637failKind: ChatFailKind.AgentFailedDependency,1638reason: text1639};1640}16411642if (response.status === 429) {1643let rateLimitReason = text;1644rateLimitReason 
= jsonData?.message ?? jsonData?.code;16451646if (text.includes('extension_blocked') && jsonData?.code === 'extension_blocked' && jsonData?.type === 'rate_limit_error') {1647return {1648type: FetchResponseKind.Failed,1649modelRequestId: modelRequestIdObj,1650failKind: ChatFailKind.ExtensionBlocked,1651reason: 'Extension blocked',1652data: {1653...jsonData?.message,1654retryAfter: response.headers.get('retry-after'),1655}1656};1657}16581659// HTTP 429 Too Many Requests1660return {1661type: FetchResponseKind.Failed,1662modelRequestId: modelRequestIdObj,1663failKind: ChatFailKind.RateLimited,1664reason: rateLimitReason,1665data: {1666retryAfter: response.headers.get('retry-after'),1667rateLimitKey: response.headers.get('x-ratelimit-exceeded'),1668capiError: jsonData1669}1670};1671}16721673if (response.status === 466) {1674this._logService.info(text);1675return {1676type: FetchResponseKind.Failed,1677modelRequestId: modelRequestIdObj,1678failKind: ChatFailKind.ClientNotSupported,1679reason: `client not supported: ${text}`1680};1681}16821683if (response.status === 499) {1684this._logService.info('Cancelled by server');1685return {1686type: FetchResponseKind.Failed,1687modelRequestId: modelRequestIdObj,1688failKind: ChatFailKind.ServerCanceled,1689reason: 'canceled by server'1690};1691}16921693} else if (500 <= response.status && response.status < 600) {16941695if (response.status === 503) {1696return {1697type: FetchResponseKind.Failed,1698modelRequestId: modelRequestIdObj,1699failKind: ChatFailKind.RateLimited,1700reason: 'Upstream provider rate limit hit',1701data: {1702retryAfter: null,1703rateLimitKey: null,1704capiError: { code: 'upstream_provider_rate_limit', message: text }1705}1706};1707}17081709// HTTP 5xx Server Error1710return {1711type: FetchResponseKind.Failed,1712modelRequestId: modelRequestIdObj,1713failKind: ChatFailKind.ServerError,1714reason: reasonNoText,1715};1716}17171718this._logService.error(`Request Failed: ${response.status} 
${text}`);17191720sendCommunicationErrorTelemetry(this._telemetryService, 'Unhandled status from server: ' + response.status, text);17211722return {1723type: FetchResponseKind.Failed,1724modelRequestId: modelRequestIdObj,1725failKind: ChatFailKind.Unknown,1726reason: `Request Failed: ${response.status} ${text}`1727};1728}17291730private async processSuccessfulResponse(1731response: ChatResults,1732messages: Raw.ChatMessage[],1733requestBody: IEndpointBody,1734requestId: string,1735maxResponseTokens: number,1736promptTokenCount: number,1737timeToFirstToken: number,1738streamRecorder: FetchStreamRecorder,1739baseTelemetry: TelemetryData,1740chatEndpointInfo: IChatEndpoint,1741userInitiatedRequest: boolean | undefined,1742transport: string,1743fetcher: FetcherId | undefined,1744bytesReceived: number | undefined,1745suspendEventSeen: boolean | undefined,1746resumeEventSeen: boolean | undefined,1747): Promise<ChatResponses | ChatFetchRetriableError<string[]>> {17481749const completions: ChatCompletion[] = [];17501751for await (const chatCompletion of response.chatCompletions) {1752Telemetry.sendSuccessTelemetry(1753this._telemetryService,1754{1755chatCompletion,1756baseTelemetry,1757userInitiatedRequest,1758chatEndpointInfo,1759requestBody,1760maxResponseTokens,1761promptTokenCount,1762timeToFirstToken,1763timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? 
						streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,
					hasImageMessages: this.filterImageMessages(messages),
					transport,
					fetcher,
					bytesReceived,
					suspendEventSeen,
					resumeEventSeen,
				}
			);

			// Repetitive completions are reported via telemetry (inside isRepetitive)
			// and excluded from the returned set.
			if (!this.isRepetitive(chatCompletion, baseTelemetry?.properties)) {
				completions.push(chatCompletion);
			}
		}
		// Finish reasons that count as a usable result.
		const successFinishReasons = new Set([FinishedCompletionReason.Stop, FinishedCompletionReason.ClientTrimmed, FinishedCompletionReason.FunctionCall, FinishedCompletionReason.ToolCalls]);
		const successfulCompletions = completions.filter(c => successFinishReasons.has(c.finishReason));
		if (successfulCompletions.length >= 1) {
			return {
				type: ChatFetchResponseType.Success,
				resolvedModel: successfulCompletions[0].model,
				// Usage is only meaningful when there is exactly one completion.
				usage: successfulCompletions.length === 1 ? successfulCompletions[0].usage : undefined,
				value: successfulCompletions.map(c => getTextPart(c.message.content)),
				requestId,
				serverRequestId: successfulCompletions[0].requestId.headerRequestId,
			};
		}

		// No successful completion: classify by the first completion's finish reason.
		const result = completions.at(0);

		switch (result?.finishReason) {
			case FinishedCompletionReason.ContentFilter:
				return {
					type: ChatFetchResponseType.FilteredRetry,
					category: result.filterReason ?? FilterReason.Copyright,
					reason: 'Response got filtered.',
					value: completions.map(c => getTextPart(c.message.content)),
					requestId: requestId,
					serverRequestId: result.requestId.headerRequestId,
				};
			case FinishedCompletionReason.Length:
				return {
					type: ChatFetchResponseType.Length,
					reason: 'Response too long.',
					requestId: requestId,
					serverRequestId: result.requestId.headerRequestId,
					truncatedValue: getTextPart(result.message.content)
				};
			case FinishedCompletionReason.ServerError:
				return {
					type: ChatFetchResponseType.Failed,
					reason: 'Server error. Stream terminated',
					requestId: requestId,
					serverRequestId: result.requestId.headerRequestId,
					streamError: result.error
				};
		}
		// No completions at all (or an unrecognized finish reason).
		return {
			type: ChatFetchResponseType.Unknown,
			reason: RESPONSE_CONTAINED_NO_CHOICES,
			requestId: requestId,
			serverRequestId: result?.requestId.headerRequestId,
		};
	}

	/**
	 * Returns true when any message carries image content (an `imageUrl` part).
	 * NOTE(review): despite the name this is a predicate, not a filter.
	 */
	private filterImageMessages(messages: Raw.ChatMessage[]): boolean {
		return messages?.some(m => Array.isArray(m.content) ? m.content.some(c => 'imageUrl' in c) : false);
	}

	/**
	 * Detects repetition in a completion and reports it via telemetry.
	 * Token-level repetition (the returned flag) triggers enhanced GH telemetry;
	 * 10+ repeated lines additionally trigger an MSFT telemetry event.
	 *
	 * @returns true when token-level repetition was detected.
	 */
	private isRepetitive(chatCompletion: ChatCompletion, telemetryProperties?: TelemetryProperties) {
		const lineRepetitionStats = calculateLineRepetitionStats(getTextPart(chatCompletion.message.content));
		const hasRepetition = isRepetitive(chatCompletion.tokens);
		if (hasRepetition) {
			const telemetryData = TelemetryData.createAndMarkAsIssued();
			telemetryData.extendWithRequestId(chatCompletion.requestId);
			const extended = telemetryData.extendedBy(telemetryProperties);
			this._telemetryService.sendEnhancedGHTelemetryEvent('conversation.repetition.detected', extended.properties, extended.measurements);
		}
		if (lineRepetitionStats.numberOfRepetitions >= 10) {
			/* __GDPR__
				"conversation.repetition.detected" : {
					"owner": "lramos15",
					"comment": "Calculates the number of repetitions in a response. Useful for loop detection",
					"finishReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished. Helps identify cancellation vs length limits" },
					"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id for this message request." },
					"lengthOfLine": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Length of the repeating line, in characters."
					},
					"numberOfRepetitions": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Number of times the line repeats." },
					"totalLines": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Number of total lines in the response." }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('conversation.repetition.detected', {
				requestId: chatCompletion.requestId.headerRequestId,
				finishReason: chatCompletion.finishReason,
			}, {
				numberOfRepetitions: lineRepetitionStats.numberOfRepetitions,
				lengthOfLine: lineRepetitionStats.mostRepeatedLine.length,
				totalLines: lineRepetitionStats.totalLines
			});
		}
		return hasRepetition;
	}

	/**
	 * Check for repetition in partial response deltas from a cancelled request.
	 *
	 * This method performs the same repetition detection as the `isRepetitive` method,
	 * but operates on partial response data collected before the request was cancelled.
	 *
	 * Key differences from completed requests:
	 * - Text is reconstructed from delta.text values instead of message.content
	 * - Tokens are approximated by splitting text on whitespace instead of using
	 *   the actual token array (which is only available in completed responses)
	 * - Enhanced telemetry won't include RequestId fields since we only have the
	 *   headerRequestId string, not the full RequestId object
	 * - The finishReason is marked as 'canceled' to distinguish from server-generated
	 *   finish reasons
	 */
	private checkRepetitionInDeltas(
		deltas: IResponseDelta[],
		requestId: string,
		telemetryProperties?: TelemetryProperties
	): void {
		// Reconstruct the text content from deltas (filter out null, undefined, and empty text values)
		const textContent = deltas.filter(delta => delta.text?.length > 0).map(delta => delta.text).join('');

		// Early exit if no content
		if (!textContent || textContent.trim().length === 0) {
			return;
		}

		// For cancelled requests, we don't have the actual token array (only available in ChatCompletion),
		// so we approximate by splitting text content on whitespace. This is less precise than actual
		// tokenization but sufficient for detecting obvious repetition patterns.
		const tokens = textContent.split(/\s+/).filter(t => t.length > 0);

		// Check for line repetition
		const lineRepetitionStats = calculateLineRepetitionStats(textContent);

		// Check for token-level repetition
		const hasRepetition = isRepetitive(tokens);

		// Send telemetry if repetition is detected
		if (hasRepetition) {
			const telemetryData = TelemetryData.createAndMarkAsIssued();
			const extended = telemetryData.extendedBy(telemetryProperties);
			// Note: For cancelled requests, we don't have a full RequestId object,
			// so we can't use extendWithRequestId like the non-cancelled path does.
			// This means enhanced telemetry for cancelled requests won't include
			// completionId, created, deploymentId, or serverExperiments fields.
			this._telemetryService.sendEnhancedGHTelemetryEvent('conversation.repetition.detected', extended.properties, extended.measurements);
		}

		if (lineRepetitionStats.numberOfRepetitions >= 10) {
			this._telemetryService.sendMSFTTelemetryEvent('conversation.repetition.detected', {
				requestId: requestId,
				finishReason: 'canceled', // Client-side finish reason to distinguish from server-generated reasons
			}, {
				numberOfRepetitions: lineRepetitionStats.numberOfRepetitions,
				lengthOfLine: lineRepetitionStats.mostRepeatedLine.length,
				totalLines: lineRepetitionStats.totalLines
			});
		}
	}

	/**
	 * Maps a cancelled request to a Canceled {@link ChatResponses}, first running
	 * repetition detection over any partial deltas captured before cancellation.
	 */
	private processCanceledResponse(
		response: ChatRequestCanceled,
		requestId: string,
		streamRecorder?: FetchStreamRecorder,
		telemetryProperties?: TelemetryProperties
	): ChatResponses {
		// Check for repetition in the partial response before cancellation
		if
		(streamRecorder && streamRecorder.deltas.length > 0) {
			this.checkRepetitionInDeltas(streamRecorder.deltas, requestId, telemetryProperties);
		}

		return {
			type: ChatFetchResponseType.Canceled,
			reason: response.reason,
			requestId: requestId,
			serverRequestId: undefined,
		};
	}

	/**
	 * Maps an internal {@link ChatRequestFailed} (produced by _handleError and the
	 * WebSocket error handler) to the public {@link ChatFetchError} shape, branching
	 * on failKind. Unmatched kinds fall through to a generic Failed result.
	 */
	private processFailedResponse(response: ChatRequestFailed, requestId: string, isAuto: boolean): ChatFetchError {
		const serverRequestId = response.modelRequestId?.gitHubRequestId;
		const reason = response.reason;
		if (response.failKind === ChatFailKind.RateLimited) {
			return { type: ChatFetchResponseType.RateLimited, reason, requestId, serverRequestId, retryAfter: response.data?.retryAfter, rateLimitKey: (response.data?.rateLimitKey || ''), isAuto, capiError: response.data?.capiError };
		}
		if (response.failKind === ChatFailKind.QuotaExceeded) {
			return { type: ChatFetchResponseType.QuotaExceeded, reason, requestId, serverRequestId, retryAfter: response.data?.retryAfter, capiError: response.data?.capiError };
		}
		if (response.failKind === ChatFailKind.OffTopic) {
			return { type: ChatFetchResponseType.OffTopic, reason, requestId, serverRequestId };
		}
		// NOTE(review): the reason-string sniff ('Bad request: ') also routes
		// non-token failures into BadRequest — intentional per the visible code.
		if (response.failKind === ChatFailKind.TokenExpiredOrInvalid || response.failKind === ChatFailKind.ClientNotSupported || reason.includes('Bad request: ')) {
			return { type: ChatFetchResponseType.BadRequest, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.ServerError) {
			return { type: ChatFetchResponseType.Failed, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.ContentFilter) {
			return { type: ChatFetchResponseType.PromptFiltered, reason, category: FilterReason.Prompt, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.AgentUnauthorized) {
			return { type: ChatFetchResponseType.AgentUnauthorized, reason, authorizationUrl: response.data!.authorize_url, requestId, serverRequestId
		};
		}
		if (response.failKind === ChatFailKind.AgentFailedDependency) {
			return { type: ChatFetchResponseType.AgentFailedDependency, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.ExtensionBlocked) {
			// Default to a 300s retry window when the server did not provide one.
			const retryAfter = typeof response.data?.retryAfter === 'number' ? response.data.retryAfter : 300;
			return { type: ChatFetchResponseType.ExtensionBlocked, reason, requestId, retryAfter, learnMoreLink: response.data?.learnMoreLink ?? '', serverRequestId };
		}
		if (response.failKind === ChatFailKind.NotFound) {
			return { type: ChatFetchResponseType.NotFound, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.InvalidPreviousResponseId) {
			return { type: ChatFetchResponseType.InvalidStatefulMarker, reason, requestId, serverRequestId };
		}

		return { type: ChatFetchResponseType.Failed, reason, requestId, serverRequestId };
	}

	/**
	 * Maps a thrown error (network failure, cancellation, fetcher error, or a
	 * CAPI WebSocket error attached to the thrown object) to a {@link ChatFetchError}.
	 * Cancellation-like errors are returned without logging; everything else is
	 * logged and reported via GH exception telemetry.
	 */
	private processError(err: unknown, requestId: string, gitHubRequestId: string | undefined, usernameToScrub: string | undefined, isAuto: boolean): ChatFetchError {
		// WebSocket transport surfaces CAPI errors as a property on the thrown object.
		const capiWebSocketError = (err as any)?.capiWebSocketError as CAPIWebSocketErrorEvent | undefined;
		if (capiWebSocketError) {
			return this._handleWebSocketError(capiWebSocketError, requestId, gitHubRequestId, isAuto);
		}

		const fetcher = this._fetcherService;
		// If we cancelled a network request, we don't want to log an error
		if (fetcher.isAbortError(err)) {
			return {
				type: ChatFetchResponseType.Canceled,
				reason: 'network request aborted',
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
		if (isCancellationError(err)) {
			return {
				type: ChatFetchResponseType.Canceled,
				reason: 'Got a cancellation error',
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
		// Premature stream close is also treated as a cancellation, matched both by
		// message and by Node's error code.
		if (err && (
			(err instanceof Error && err.message === 'Premature close') ||
			(typeof err === 'object' && (err as any).code ===
				'ERR_STREAM_PREMATURE_CLOSE') /* to be extra sure */)
		) {
			return {
				type: ChatFetchResponseType.Canceled,
				reason: 'Stream closed prematurely',
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
		// Non-cancellation errors: log, report, and classify by fetcher capabilities.
		this._logService.error(ErrorUtils.fromUnknown(err), `Error on conversation request`);
		this._telemetryService.sendGHTelemetryException(err, 'Error on conversation request');
		const userMessage = fetcher.getUserMessageForFetcherError(err);
		const errorDetail = collectSingleLineErrorMessage(err, true);
		// Scrub the local username from the detail string before it leaves this layer.
		const scrubbedErrorDetail = this.scrubErrorDetail(errorDetail, usernameToScrub);
		if (fetcher.isInternetDisconnectedError(err)) {
			return {
				type: ChatFetchResponseType.NetworkError,
				reason: `It appears you're not connected to the internet, please check your network connection and try again.`,
				reasonDetail: scrubbedErrorDetail,
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		} else if (fetcher.isFetcherError(err)) {
			const isNetworkProcessCrash = fetcher.isNetworkProcessCrashedError(err);
			return {
				type: ChatFetchResponseType.NetworkError,
				reason: userMessage,
				reasonDetail: scrubbedErrorDetail,
				requestId: requestId,
				serverRequestId: gitHubRequestId,
				// Only set the flag when true, keeping the property absent otherwise.
				...(isNetworkProcessCrash ? { isNetworkProcessCrash: true } : {}),
			};
		} else {
			return {
				type: ChatFetchResponseType.Failed,
				reason: 'Error on conversation request. Check the log for more details.',
				reasonDetail: scrubbedErrorDetail,
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
	}

	/**
	 * Maps a CAPI WebSocket error event to an internal {@link ChatRequestFailed}.
	 * Rate-limit and quota codes are matched by prefix (text before the first ':'),
	 * the remaining codes by exact match; quota errors also refresh the Copilot token.
	 */
	private async _handleWebSocketCAPIError(event: CAPIWebSocketErrorEvent, modelRequestId: RequestId): Promise<ChatRequestFailed> {
		const { code, message } = event.error;
		const capiError = { code, message };
		const codePrefix = code.split(':')[0];

		this._logService.error(`WebSocket CAPI error: ${message} (${code})`);

		if (codePrefix === 'rate_limited' || codePrefix === 'user_model_rate_limited' || codePrefix === 'user_global_rate_limited' || codePrefix === 'integration_rate_limited' || codePrefix === 'model_overloaded' || codePrefix === 'agent_mode_limit_exceeded') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.RateLimited,
				reason: message,
				data: { capiError },
			};
		}
		if (codePrefix === 'quota_exceeded' || codePrefix === 'free_quota_exceeded' || codePrefix === 'overage_limit_reached' || codePrefix === 'billing_not_configured') {
			// Refresh the copilot token so isChatQuotaExceeded reflects the new state,
			// matching the HTTP 402 handler behavior.
			if (!this._authenticationService.copilotToken?.isChatQuotaExceeded) {
				this._authenticationService.resetCopilotToken(402);
				await this._authenticationService.getCopilotToken();
			}
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.QuotaExceeded,
				reason: message,
				data: { capiError },
			};
		}
		if (code === 'content_filter') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.ContentFilter,
				reason: message,
			};
		}
		if (code === 'not_found') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.NotFound,
				reason: message,
			};
		}
		if (code === 'request_too_large') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.Unknown,
				reason: `Request Failed: ${code} ${message}`,
			};
		}
		if (code === 'service_unavailable') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.ServerError,
				reason: `Request Failed: ${code} ${message}`,
			};
		}
		if (code === 'bad_request') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.Unknown,
				reason: `Request Failed: ${code} ${message}`,
			};
		}

		// internal_error, session_expired, or any unknown code
		return {
			type: FetchResponseKind.Failed,
			modelRequestId,
			failKind: ChatFailKind.ServerError,
			reason: `Request Failed: ${code} ${message || 'WebSocket server error'}`,
		};
	}

	/**
	 * Maps a CAPI WebSocket error event directly to a public {@link ChatFetchError}
	 * (used when the error surfaces as a thrown object in processError). Mirrors the
	 * classification in _handleWebSocketCAPIError but without token refresh side effects.
	 */
	private _handleWebSocketError(event: CAPIWebSocketErrorEvent, requestId: string, serverRequestId: string | undefined, isAuto: boolean): ChatFetchError {
		const { code, message } = event.error;
		const capiError = { code, message };
		const codePrefix = code.split(':')[0];

		if (codePrefix === 'rate_limited' || codePrefix === 'user_model_rate_limited' || codePrefix === 'user_global_rate_limited' || codePrefix === 'integration_rate_limited' || codePrefix === 'model_overloaded' || codePrefix === 'agent_mode_limit_exceeded') {
			return { type: ChatFetchResponseType.RateLimited, reason: message, requestId, serverRequestId, retryAfter: undefined, rateLimitKey: '', isAuto, capiError };
		}
		if (codePrefix === 'quota_exceeded' || codePrefix === 'free_quota_exceeded' || codePrefix === 'overage_limit_reached' || codePrefix === 'billing_not_configured') {
			return { type: ChatFetchResponseType.QuotaExceeded, reason: message, requestId, serverRequestId, capiError, retryAfter: undefined };
		}
		if (code === 'content_filter') {
			return { type: ChatFetchResponseType.PromptFiltered, reason: message, category: FilterReason.Prompt, requestId, serverRequestId };
		}
		if
		(code === 'not_found') {
			return { type: ChatFetchResponseType.NotFound, reason: message, requestId, serverRequestId };
		}
		if (code === 'bad_request') {
			return { type: ChatFetchResponseType.BadRequest, reason: message, requestId, serverRequestId };
		}

		// internal_error, session_expired, service_unavailable, request_too_large, or any unknown code
		return { type: ChatFetchResponseType.Failed, reason: `Request Failed: ${code} ${message || 'WebSocket server error'}`, requestId, serverRequestId };
	}

	/**
	 * Replaces occurrences of the local username in an error-detail string with
	 * '<login>' and marks any residual "logged in as <name>" value with '!<login>!'
	 * as a scrubbing fallback, so usernames never leave this layer.
	 */
	private scrubErrorDetail(errorDetail: string, usernameToScrub: string | undefined) {
		if (usernameToScrub) {
			const regex = new RegExp(escapeRegExpCharacters(usernameToScrub), 'ig');
			errorDetail = errorDetail.replaceAll(regex, '<login>');
		}
		return errorDetail.replaceAll(/(?<=logged in as )(?!<login>)[^\s]+/ig, '!<login>!'); // marking fallback with !
	}
}

/**
 * Validates a chat request payload to ensure it is valid
 * @param params The params being sent in the chat request
 * @returns Whether the chat payload is valid
 */
// NOTE(review): configurationService and experimentationService are accepted but
// unused in this body — possibly kept for signature stability; confirm with callers.
function isValidChatPayload(messages: Raw.ChatMessage[], postOptions: OptionalChatRequestParams, endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): { isValid: boolean; reason: string } {
	if (messages.length === 0) {
		return { isValid: false, reason: asUnexpected('No messages provided') };
	}
	if (postOptions?.max_tokens && postOptions?.max_tokens < 1) {
		return { isValid: false, reason: asUnexpected('Invalid response token parameter') };
	}

	// Function/tool names must be safe identifiers.
	const functionNamePattern = /^[a-zA-Z0-9_-]+$/;
	if (
		postOptions?.functions?.some(f => !f.name.match(functionNamePattern)) ||
		postOptions?.function_call?.name && !postOptions.function_call.name.match(functionNamePattern)
	) {
		return { isValid: false, reason: asUnexpected('Function names must match ^[a-zA-Z0-9_-]+$') };
	}

	// The hard tool limit is waived for endpoints that can search over tools.
	if (postOptions?.tools && postOptions.tools.length > HARD_TOOL_LIMIT && !endpoint.supportsToolSearch) {
		return { isValid: false, reason: `Tool limit exceeded (${postOptions.tools.length}/${HARD_TOOL_LIMIT}). Click "Configure Tools" in the chat input to disable ${postOptions.tools.length - HARD_TOOL_LIMIT} tools and retry.` };
	}

	return { isValid: true, reason: '' };
}

// Wraps a validation failure message in the standard user-facing prefix/suffix.
function asUnexpected(reason: string) {
	return `Prompt failed validation with the reason: ${reason}. Please file an issue.`;
}

// Creates the base telemetry payload for a chat request.
// NOTE(review): chatEndpointInfo is accepted but unused in this body — confirm.
export function createTelemetryData(chatEndpointInfo: IChatEndpoint, location: ChatLocation, headerRequestId: string) {
	return TelemetryData.createAndMarkAsIssued({
		endpoint: 'completions',
		engineName: 'chat',
		uiKind: ChatLocation.toString(location),
		headerRequestId
	});
}

/**
 * WARNING: The value that is returned from this function drives the disablement of RAI for full-file rewrite requests
 * in Copilot Edits, Copilot Chat, Agent Mode, and Inline Chat.
 * If your chat location generates full-file rewrite requests and you are unsure if changing something here will cause problems, please talk to @roblourens
 */

export function locationToIntent(location: ChatLocation): string {
	switch (location) {
		case ChatLocation.Panel:
			return 'conversation-panel';
		case ChatLocation.Editor:
			return 'conversation-inline';
		case ChatLocation.EditingSession:
			return 'conversation-edits';
		case ChatLocation.Notebook:
			return 'conversation-notebook';
		case ChatLocation.Terminal:
			return 'conversation-terminal';
		case ChatLocation.Other:
			return 'conversation-other';
		case ChatLocation.Agent:
			return 'conversation-agent';
		case ChatLocation.ResponsesProxy:
			return 'responses-proxy';
		case ChatLocation.MessagesProxy:
			return 'messages-proxy';
	}
}