// Source: extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { Raw } from '@vscode/prompt-tsx';6import type { OpenAI } from 'openai';7import type { CancellationToken } from 'vscode';8import { IAuthenticationService } from '../../../platform/authentication/common/authentication';9import { CopilotToken } from '../../../platform/authentication/common/copilotToken';10import { FetchStreamRecorder, IChatMLFetcher, IFetchMLOptions, Source } from '../../../platform/chat/common/chatMLFetcher';11import { IChatQuotaService } from '../../../platform/chat/common/chatQuotaService';12import { ChatFetchError, ChatFetchResponseType, ChatFetchRetriableError, ChatLocation, ChatResponse, ChatResponses, RESPONSE_CONTAINED_NO_CHOICES } from '../../../platform/chat/common/commonTypes';13import { IConversationOptions } from '../../../platform/chat/common/conversationOptions';14import { getTextPart, toTextParts } from '../../../platform/chat/common/globalStringUtils';15import { IInteractionService } from '../../../platform/chat/common/interactionService';16import { ConfigKey, HARD_TOOL_LIMIT, IConfigurationService } from '../../../platform/configuration/common/configurationService';17import { ICAPIClientService } from '../../../platform/endpoint/common/capiClient';18import { isAutoModel } from '../../../platform/endpoint/node/autoChatEndpoint';19import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi';20import { collectSingleLineErrorMessage, ILogService } from '../../../platform/log/common/logService';21import { FinishedCallback, getRequestId, 
IResponseDelta, OptionalChatRequestParams, RequestId } from '../../../platform/networking/common/fetch';22import { FetcherId, IFetcherService, Response } from '../../../platform/networking/common/fetcherService';23import { IBackgroundRequestOptions, IChatEndpoint, IEndpointBody, ISubagentRequestOptions, postRequest, stringifyUrlOrRequestMetadata } from '../../../platform/networking/common/networking';24import { CAPIChatMessage, ChatCompletion, FilterReason, FinishedCompletionReason, rawMessageToCAPI } from '../../../platform/networking/common/openai';25import { sendEngineMessagesTelemetry } from '../../../platform/networking/node/chatStream';26import { CAPIWebSocketErrorEvent, IChatWebSocketManager, isCAPIWebSocketError } from '../../../platform/networking/node/chatWebSocketManager';27import { sendCommunicationErrorTelemetry } from '../../../platform/networking/node/stream';28import { ChatFailKind, ChatRequestCanceled, ChatRequestFailed, ChatResults, FetchResponseKind } from '../../../platform/openai/node/fetch';29import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, normalizeProviderMessages, StdAttr, toSystemInstructions, toToolDefinitions, truncateForOTel } from '../../../platform/otel/common/index';30import { IOTelService, ISpanHandle, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';31import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';32import { getCurrentCapturingToken } from '../../../platform/requestLogger/node/requestLogger';33import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';34import { ITelemetryService, TelemetryProperties } from '../../../platform/telemetry/common/telemetry';35import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';36import { isEncryptedThinkingDelta } from '../../../platform/thinking/common/thinking';37import { 
calculateLineRepetitionStats, isRepetitive } from '../../../util/common/anomalyDetection';
import { ErrorUtils } from '../../../util/common/errors';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { isCancellationError } from '../../../util/vs/base/common/errors';
import { Emitter } from '../../../util/vs/base/common/event';
import { Disposable } from '../../../util/vs/base/common/lifecycle';
import { escapeRegExpCharacters } from '../../../util/vs/base/common/strings';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { isBYOKModel } from '../../byok/node/openAIEndpoint';
import { EXTENSION_ID } from '../../common/constants';
import { IPowerService } from '../../power/common/powerService';
import { ChatMLFetcherTelemetrySender as Telemetry } from './chatMLFetcherTelemetry';

/** Event payload fired after a ChatML request has been dispatched to the endpoint. */
export interface IMadeChatRequestEvent {
	readonly messages: Raw.ChatMessage[];
	readonly model: string;
	readonly source?: Source;
	readonly tokenCount?: number;
}

/**
 * Shared base for ChatML fetchers: holds the conversation-level sampling options,
 * forces streamed responses, and provides the single-choice convenience wrapper.
 */
export abstract class AbstractChatMLFetcher extends Disposable implements IChatMLFetcher {

	declare _serviceBrand: undefined;

	constructor(
		protected readonly options: IConversationOptions,
	) {
		super();
	}

	protected preparePostOptions(requestOptions: OptionalChatRequestParams): OptionalChatRequestParams {
		return {
			temperature: this.options.temperature,
			top_p: this.options.topP,
			// we disallow `stream=false` because we don't support non-streamed response
			...requestOptions,
			stream: true
		};
	}

	protected readonly _onDidMakeChatMLRequest = this._register(new Emitter<IMadeChatRequestEvent>());
	readonly onDidMakeChatMLRequest = this._onDidMakeChatMLRequest.event;

	public async fetchOne(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponse> {
		// Force a single completion (`n: 1`) and unwrap the one-element result array.
		const resp = await this.fetchMany({
			...opts,
			requestOptions: { ...opts.requestOptions, n: 1 }
		}, token);
		if (resp.type === ChatFetchResponseType.Success) {
			return { ...resp, value: resp.value[0] };
		}
		return resp;
	}

	/**
	 * Note: the returned array of strings may be less than `n` (e.g., in case there were errors during streaming)
	 */
	public abstract fetchMany(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponses>;
}

export class ChatMLFetcherImpl extends AbstractChatMLFetcher {

	private static readonly _maxConsecutiveWebSocketFallbacks = 3;

	/**
	 * Delays (in ms) between connectivity check attempts before retrying a failed request.
	 * Configurable for testing purposes.
	 */
	public connectivityCheckDelays = [1000, 10000, 10000];

	/**
	 * Tracks consecutive WebSocket request failures where the HTTP retry succeeded.
	 * After {@link _maxConsecutiveWebSocketFallbacks} such failures, WebSocket requests are disabled entirely.
	 */
	private _consecutiveWebSocketRetryFallbacks = 0;

	constructor(
		@IFetcherService private readonly _fetcherService: IFetcherService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
		@ILogService private readonly _logService: ILogService,
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IInteractionService private readonly _interactionService: IInteractionService,
		@IChatQuotaService private readonly _chatQuotaService: IChatQuotaService,
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IConversationOptions options: IConversationOptions,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IExperimentationService private readonly _experimentationService: IExperimentationService,
		@IPowerService private readonly _powerService: IPowerService,
		@IInstantiationService private readonly _instantiationService:
IInstantiationService,128@IChatWebSocketManager private readonly _webSocketManager: IChatWebSocketManager,129@IOTelService private readonly _otelService: IOTelService,130) {131super(options);132}133134/**135* Note: the returned array of strings may be less than `n` (e.g., in case there were errors during streaming)136*/137public async fetchMany(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponses> {138let { debugName, endpoint: chatEndpoint, finishedCb, location, messages, requestOptions, source, telemetryProperties, userInitiatedRequest, requestKindOptions, conversationId, turnId, useWebSocket, ignoreStatefulMarker } = opts;139if (useWebSocket && this._consecutiveWebSocketRetryFallbacks >= ChatMLFetcherImpl._maxConsecutiveWebSocketFallbacks) {140this._logService.debug(`[ChatWebSocketManager] Disabling WebSocket for request due to ${this._consecutiveWebSocketRetryFallbacks} consecutive WebSocket failures with successful HTTP fallback.`);141useWebSocket = false;142ignoreStatefulMarker = true;143}144if (!telemetryProperties) {145telemetryProperties = {};146}147148if (!telemetryProperties.messageSource) {149telemetryProperties.messageSource = debugName;150}151152const transport = useWebSocket ? 'websocket' : 'http';153154// TODO @lramos15 telemetry should not drive request ids155const ourRequestId = telemetryProperties.requestId ?? telemetryProperties.messageId ?? 
generateUuid();156157const maxResponseTokens = chatEndpoint.maxOutputTokens;158if (!requestOptions?.prediction) {159requestOptions = { max_tokens: maxResponseTokens, ...requestOptions };160}161// Avoid sending a prediction with no content as this will yield a 400 Bad Request162if (!requestOptions.prediction?.content) {163delete requestOptions['prediction'];164}165166const postOptions = this.preparePostOptions(requestOptions);167const requestBody = chatEndpoint.createRequestBody({168...opts,169ignoreStatefulMarker,170requestId: ourRequestId,171postOptions172});173174175const baseTelemetry = TelemetryData.createAndMarkAsIssued({176...telemetryProperties,177...(conversationId ? { conversationId } : {}),178headerRequestId: ourRequestId,179baseModel: chatEndpoint.model,180uiKind: ChatLocation.toString(location)181});182183const pendingLoggedChatRequest = this._requestLogger.logChatRequest(debugName, chatEndpoint, {184messages: opts.messages,185model: chatEndpoint.model,186ourRequestId,187location: opts.location,188body: requestBody,189ignoreStatefulMarker,190isConversationRequest: opts.isConversationRequest,191customMetadata: opts.customMetadata192});193let tokenCount = -1;194const streamRecorder = new FetchStreamRecorder(finishedCb);195const enableRetryOnError = opts.enableRetryOnError ?? opts.enableRetryOnFilter;196const canRetryOnce = opts.canRetryOnceWithoutRollback ?? 
!(opts.enableRetryOnFilter || opts.enableRetryOnError);197let usernameToScrub: string | undefined;198let actualFetcher: FetcherId | undefined;199let actualBytesReceived: number | undefined;200let actualStatusCode: number | undefined;201let suspendEventSeen: boolean | undefined;202let resumeEventSeen: boolean | undefined;203let otelInferenceSpan: ISpanHandle | undefined;204try {205let response: ChatResults | ChatRequestFailed | ChatRequestCanceled;206const payloadValidationResult = isValidChatPayload(opts.messages, postOptions, chatEndpoint, this._configurationService, this._experimentationService);207if (!payloadValidationResult.isValid) {208response = {209type: FetchResponseKind.Failed,210modelRequestId: undefined,211failKind: ChatFailKind.ValidationFailed,212reason: payloadValidationResult.reason,213};214} else {215let tokenCountPromise: Promise<number> | undefined;216const countTokens = () => tokenCountPromise ??= chatEndpoint.acquireTokenizer().countMessagesTokens(messages);217const copilotToken = await this._authenticationService.getCopilotToken();218usernameToScrub = copilotToken.username;219220const fetchResult = await this._fetchAndStreamChat(221chatEndpoint,222requestBody,223baseTelemetry,224streamRecorder.callback,225requestOptions.secretKey,226copilotToken,227opts.location,228ourRequestId,229postOptions.n,230token,231countTokens,232userInitiatedRequest,233useWebSocket,234turnId,235conversationId,236telemetryProperties,237opts.useFetcher,238canRetryOnce,239requestKindOptions,240opts.summarizedAtRoundId,241opts.modeChanged,242);243response = fetchResult.result;244actualFetcher = fetchResult.fetcher;245actualBytesReceived = fetchResult.bytesReceived;246actualStatusCode = fetchResult.statusCode;247suspendEventSeen = fetchResult.suspendEventSeen;248resumeEventSeen = fetchResult.resumeEventSeen;249otelInferenceSpan = fetchResult.otelSpan;250// Tag span with debug name so orphaned spans (title, progressMessages, etc.) 
are identifiable251otelInferenceSpan?.setAttribute(GenAiAttr.AGENT_NAME, debugName);252253// Extract and set structured prompt sections for the debug panel254if (otelInferenceSpan) {255// Support both Chat Completions API (messages) and Responses API (input) formats256const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<{ role?: string; content?: string | unknown[] }> | undefined;257// User request: last user-role message258const userMessages = capiMessages?.filter(m => m.role === 'user');259const lastUserMsg = userMessages?.[userMessages.length - 1];260if (lastUserMsg?.content) {261const userContent = typeof lastUserMsg.content === 'string'262? lastUserMsg.content263: JSON.stringify(lastUserMsg.content);264otelInferenceSpan.setAttribute(CopilotChatAttr.USER_REQUEST, truncateForOTel(userContent));265}266// System instructions — check messages array, top-level system (Anthropic), or instructions (Responses API)267const systemMsg = capiMessages?.find(m => m.role === 'system');268const systemContent = systemMsg?.content269?? (requestBody as Record<string, unknown>).system270?? (requestBody as Record<string, unknown>).instructions;271if (systemContent) {272let systemText: string;273if (typeof systemContent === 'string') {274systemText = systemContent;275} else if (Array.isArray(systemContent)) {276// Anthropic format: array of content blocks — extract text only,277// dropping metadata like cache_control so the value is stable across turns.278systemText = (systemContent as Array<{ text?: string }>)279.map(b => b.text ?? 
'')280.join('\n');281} else {282systemText = JSON.stringify(systemContent);283}284// Format as OTel GenAI system instruction JSON schema285const systemInstructions = toSystemInstructions(systemText);286if (systemInstructions) {287otelInferenceSpan.setAttribute(GenAiAttr.SYSTEM_INSTRUCTIONS, JSON.stringify(systemInstructions));288}289}290}291292// Always capture full request content for the debug panel293if (otelInferenceSpan) {294const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<Record<string, unknown>> | undefined;295if (capiMessages) {296// Normalize provider-specific content (Anthropic tool_use/tool_result, OpenAI tool messages) to OTel schema297otelInferenceSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(normalizeProviderMessages(capiMessages))));298}299// Tool definitions: emit on every chat span so trace viewers can render the300// tool catalog per LLM call (issue #299934). Includes `parameters` per301// OTel GenAI semantic conventions (issue #300318).302const toolDefs = toToolDefinitions(requestBody.tools);303if (toolDefs) {304otelInferenceSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));305}306}307tokenCount = await countTokens();308const extensionId = source?.extensionId ?? 
EXTENSION_ID;309this._onDidMakeChatMLRequest.fire({310messages,311model: chatEndpoint.model,312source: { extensionId },313tokenCount314});315}316const timeToFirstToken = Date.now() - baseTelemetry.issuedTime;317pendingLoggedChatRequest?.markTimeToFirstToken(timeToFirstToken);318switch (response.type) {319case FetchResponseKind.Success: {320const result = await this.processSuccessfulResponse(response, messages, requestBody, ourRequestId, maxResponseTokens, tokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpoint, userInitiatedRequest, transport, actualFetcher, actualBytesReceived, suspendEventSeen, resumeEventSeen);321322// Handle FilteredRetry case with augmented messages323if (result.type === ChatFetchResponseType.FilteredRetry) {324325if (opts.enableRetryOnFilter) {326streamRecorder.callback('', 0, { text: '', retryReason: result.category });327328const filteredContent = result.value[0];329if (filteredContent) {330const retryMessage = (result.category === FilterReason.Copyright) ?331`The previous response (copied below) was filtered due to being too similar to existing public code. Please suggest something similar in function that does not match public code. Here's the previous response: ${filteredContent}\n\n` :332`The previous response (copied below) was filtered due to triggering our content safety filters, which looks for hateful, self-harm, sexual, or violent content. Please suggest something similar in content that does not trigger these filters. 
Here's the previous response: ${filteredContent}\n\n`;333const augmentedMessages: Raw.ChatMessage[] = [334...messages,335{336role: Raw.ChatRole.User,337content: toTextParts(retryMessage)338}339];340341// Retry with augmented messages342const retryResult = await this.fetchMany({343...opts,344debugName: 'retry-' + debugName,345messages: augmentedMessages,346finishedCb,347location,348endpoint: chatEndpoint,349source,350requestOptions,351userInitiatedRequest: false, // do not mark the retry as user initiated352telemetryProperties: { ...telemetryProperties, retryAfterFilterCategory: result.category ?? 'uncategorized' },353enableRetryOnFilter: false,354canRetryOnceWithoutRollback: false,355enableRetryOnError,356}, token);357358pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);359if (retryResult.type === ChatFetchResponseType.Success) {360return retryResult;361}362}363}364365return {366type: ChatFetchResponseType.Filtered,367category: result.category,368reason: 'Response got filtered.',369requestId: result.requestId,370serverRequestId: result.serverRequestId371};372}373374pendingLoggedChatRequest?.resolve(result, streamRecorder.deltas);375376// Record OTel token usage metrics if available377if (result.type === ChatFetchResponseType.Success && result.usage) {378const metricAttrs = {379operationName: GenAiOperationName.CHAT,380providerName: GenAiProviderName.GITHUB,381requestModel: chatEndpoint.model,382responseModel: result.resolvedModel,383};384if (result.usage.prompt_tokens) {385GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs);386}387if (result.usage.completion_tokens) {388GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs);389}390391// Set token usage and response details on the chat span before ending it392otelInferenceSpan?.setAttributes({393[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 
0,394[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,395[GenAiAttr.RESPONSE_MODEL]: result.resolvedModel ?? chatEndpoint.model,396[GenAiAttr.RESPONSE_ID]: result.requestId,397[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],398...(result.usage.prompt_tokens_details?.cached_tokens399? { [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }400: {}),401...(result.usage.prompt_tokens_details?.cache_creation_input_tokens402? { [GenAiAttr.USAGE_CACHE_CREATION_INPUT_TOKENS]: result.usage.prompt_tokens_details.cache_creation_input_tokens }403: {}),404[CopilotChatAttr.TIME_TO_FIRST_TOKEN]: timeToFirstToken,405...(result.serverRequestId ? { [CopilotChatAttr.SERVER_REQUEST_ID]: result.serverRequestId } : {}),406...(result.usage.completion_tokens_details?.reasoning_tokens407? { [GenAiAttr.USAGE_REASONING_TOKENS]: result.usage.completion_tokens_details.reasoning_tokens }408: {}),409});410}411// Always capture response content for the debug panel412if (otelInferenceSpan && result.type === ChatFetchResponseType.Success) {413const responseText = streamRecorder.deltas.map(d => d.text).join('');414const toolCalls = streamRecorder.deltas415.filter(d => d.copilotToolCalls?.length)416.flatMap(d => d.copilotToolCalls!.map(tc => ({417type: 'tool_call' as const, id: tc.id, name: tc.name, arguments: tc.arguments418})));419const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];420if (responseText) {421parts.push({ type: 'text', content: responseText });422}423parts.push(...toolCalls);424if (parts.length > 0) {425otelInferenceSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));426}427// Capture reasoning/thinking text if present428const hasThinking = streamRecorder.deltas.some(d => d.thinking);429if (hasThinking) {430const thinkingTexts = streamRecorder.deltas431.filter(d => d.thinking && 
!isEncryptedThinkingDelta(d.thinking) && d.thinking.text)432.map(d => {433const t = d.thinking!;434if ('encrypted' in t) { return ''; }435return Array.isArray(t.text) ? t.text.join('') : (t.text ?? '');436});437const reasoningText = thinkingTexts.join('');438otelInferenceSpan.setAttribute(CopilotChatAttr.REASONING_CONTENT, truncateForOTel(reasoningText || '[encrypted]'));439}440}441442// Emit OTel inference details event BEFORE ending the span443// so the log record inherits the active trace context444emitInferenceDetailsEvent(445this._otelService,446{447model: chatEndpoint.model,448temperature: requestOptions?.temperature,449maxTokens: requestOptions?.max_tokens,450},451result.type === ChatFetchResponseType.Success ? {452id: result.requestId,453model: result.resolvedModel,454finishReasons: ['stop'],455inputTokens: result.usage?.prompt_tokens,456outputTokens: result.usage?.completion_tokens,457} : undefined,458);459460otelInferenceSpan?.end();461otelInferenceSpan = undefined;462463// Record OTel time-to-first-token metric464if (timeToFirstToken > 0) {465GenAiMetrics.recordTimeToFirstToken(this._otelService, chatEndpoint.model, timeToFirstToken / 1000);466}467468if (useWebSocket && result.type === ChatFetchResponseType.Success) {469this._consecutiveWebSocketRetryFallbacks = 0;470}471472return result;473}474case FetchResponseKind.Canceled:475Telemetry.sendCancellationTelemetry(476this._telemetryService,477{478source: telemetryProperties.messageSource ?? 
'unknown',479requestId: ourRequestId,480model: chatEndpoint.model,481apiType: chatEndpoint.apiType,482transport,483associatedRequestId: telemetryProperties.associatedRequestId,484retryAfterError: telemetryProperties.retryAfterError,485retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,486connectivityTestError: telemetryProperties.connectivityTestError,487connectivityTestErrorGitHubRequestId: telemetryProperties.connectivityTestErrorGitHubRequestId,488retryAfterFilterCategory: telemetryProperties.retryAfterFilterCategory,489fetcher: actualFetcher,490suspendEventSeen,491resumeEventSeen,492},493{494totalTokenMax: chatEndpoint.modelMaxPromptTokens ?? -1,495promptTokenCount: tokenCount,496tokenCountMax: maxResponseTokens,497timeToFirstToken,498timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,499timeToCancelled: Date.now() - baseTelemetry.issuedTime,500isVisionRequest: this.filterImageMessages(messages) ? 
1 : -1,501isBYOK: isBYOKModel(chatEndpoint),502isAuto: isAutoModel(chatEndpoint),503bytesReceived: actualBytesReceived,504issuedTime: baseTelemetry.issuedTime,505});506pendingLoggedChatRequest?.resolveWithCancelation();507// Set canceled status on OTel span508otelInferenceSpan?.setAttributes({509[GenAiAttr.RESPONSE_FINISH_REASONS]: ['cancelled'],510[CopilotChatAttr.CANCELED]: true,511});512otelInferenceSpan?.end();513otelInferenceSpan = undefined;514return this.processCanceledResponse(response, ourRequestId, streamRecorder, telemetryProperties);515case FetchResponseKind.Failed: {516const processed = this.processFailedResponse(response, ourRequestId, isAutoModel(chatEndpoint) === 1);517// Retry on server errors based on configured status codes518const retryServerErrorStatusCodes = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.RetryServerErrorStatusCodes, this._experimentationService);519const statusCodesToRetry = retryServerErrorStatusCodes520.split(',')521.map(s => parseInt(s.trim(), 10));522const retryAfterServerError = enableRetryOnError && actualStatusCode !== undefined && statusCodesToRetry.includes(actualStatusCode);523const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (response.failKind === ChatFailKind.ServerError || response.failKind === ChatFailKind.Unknown);524if (retryAfterServerError || retryWithoutWebSocket) {525const { retryResult } = await this._retryAfterError({526opts,527processed,528telemetryProperties,529requestBody,530tokenCount,531maxResponseTokens,532timeToError: timeToFirstToken,533transport,534actualFetcher,535bytesReceived: actualBytesReceived,536baseTelemetry,537streamRecorder,538retryReason: 'server_error',539debugNamePrefix: 'retry-server-error-',540pendingLoggedChatRequest,541token,542usernameToScrub,543suspendEventSeen,544resumeEventSeen,545});546if (retryResult) {547return retryResult;548}549}550Telemetry.sendResponseErrorTelemetry(this._telemetryService, 
{551processed,552telemetryProperties,553chatEndpointInfo: chatEndpoint,554requestBody,555tokenCount,556maxResponseTokens,557timeToFirstToken,558isVisionRequest: this.filterImageMessages(messages),559transport,560fetcher: actualFetcher,561bytesReceived: actualBytesReceived,562issuedTime: baseTelemetry.issuedTime,563wasRetried: false,564suspendEventSeen,565resumeEventSeen,566});567pendingLoggedChatRequest?.resolve(processed);568return processed;569}570}571} catch (err) {572// End OTel inference span on error if not already ended573if (otelInferenceSpan) {574otelInferenceSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));575otelInferenceSpan.setAttribute(StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');576otelInferenceSpan.setAttribute(GenAiAttr.RESPONSE_FINISH_REASONS, ['error']);577otelInferenceSpan.recordException(err);578otelInferenceSpan.end();579}580const timeToError = Date.now() - baseTelemetry.issuedTime;581if (err.fetcherId) {582actualFetcher = err.fetcherId;583}584if (err.suspendEventSeen) {585suspendEventSeen = true;586}587if (err.resumeEventSeen) {588resumeEventSeen = true;589}590const processed = this.processError(err, ourRequestId, err.gitHubRequestId, usernameToScrub, isAutoModel(chatEndpoint) === 1);591const retryNetworkError = enableRetryOnError && processed.type === ChatFetchResponseType.NetworkError && this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.RetryNetworkErrors, this._experimentationService);592const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (processed.type === ChatFetchResponseType.NetworkError || processed.type === ChatFetchResponseType.Failed);593if (retryNetworkError || retryWithoutWebSocket) {594const { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId } = await 
this._retryAfterError({595opts,596processed,597telemetryProperties,598requestBody,599tokenCount,600maxResponseTokens,601timeToError,602transport,603actualFetcher,604bytesReceived: err.bytesReceived,605baseTelemetry,606streamRecorder,607retryReason: 'network_error',608debugNamePrefix: 'retry-error-',609pendingLoggedChatRequest,610token,611usernameToScrub,612suspendEventSeen,613resumeEventSeen,614});615if (retryResult) {616return retryResult;617}618telemetryProperties = { ...telemetryProperties, connectivityTestError, connectivityTestErrorGitHubRequestId };619}620if (processed.type === ChatFetchResponseType.Canceled) {621Telemetry.sendCancellationTelemetry(622this._telemetryService,623{624source: telemetryProperties.messageSource ?? 'unknown',625requestId: ourRequestId,626model: chatEndpoint.model,627apiType: chatEndpoint.apiType,628transport,629associatedRequestId: telemetryProperties.associatedRequestId,630retryAfterError: telemetryProperties.retryAfterError,631retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,632connectivityTestError: telemetryProperties.connectivityTestError,633connectivityTestErrorGitHubRequestId: telemetryProperties.connectivityTestErrorGitHubRequestId,634retryAfterFilterCategory: telemetryProperties.retryAfterFilterCategory,635fetcher: actualFetcher,636suspendEventSeen,637resumeEventSeen,638},639{640totalTokenMax: chatEndpoint.modelMaxPromptTokens ?? -1,641promptTokenCount: tokenCount,642tokenCountMax: maxResponseTokens,643timeToFirstToken: undefined,644timeToCancelled: timeToError,645isVisionRequest: this.filterImageMessages(messages) ? 
1 : -1,646isBYOK: isBYOKModel(chatEndpoint),647isAuto: isAutoModel(chatEndpoint),648bytesReceived: err.bytesReceived,649issuedTime: baseTelemetry.issuedTime,650}651);652} else {653Telemetry.sendResponseErrorTelemetry(this._telemetryService, {654processed,655telemetryProperties,656chatEndpointInfo: chatEndpoint,657requestBody,658tokenCount,659maxResponseTokens,660timeToFirstToken: timeToError,661isVisionRequest: this.filterImageMessages(messages),662transport,663fetcher: actualFetcher,664bytesReceived: err.bytesReceived,665issuedTime: baseTelemetry.issuedTime,666wasRetried: false,667suspendEventSeen,668resumeEventSeen,669});670}671pendingLoggedChatRequest?.resolve(processed);672return processed;673}674}675676private async _checkNetworkConnectivity(useFetcher?: FetcherId): Promise<{ retryRequest: boolean; connectivityTestError?: string; connectivityTestErrorGitHubRequestId?: string }> {677// Ping CAPI to check network connectivity before retrying678const delays = this.connectivityCheckDelays;679let connectivityTestError: string | undefined = undefined;680let connectivityTestErrorGitHubRequestId: string | undefined = undefined;681for (const delay of delays) {682this._logService.info(`Waiting ${delay}ms before pinging CAPI to check network connectivity...`);683await new Promise(resolve => setTimeout(resolve, delay));684try {685const isGHEnterprise = this._capiClientService.dotcomAPIURL !== 'https://api.github.com';686const url = this._capiClientService.capiPingURL;687const headers = await this._getAuthHeaders(isGHEnterprise, url);688const res = await this._fetcherService.fetch(url, {689headers,690useFetcher,691callSite: 'capi-ping',692});693if (res.status >= 200 && res.status < 300) {694this._logService.info(`CAPI ping successful, proceeding with chat request retry...`);695return { retryRequest: true, connectivityTestError, connectivityTestErrorGitHubRequestId };696} else {697connectivityTestError = `Status ${res.status}: 
${res.statusText}`;
					connectivityTestErrorGitHubRequestId = res.headers.get('x-github-request-id') ?? '';
					this._logService.info(`CAPI ping returned status ${res.status}, retrying ping...`);
				}
			} catch (err) {
				connectivityTestError = collectSingleLineErrorMessage(err, true);
				connectivityTestErrorGitHubRequestId = undefined; // no response headers yet
				this._logService.info(`CAPI ping failed with error, retrying ping: ${connectivityTestError}`);
			}
		}
		return { retryRequest: false, connectivityTestError, connectivityTestErrorGitHubRequestId };
	}

	private async _getAuthHeaders(isGHEnterprise: boolean, url: string) {
		// On GHE, the ping endpoint requires a bearer token; dotcom pings go unauthenticated.
		const authHeaders: Record<string, string> = {};
		if (isGHEnterprise) {
			let token = '';
			if (url === this._capiClientService.dotcomAPIURL) {
				token = this._authenticationService.anyGitHubSession?.accessToken || '';
			} else {
				try {
					token = (await this._authenticationService.getCopilotToken()).token;
				} catch (_err) {
					// Ignore error
					token = '';
				}
			}
			authHeaders['Authorization'] = `Bearer ${token}`;
		}
		return authHeaders;
	}

	private async _retryAfterError(params: {
		opts: IFetchMLOptions;
		processed: ChatFetchError;
		telemetryProperties: TelemetryProperties;
		requestBody: IEndpointBody;
		tokenCount: number;
		maxResponseTokens: number;
		timeToError: number;
		transport: string;
		actualFetcher: FetcherId | undefined;
		bytesReceived: number | undefined;
		baseTelemetry: TelemetryData;
		streamRecorder: FetchStreamRecorder;
		retryReason: 'network_error' | 'server_error';
		debugNamePrefix: string;
		pendingLoggedChatRequest: ReturnType<IRequestLogger['logChatRequest']>;
		token: CancellationToken;
		usernameToScrub: string | undefined;
		suspendEventSeen: boolean | undefined;
		resumeEventSeen: boolean | undefined;
	}): Promise<{ retryResult?: ChatResponses; connectivityTestError?: string; connectivityTestErrorGitHubRequestId?: string }> {
		const 
{751opts,752processed,753telemetryProperties,754requestBody,755tokenCount,756maxResponseTokens,757timeToError,758transport,759actualFetcher,760bytesReceived,761baseTelemetry,762streamRecorder,763retryReason,764debugNamePrefix,765pendingLoggedChatRequest,766token,767usernameToScrub,768suspendEventSeen,769resumeEventSeen,770} = params;771772// net::ERR_NETWORK_CHANGED: https://github.com/microsoft/vscode/issues/260297773const isNetworkChangedError = ['darwin', 'linux'].includes(process.platform) && processed.reason.indexOf('net::ERR_NETWORK_CHANGED') !== -1;774// When Electron's network process crashes, all requests through it fail permanently.775// Fall back to node-fetch which bypasses Electron's network stack entirely.776const fallbackEnabled = this._configurationService.getExperimentBasedConfig(777ConfigKey.TeamInternal.FallbackNodeFetchOnNetworkProcessCrash, this._experimentationService);778const isNetworkProcessCrash = processed.type === ChatFetchResponseType.NetworkError779&& processed.isNetworkProcessCrash === true780&& fallbackEnabled;781const useFetcher = (isNetworkChangedError || isNetworkProcessCrash) ? 'node-fetch' : opts.useFetcher;782this._logService.info(`Retrying chat request with ${useFetcher || 'default'} fetcher after: ${processed.reasonDetail || processed.reason}`);783const connectivity = await this._checkNetworkConnectivity(useFetcher);784const connectivityTestError = connectivity.connectivityTestError ? 
this.scrubErrorDetail(connectivity.connectivityTestError, usernameToScrub) : undefined;785const connectivityTestErrorGitHubRequestId = connectivity.connectivityTestErrorGitHubRequestId;786if (!connectivity.retryRequest) {787this._logService.info(`Not retrying chat request as network connectivity could not be re-established.`);788return { connectivityTestError, connectivityTestErrorGitHubRequestId };789}790791Telemetry.sendResponseErrorTelemetry(792this._telemetryService,793{794processed,795telemetryProperties,796chatEndpointInfo: opts.endpoint,797requestBody,798tokenCount,799maxResponseTokens,800timeToFirstToken: timeToError,801isVisionRequest: this.filterImageMessages(opts.messages),802transport,803fetcher: actualFetcher,804bytesReceived,805issuedTime: baseTelemetry.issuedTime,806wasRetried: true,807suspendEventSeen,808resumeEventSeen,809},810);811812streamRecorder.callback('', 0, { text: '', retryReason });813814const retryResult = await this.fetchMany({815...opts,816useWebSocket: false,817ignoreStatefulMarker: opts.useWebSocket || opts.ignoreStatefulMarker,818debugName: debugNamePrefix + opts.debugName,819userInitiatedRequest: false, // do not mark the retry as user initiated820telemetryProperties: {821...telemetryProperties,822retryAfterError: processed.reasonDetail || processed.reason,823retryAfterErrorGitHubRequestId: processed.serverRequestId,824connectivityTestError,825connectivityTestErrorGitHubRequestId,826},827enableRetryOnError: false,828useFetcher,829}, token);830831pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);832if (opts.useWebSocket && retryResult.type === ChatFetchResponseType.Success) {833this._consecutiveWebSocketRetryFallbacks++;834this._logService.info(`[ChatWebSocketManager] WebSocket request failed with successful HTTP fallback (${this._consecutiveWebSocketRetryFallbacks} consecutive).`);835if (opts.conversationId) {836// Closing here because the retry is 
transparent.837this._webSocketManager.closeConnection(opts.conversationId);838}839}840return { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId };841}842843private async _fetchAndStreamChat(844chatEndpointInfo: IChatEndpoint,845request: IEndpointBody,846baseTelemetryData: TelemetryData,847finishedCb: FinishedCallback,848secretKey: string | undefined,849copilotToken: CopilotToken,850location: ChatLocation,851ourRequestId: string,852nChoices: number | undefined,853cancellationToken: CancellationToken,854countTokens: () => Promise<number>,855userInitiatedRequest?: boolean,856useWebSocket?: boolean,857turnId?: string,858conversationId?: string,859telemetryProperties?: TelemetryProperties | undefined,860useFetcher?: FetcherId,861canRetryOnce?: boolean,862requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,863summarizedAtRoundId?: string,864modeChanged?: boolean,865): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number; suspendEventSeen?: boolean; resumeEventSeen?: boolean; otelSpan?: ISpanHandle }> {866const isPowerSaveBlockerEnabled = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.ChatRequestPowerSaveBlocker, this._experimentationService);867const blockerHandle = isPowerSaveBlockerEnabled && location !== ChatLocation.Other ? 
this._powerService.acquirePowerSaveBlocker() : undefined;868869let suspendEventSeen = false;870let resumeEventSeen = false;871872const suspendListener = this._powerService.onDidSuspend(() => {873suspendEventSeen = true;874this._logService.info(`System suspended during streaming request ${ourRequestId} (${ChatLocation.toString(location)})`);875});876877const resumeListener = this._powerService.onDidResume(() => {878resumeEventSeen = true;879this._logService.info(`System resumed during streaming request ${ourRequestId} (${ChatLocation.toString(location)})`);880});881882try {883const fetchResult = await this._doFetchAndStreamChat(884chatEndpointInfo,885request,886baseTelemetryData,887finishedCb,888secretKey,889copilotToken,890location,891ourRequestId,892nChoices,893cancellationToken,894countTokens,895userInitiatedRequest,896useWebSocket,897turnId,898conversationId,899telemetryProperties,900useFetcher,901canRetryOnce,902requestKindOptions,903summarizedAtRoundId,904modeChanged,905);906return { ...fetchResult, suspendEventSeen: suspendEventSeen || undefined, resumeEventSeen: resumeEventSeen || undefined };907} catch (err) {908if (suspendEventSeen) {909err.suspendEventSeen = true;910}911if (resumeEventSeen) {912err.resumeEventSeen = true;913}914throw err;915} finally {916suspendListener.dispose();917resumeListener.dispose();918blockerHandle?.dispose();919}920}921922private async _doFetchAndStreamChat(923chatEndpointInfo: IChatEndpoint,924request: IEndpointBody,925baseTelemetryData: TelemetryData,926finishedCb: FinishedCallback,927secretKey: string | undefined,928copilotToken: CopilotToken,929location: ChatLocation,930ourRequestId: string,931nChoices: number | undefined,932cancellationToken: CancellationToken,933countTokens: () => Promise<number>,934userInitiatedRequest?: boolean,935useWebSocket?: boolean,936turnId?: string,937conversationId?: string,938telemetryProperties?: TelemetryProperties | undefined,939useFetcher?: FetcherId,940canRetryOnce?: 
boolean,941requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,942summarizedAtRoundId?: string,943modeChanged?: boolean,944): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number; otelSpan?: ISpanHandle }> {945946if (cancellationToken.isCancellationRequested) {947return { result: { type: FetchResponseKind.Canceled, reason: 'before fetch request' } };948}949950// OTel inference span for this LLM call951const serverAddress = typeof chatEndpointInfo.urlOrRequestMetadata === 'string'952? (() => { try { return new URL(chatEndpointInfo.urlOrRequestMetadata).hostname; } catch { return undefined; } })()953: undefined;954const chatSessionId = getCurrentCapturingToken()?.chatSessionId;955const parentChatSessionId = getCurrentCapturingToken()?.parentChatSessionId;956const debugLogLabel = getCurrentCapturingToken()?.debugLogLabel;957const otelSpan = this._otelService.startSpan(`chat ${chatEndpointInfo.model}`, {958kind: SpanKind.CLIENT,959attributes: {960[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,961[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GITHUB,962[GenAiAttr.REQUEST_MODEL]: chatEndpointInfo.model,963[GenAiAttr.CONVERSATION_ID]: telemetryProperties?.requestId ?? ourRequestId,964[GenAiAttr.REQUEST_MAX_TOKENS]: request.max_tokens ?? request.max_output_tokens ?? request.max_completion_tokens ?? 2048,965...(request.temperature !== undefined ? { [GenAiAttr.REQUEST_TEMPERATURE]: request.temperature } : {}),966...(request.top_p !== undefined ? { [GenAiAttr.REQUEST_TOP_P]: request.top_p } : {}),967[CopilotChatAttr.MAX_PROMPT_TOKENS]: chatEndpointInfo.modelMaxPromptTokens,968...(serverAddress ? { [StdAttr.SERVER_ADDRESS]: serverAddress } : {}),969...(conversationId ? { [CopilotChatAttr.SESSION_ID]: conversationId } : {}),970...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}),971...(parentChatSessionId ? 
{ [CopilotChatAttr.PARENT_CHAT_SESSION_ID]: parentChatSessionId } : {}),972...(debugLogLabel ? { [CopilotChatAttr.DEBUG_LOG_LABEL]: debugLogLabel } : {}),973},974});975const otelStartTime = Date.now();976977try {978979this._logService.debug(`modelMaxPromptTokens ${chatEndpointInfo.modelMaxPromptTokens}`);980this._logService.debug(`modelMaxResponseTokens ${request.max_tokens ?? 2048}`);981this._logService.debug(`chat model ${chatEndpointInfo.model}`);982983secretKey ??= copilotToken.token;984if (!secretKey) {985// If no key is set we error986const urlOrRequestMetadata = stringifyUrlOrRequestMetadata(chatEndpointInfo.urlOrRequestMetadata);987this._logService.error(`Failed to send request to ${urlOrRequestMetadata} due to missing key`);988sendCommunicationErrorTelemetry(this._telemetryService, `Failed to send request to ${urlOrRequestMetadata} due to missing key`);989return {990result: {991type: FetchResponseKind.Failed,992modelRequestId: undefined,993failKind: ChatFailKind.TokenExpiredOrInvalid,994reason: 'key is missing'995}996};997}998999// WebSocket path: use persistent WebSocket connection for Responses API endpoints1000if (useWebSocket && turnId && conversationId) {1001const wsResult = await this._doFetchViaWebSocket(1002chatEndpointInfo,1003request,1004baseTelemetryData,1005finishedCb,1006secretKey,1007location,1008ourRequestId,1009turnId,1010conversationId,1011cancellationToken,1012countTokens,1013userInitiatedRequest,1014telemetryProperties,1015requestKindOptions,1016summarizedAtRoundId,1017modeChanged,1018);1019return { ...wsResult, otelSpan };1020}10211022const httpResult = await this._doFetchViaHttp(1023chatEndpointInfo,1024request,1025baseTelemetryData,1026finishedCb,1027secretKey,1028location,1029ourRequestId,1030nChoices,1031cancellationToken,1032userInitiatedRequest,1033telemetryProperties,1034useFetcher,1035canRetryOnce,1036requestKindOptions,1037);1038return { ...httpResult, otelSpan };10391040} catch (err) 
{1041otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));1042otelSpan.setAttribute(StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');1043otelSpan.recordException(err);1044throw err;1045} finally {1046const durationSec = (Date.now() - otelStartTime) / 1000;1047GenAiMetrics.recordOperationDuration(this._otelService, durationSec, {1048operationName: GenAiOperationName.CHAT,1049providerName: GenAiProviderName.GITHUB,1050requestModel: chatEndpointInfo.model,1051});1052// Span is NOT ended here — caller (fetchMany) will set token attributes and end it1053}1054}10551056/**1057* Sends a chat request via a persistent WebSocket connection instead of HTTP POST.1058* Events are the same Responses API streaming events, processed by OpenAIResponsesProcessor.1059*/1060private async _doFetchViaWebSocket(1061chatEndpointInfo: IChatEndpoint,1062request: IEndpointBody,1063baseTelemetryData: TelemetryData,1064finishedCb: FinishedCallback,1065secretKey: string,1066location: ChatLocation,1067ourRequestId: string,1068turnId: string,1069conversationId: string,1070cancellationToken: CancellationToken,1071countTokens: () => Promise<number>,1072userInitiatedRequest: boolean | undefined,1073telemetryProperties: TelemetryProperties | undefined,1074requestKindOptions: IBackgroundRequestOptions | ISubagentRequestOptions | undefined,1075summarizedAtRoundId: string | undefined,1076modeChanged: boolean | undefined,1077): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled }> {1078const intent = locationToIntent(location);1079const agentInteractionType = requestKindOptions?.kind === 'subagent' ?1080'conversation-subagent' :1081requestKindOptions?.kind === 'background' ?1082'conversation-background' :1083intent === 'conversation-agent' ? 
intent : undefined;1084const additionalHeaders: Record<string, string> = {1085'Authorization': `Bearer ${secretKey}`,1086'X-Request-Id': ourRequestId,1087'OpenAI-Intent': intent,1088'X-GitHub-Api-Version': '2025-05-01',1089'X-Interaction-Id': this._interactionService.interactionId,1090...(chatEndpointInfo.getExtraHeaders ? chatEndpointInfo.getExtraHeaders(location) : {}),1091};1092if (agentInteractionType) {1093additionalHeaders['X-Interaction-Type'] = agentInteractionType;1094additionalHeaders['X-Agent-Task-Id'] = ourRequestId;1095}1096if (request.messages?.some((m: CAPIChatMessage) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpointInfo.supportsVision) {1097additionalHeaders['Copilot-Vision-Request'] = 'true';1098}1099const connection = this._webSocketManager.getOrCreateConnection(conversationId, additionalHeaders, ourRequestId);1100try {1101await connection.connect();1102} catch (err) {1103(err as any).gitHubRequestId = connection.gitHubRequestId;1104throw err;1105}11061107// Generate unique ID to link input and output messages1108const modelCallId = generateUuid();11091110const telemetryData = TelemetryData.createAndMarkAsIssued({1111endpoint: 'completions',1112engineName: 'chat',1113uiKind: ChatLocation.toString(location),1114transport: 'websocket',1115...{ ...telemetryProperties, modelCallId },1116}, {1117maxTokenWindow: chatEndpointInfo.modelMaxPromptTokens1118});11191120const modelRequestId = getRequestId(connection.responseHeaders);1121// Request id changes over the lifetime of the connection.1122modelRequestId.headerRequestId = ourRequestId;1123telemetryData.extendWithRequestId(modelRequestId);1124if (modelRequestId.serverExperiments) {1125this._telemetryService.setSharedProperty('capi.assignmentcontext', modelRequestId.serverExperiments);1126}11271128for (const [key, value] of Object.entries(request)) {1129if (key === 'messages' || key === 'input') {1130continue;1131} // Skip messages 
(PII)1132telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 'undefined';1133}1134this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);11351136const requestStart = Date.now();1137const handle = connection.sendRequest(request, { userInitiated: !!userInitiatedRequest, turnId, requestId: ourRequestId, model: chatEndpointInfo.model, countTokens, tokenCountMax: chatEndpointInfo.maxOutputTokens, modelMaxPromptTokens: chatEndpointInfo.modelMaxPromptTokens, summarizedAtRoundId, modeChanged }, cancellationToken);11381139const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });1140const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, this._telemetryService, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId, modelRequestId.serverExperiments, getResponsesApiCompactionThresholdFromBody(request));11411142// Set up streaming first so event listeners are registered before we1143// await the first event — AsyncIterableObject runs its executor eagerly.1144const chatCompletions = new AsyncIterableObject<ChatCompletion>(async emitter => {1145try {1146await new Promise<void>((resolve, reject) => {1147handle.onEvent(event => {1148const completion = processor.push(event, finishedCb);1149if (completion) {1150sendCompletionOutputTelemetry(this._telemetryService, this._logService, completion, extendedBaseTelemetryData);1151emitter.emitOne(completion);1152}11531154if (event.type === 'response.completed') {1155const snapshots = (event as any).copilot_quota_snapshots;1156if (snapshots && typeof snapshots === 'object') {1157this._chatQuotaService.processQuotaSnapshots(snapshots);1158}1159}1160});11611162handle.onCAPIError(event => {1163// Mid-stream CAPI error — throw so the caller can handle it1164const error = new Error(`${event.error.message} (${event.error.code})`);1165(error as any).gitHubRequestId = 
modelRequestId.gitHubRequestId;1166(error as any).capiWebSocketError = event;1167reject(error);1168});11691170handle.onError(error => {1171(error as any).gitHubRequestId = modelRequestId.gitHubRequestId;1172if (isCancellationError(error)) {1173reject(error);1174return;1175}11761177const warningTelemetry = telemetryData.extendedBy({ error: error.message });1178this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);11791180const totalTimeMs = Date.now() - requestStart;1181telemetryData.measurements.totalTimeMs = totalTimeMs;1182telemetryData.properties.error = error.message;11831184this._logService.debug(`request.error: [websocket], took ${totalTimeMs} ms`);1185this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);11861187reject(error);1188});11891190handle.done.then(resolve, reject);1191});11921193const totalTimeMs = Date.now() - requestStart;1194telemetryData.measurements.totalTimeMs = totalTimeMs;1195this._logService.debug(`request.response: [websocket], took ${totalTimeMs} ms`);1196this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);1197} finally {1198let messagesToLog = request.messages;1199if ((!messagesToLog || messagesToLog.length === 0) && (request as OpenAI.Responses.ResponseCreateParams).input) {1200try {1201const rawMessages = responseApiInputToRawMessagesForLogging(request as OpenAI.Responses.ResponseCreateParams);1202messagesToLog = rawMessageToCAPI(rawMessages);1203} catch (e) {1204this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);1205messagesToLog = [];1206}1207}1208sendEngineMessagesTelemetry(this._telemetryService, messagesToLog ?? 
[], telemetryData, false, this._logService);1209}1210});12111212// Wait for the first event to determine the response type,1213// analogous to checking HTTP status code before streaming the body.1214const firstEvent = await handle.firstEvent;12151216if (cancellationToken.isCancellationRequested) {1217return { result: { type: FetchResponseKind.Canceled, reason: 'after first WebSocket event' } };1218}12191220// CAPI error before any stream events — return Failed like HTTP non-2001221if (isCAPIWebSocketError(firstEvent)) {1222const totalTimeMs = Date.now() - requestStart;1223telemetryData.measurements.totalTimeMs = totalTimeMs;1224telemetryData.properties.error = `${firstEvent.error.message} (${firstEvent.error.code})`;1225this._logService.debug(`request.error: [websocket capi error], took ${totalTimeMs} ms`);1226this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);1227return { result: await this._handleWebSocketCAPIError(firstEvent, modelRequestId) };1228}12291230// Clear stale quota-exceeded state if the server accepted the request.1231if (this._authenticationService.copilotToken?.isFreeUser && this._authenticationService.copilotToken?.isChatQuotaExceeded) {1232this._authenticationService.resetCopilotToken();1233}12341235return {1236result: {1237type: FetchResponseKind.Success,1238chatCompletions,1239}1240};1241}12421243private async _doFetchViaHttp(1244chatEndpointInfo: IChatEndpoint,1245request: IEndpointBody,1246baseTelemetryData: TelemetryData,1247finishedCb: FinishedCallback,1248secretKey: string,1249location: ChatLocation,1250ourRequestId: string,1251nChoices: number | undefined,1252cancellationToken: CancellationToken,1253userInitiatedRequest: boolean | undefined,1254telemetryProperties: TelemetryProperties | undefined,1255useFetcher: FetcherId | undefined,1256canRetryOnce: boolean | undefined,1257requestKindOptions: IBackgroundRequestOptions | ISubagentRequestOptions | undefined,1258): Promise<{ 
result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number }> {1259// Generate unique ID to link input and output messages1260const modelCallId = generateUuid();12611262const response = await this._fetchWithInstrumentation(1263chatEndpointInfo,1264ourRequestId,1265request,1266secretKey,1267location,1268cancellationToken,1269userInitiatedRequest,1270{ ...telemetryProperties, modelCallId },1271useFetcher,1272canRetryOnce,1273requestKindOptions,1274);12751276if (cancellationToken.isCancellationRequested) {1277try {1278// Destroy the stream so that the server is hopefully notified we don't want any more data1279// and can cancel/forget about the request itself.1280await response!.body.destroy();1281} catch (e) {1282this._logService.error(e, `Error destroying stream`);1283this._telemetryService.sendGHTelemetryException(e, 'Error destroying stream');1284}1285return {1286result: { type: FetchResponseKind.Canceled, reason: 'after fetch request' },1287fetcher: response.fetcher,1288bytesReceived: response.bytesReceived1289};1290}12911292if (response.status === 200 && this._authenticationService.copilotToken?.isFreeUser && this._authenticationService.copilotToken?.isChatQuotaExceeded) {1293this._authenticationService.resetCopilotToken();1294}12951296if (response.status !== 200) {1297const telemetryData = createTelemetryData(chatEndpointInfo, location, ourRequestId);1298this._logService.info('Request ID for failed request: ' + ourRequestId);1299return {1300result: await this._handleError(telemetryData, response, ourRequestId),1301fetcher: response.fetcher,1302bytesReceived: response.bytesReceived,1303statusCode: response.status1304};1305}13061307// Extend baseTelemetryData with modelCallId for output messages1308const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });13091310let chatCompletions;1311const gitHubRequestId = response.headers.get('x-github-request-id') ?? 
'';1312try {1313const completions = await chatEndpointInfo.processResponseFromChatEndpoint(1314this._telemetryService,1315this._logService,1316response,1317nChoices ?? /* OpenAI's default */ 1,1318finishedCb,1319extendedBaseTelemetryData,1320cancellationToken,1321location,1322);1323chatCompletions = new AsyncIterableObject<ChatCompletion>(async emitter => {1324try {1325for await (const completion of completions) {1326emitter.emitOne(completion);1327}1328} catch (err) {1329err.fetcherId = response.fetcher;1330err.gitHubRequestId = gitHubRequestId;1331err.bytesReceived = response.bytesReceived;1332throw err;1333}1334});1335} catch (err) {1336err.fetcherId = response.fetcher;1337err.gitHubRequestId = gitHubRequestId;1338err.bytesReceived = response.bytesReceived;1339throw err;1340}13411342// CAPI will return us a Copilot Edits Session Header which is our token to using the speculative decoding endpoint1343// We should store this in the auth service for easy use later1344if (response.headers.get('Copilot-Edits-Session')) {1345this._authenticationService.speculativeDecodingEndpointToken = response.headers.get('Copilot-Edits-Session') ?? 
undefined;1346}13471348this._chatQuotaService.processQuotaHeaders(response.headers);13491350return {1351result: {1352type: FetchResponseKind.Success,1353chatCompletions,1354},1355fetcher: response.fetcher,1356bytesReceived: response.bytesReceived1357};1358}13591360private async _fetchWithInstrumentation(1361chatEndpoint: IChatEndpoint,1362ourRequestId: string,1363request: IEndpointBody,1364secretKey: string,1365location: ChatLocation,1366cancellationToken: CancellationToken,1367userInitiatedRequest?: boolean,1368telemetryProperties?: TelemetryProperties,1369useFetcher?: FetcherId,1370canRetryOnce?: boolean,1371requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,1372): Promise<Response> {13731374// If request contains an image, we include this header.1375const additionalHeaders: Record<string, string> = {1376'X-Interaction-Id': this._interactionService.interactionId,1377'X-Initiator': userInitiatedRequest ? 'user' : 'agent', // Agent = a system request / not the primary user query.1378};1379if (request.messages?.some((m: CAPIChatMessage) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpoint.supportsVision) {1380additionalHeaders['Copilot-Vision-Request'] = 'true';1381}1382const telemetryData = TelemetryData.createAndMarkAsIssued({1383endpoint: 'completions',1384engineName: 'chat',1385uiKind: ChatLocation.toString(location),1386transport: 'http',1387...telemetryProperties // This includes the modelCallId from fetchAndStreamChat1388}, {1389maxTokenWindow: chatEndpoint.modelMaxPromptTokens1390});13911392for (const [key, value] of Object.entries(request)) {1393if (key === 'messages' || key === 'input') {1394continue;1395} // Skip messages (PII)1396telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 
'undefined';1397}13981399// The request ID we are passed in is sent in the request to the proxy, and included in our pre-request telemetry.1400// We hope (but do not rely on) that the model will use the same ID in the response, allowing us to correlate1401// the request and response.1402telemetryData.properties['headerRequestId'] = ourRequestId;14031404this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);14051406const requestStart = Date.now();1407const intent = locationToIntent(location);14081409// Wrap the Promise with success/error callbacks so we can log/measure it1410return this._instantiationService.invokeFunction(postRequest, {1411endpointOrUrl: chatEndpoint,1412secretKey,1413intent,1414requestId: ourRequestId,1415body: request,1416additionalHeaders,1417cancelToken: cancellationToken,1418useFetcher,1419canRetryOnce,1420location,1421requestKindOptions,1422}).then(response => {1423const apim = response.headers.get('apim-request-id');1424if (apim) {1425this._logService.debug(`APIM request id: ${apim}`);1426}1427const ghRequestId = response.headers.get('x-github-request-id');1428if (ghRequestId) {1429this._logService.debug(`GH request id: ${ghRequestId}`);1430}1431// This ID is hopefully the one the same as ourRequestId, but it is not guaranteed.1432// If they are different then we will override the original one we set in telemetryData above.1433const modelRequestId = getRequestId(response.headers);1434// Preserve ourRequestId as headerRequestId if the server didn't echo x-request-id1435modelRequestId.headerRequestId = modelRequestId.headerRequestId || ourRequestId;1436telemetryData.extendWithRequestId(modelRequestId);1437if (modelRequestId.serverExperiments) {1438this._telemetryService.setSharedProperty('capi.assignmentcontext', modelRequestId.serverExperiments);1439}14401441// TODO: Add response length (requires parsing)1442const totalTimeMs = Date.now() - 
requestStart;1443telemetryData.measurements.totalTimeMs = totalTimeMs;14441445this._logService.debug(`request.response: [${stringifyUrlOrRequestMetadata(chatEndpoint.urlOrRequestMetadata)}], took ${totalTimeMs} ms`);14461447this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);14481449return response;1450})1451.catch(error => {1452if (this._fetcherService.isAbortError(error)) {1453// If we cancelled a network request, we don't want to log a `request.error`1454throw error;1455}14561457const warningTelemetry = telemetryData.extendedBy({ error: 'Network exception' });1458this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);14591460telemetryData.properties.code = String(error.code ?? '');1461telemetryData.properties.errno = String(error.errno ?? '');1462telemetryData.properties.message = String(error.message ?? '');1463telemetryData.properties.type = String(error.type ?? 
'');14641465const totalTimeMs = Date.now() - requestStart;1466telemetryData.measurements.totalTimeMs = totalTimeMs;14671468this._logService.debug(`request.response: [${stringifyUrlOrRequestMetadata(chatEndpoint.urlOrRequestMetadata)}] took ${totalTimeMs} ms`);14691470this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);14711472throw error;1473})1474.finally(() => {1475let messagesToLog = request.messages;14761477// For Response API (has input but no messages), convert input to messages for logging1478if ((!messagesToLog || messagesToLog.length === 0) && (request as OpenAI.Responses.ResponseCreateParams).input) {1479try {1480const rawMessages = responseApiInputToRawMessagesForLogging(request as OpenAI.Responses.ResponseCreateParams);1481messagesToLog = rawMessageToCAPI(rawMessages);1482} catch (e) {1483this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);1484messagesToLog = [];1485}1486}14871488sendEngineMessagesTelemetry(this._telemetryService, messagesToLog ?? [], telemetryData, false, this._logService);1489});1490}14911492private async _handleError(1493telemetryData: TelemetryData,1494response: Response,1495requestId: string1496): Promise<ChatRequestFailed> {1497const modelRequestIdObj = getRequestId(response.headers);1498requestId = modelRequestIdObj.headerRequestId || requestId;1499modelRequestIdObj.headerRequestId = requestId;15001501telemetryData.properties.error = `Response status was ${response.status}`;1502telemetryData.properties.status = String(response.status);1503this._telemetryService.sendGHTelemetryEvent('request.shownWarning', telemetryData.properties, telemetryData.measurements);15041505const text = await response.text();1506let jsonData: Record<string, any> | undefined;1507try {1508jsonData = JSON.parse(text);1509jsonData = jsonData?.error ?? 
jsonData; // Extract nested error object if it exists1510} catch {1511// JSON parsing failed, it's not json content.1512}15131514const reasonNoText = `Server error: ${response.status}`;1515const reason = `${reasonNoText} ${text}`;1516this._logService.error(reason);15171518if (400 <= response.status && response.status < 500) {15191520if (response.status === 400 && text.includes('off_topic')) {1521return {1522type: FetchResponseKind.Failed,1523modelRequestId: modelRequestIdObj,1524failKind: ChatFailKind.OffTopic,1525reason: 'filtered as off_topic by intent classifier: message was not programming related',1526};1527}15281529if (response.status === 401 && text.includes('authorize_url') && jsonData?.authorize_url) {1530return {1531type: FetchResponseKind.Failed,1532modelRequestId: modelRequestIdObj,1533failKind: ChatFailKind.AgentUnauthorized,1534reason: response.statusText || response.statusText,1535data: jsonData1536};1537}15381539if (response.status === 400 && jsonData?.code === 'previous_response_not_found') {1540return {1541type: FetchResponseKind.Failed,1542modelRequestId: modelRequestIdObj,1543failKind: ChatFailKind.InvalidPreviousResponseId,1544reason: jsonData.message || 'Invalid previous response ID',1545data: jsonData,1546};1547}15481549if (response.status === 401 || response.status === 403) {1550// Token has expired or invalid, fetch a new one on next request1551// TODO(drifkin): these actions should probably happen in vsc specific code1552this._authenticationService.resetCopilotToken(response.status);1553return {1554type: FetchResponseKind.Failed,1555modelRequestId: modelRequestIdObj,1556failKind: ChatFailKind.TokenExpiredOrInvalid,1557reason: jsonData?.message || `token expired or invalid: ${response.status}`,1558};1559}15601561if (response.status === 402) {1562// When we receive a 402, we have exceed a quota1563// This is stored on the token so let's refresh it1564if (!this._authenticationService.copilotToken?.isChatQuotaExceeded) 
{1565this._authenticationService.resetCopilotToken(response.status);1566await this._authenticationService.getCopilotToken();1567}156815691570const retryAfter = response.headers.get('retry-after');15711572const convertToDate = (retryAfterString: string | null): Date | undefined => {1573if (!retryAfterString) {1574return undefined;1575}15761577// Try treating it as a date1578const retryAfterDate = new Date(retryAfterString);1579if (!isNaN(retryAfterDate.getDate())) {1580return retryAfterDate;1581}15821583// It is not a date, try treating it as a duration from the current date1584const retryAfterDuration = parseInt(retryAfterString, 10);1585if (isNaN(retryAfterDuration)) {1586return undefined;1587}15881589return new Date(Date.now() + retryAfterDuration * 1000);1590};15911592const retryAfterDate = convertToDate(retryAfter);15931594return {1595type: FetchResponseKind.Failed,1596modelRequestId: modelRequestIdObj,1597failKind: ChatFailKind.QuotaExceeded,1598reason: jsonData?.message ?? 'Free tier quota exceeded',1599data: {1600capiError: jsonData,1601retryAfter: retryAfterDate1602}1603};1604}16051606if (response.status === 404) {1607let errorReason: string;16081609// Check if response body is valid JSON1610if (!jsonData) {1611errorReason = text;1612} else {1613errorReason = JSON.stringify(jsonData);1614}16151616return {1617type: FetchResponseKind.Failed,1618modelRequestId: modelRequestIdObj,1619failKind: ChatFailKind.NotFound,1620reason: errorReason1621};1622}16231624if (response.status === 422) {1625return {1626type: FetchResponseKind.Failed,1627modelRequestId: modelRequestIdObj,1628failKind: ChatFailKind.ContentFilter,1629reason: 'Filtered by Responsible AI Service\n\n' + text,1630};1631}16321633if (response.status === 424) {1634return {1635type: FetchResponseKind.Failed,1636modelRequestId: modelRequestIdObj,1637failKind: ChatFailKind.AgentFailedDependency,1638reason: text1639};1640}16411642if (response.status === 429) {1643let rateLimitReason = text;1644rateLimitReason 
= jsonData?.message ?? jsonData?.code;16451646if (text.includes('extension_blocked') && jsonData?.code === 'extension_blocked' && jsonData?.type === 'rate_limit_error') {1647return {1648type: FetchResponseKind.Failed,1649modelRequestId: modelRequestIdObj,1650failKind: ChatFailKind.ExtensionBlocked,1651reason: 'Extension blocked',1652data: {1653...jsonData?.message,1654retryAfter: response.headers.get('retry-after'),1655}1656};1657}16581659// HTTP 429 Too Many Requests1660return {1661type: FetchResponseKind.Failed,1662modelRequestId: modelRequestIdObj,1663failKind: ChatFailKind.RateLimited,1664reason: rateLimitReason,1665data: {1666retryAfter: response.headers.get('retry-after'),1667rateLimitKey: response.headers.get('x-ratelimit-exceeded'),1668capiError: jsonData1669}1670};1671}16721673if (response.status === 466) {1674this._logService.info(text);1675return {1676type: FetchResponseKind.Failed,1677modelRequestId: modelRequestIdObj,1678failKind: ChatFailKind.ClientNotSupported,1679reason: `client not supported: ${text}`1680};1681}16821683if (response.status === 499) {1684this._logService.info('Cancelled by server');1685return {1686type: FetchResponseKind.Failed,1687modelRequestId: modelRequestIdObj,1688failKind: ChatFailKind.ServerCanceled,1689reason: 'canceled by server'1690};1691}16921693} else if (500 <= response.status && response.status < 600) {16941695if (response.status === 503) {1696return {1697type: FetchResponseKind.Failed,1698modelRequestId: modelRequestIdObj,1699failKind: ChatFailKind.RateLimited,1700reason: 'Upstream provider rate limit hit',1701data: {1702retryAfter: null,1703rateLimitKey: null,1704capiError: { code: 'upstream_provider_rate_limit', message: text }1705}1706};1707}17081709// HTTP 5xx Server Error1710return {1711type: FetchResponseKind.Failed,1712modelRequestId: modelRequestIdObj,1713failKind: ChatFailKind.ServerError,1714reason: reasonNoText,1715};1716}17171718this._logService.error(`Request Failed: ${response.status} 
${text}`);17191720sendCommunicationErrorTelemetry(this._telemetryService, 'Unhandled status from server: ' + response.status, text);17211722return {1723type: FetchResponseKind.Failed,1724modelRequestId: modelRequestIdObj,1725failKind: ChatFailKind.Unknown,1726reason: `Request Failed: ${response.status} ${text}`1727};1728}17291730private async processSuccessfulResponse(1731response: ChatResults,1732messages: Raw.ChatMessage[],1733requestBody: IEndpointBody,1734requestId: string,1735maxResponseTokens: number,1736promptTokenCount: number,1737timeToFirstToken: number,1738streamRecorder: FetchStreamRecorder,1739baseTelemetry: TelemetryData,1740chatEndpointInfo: IChatEndpoint,1741userInitiatedRequest: boolean | undefined,1742transport: string,1743fetcher: FetcherId | undefined,1744bytesReceived: number | undefined,1745suspendEventSeen: boolean | undefined,1746resumeEventSeen: boolean | undefined,1747): Promise<ChatResponses | ChatFetchRetriableError<string[]>> {17481749const completions: ChatCompletion[] = [];17501751for await (const chatCompletion of response.chatCompletions) {1752Telemetry.sendSuccessTelemetry(1753this._telemetryService,1754{1755chatCompletion,1756baseTelemetry,1757userInitiatedRequest,1758chatEndpointInfo,1759requestBody,1760maxResponseTokens,1761promptTokenCount,1762timeToFirstToken,1763timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? 
						streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,
					hasImageMessages: this.filterImageMessages(messages),
					transport,
					fetcher,
					bytesReceived,
					suspendEventSeen,
					resumeEventSeen,
				}
			);

			// Repetitive completions are reported via telemetry (inside isRepetitive)
			// and excluded from the returned set.
			if (!this.isRepetitive(chatCompletion, baseTelemetry?.properties)) {
				completions.push(chatCompletion);
			}
		}
		// Finish reasons that count as a usable result.
		const successFinishReasons = new Set([FinishedCompletionReason.Stop, FinishedCompletionReason.ClientTrimmed, FinishedCompletionReason.FunctionCall, FinishedCompletionReason.ToolCalls]);
		const successfulCompletions = completions.filter(c => successFinishReasons.has(c.finishReason));
		if (successfulCompletions.length >= 1) {
			return {
				type: ChatFetchResponseType.Success,
				resolvedModel: successfulCompletions[0].model,
				// Usage is only meaningful when there is exactly one completion.
				usage: successfulCompletions.length === 1 ? successfulCompletions[0].usage : undefined,
				value: successfulCompletions.map(c => getTextPart(c.message.content)),
				requestId,
				serverRequestId: successfulCompletions[0].requestId.headerRequestId,
			};
		}

		// No successful completion: classify by the first completion's finish reason.
		const result = completions.at(0);

		switch (result?.finishReason) {
			case FinishedCompletionReason.ContentFilter:
				return {
					type: ChatFetchResponseType.FilteredRetry,
					category: result.filterReason ?? FilterReason.Copyright,
					reason: 'Response got filtered.',
					value: completions.map(c => getTextPart(c.message.content)),
					requestId: requestId,
					serverRequestId: result.requestId.headerRequestId,
				};
			case FinishedCompletionReason.Length:
				return {
					type: ChatFetchResponseType.Length,
					reason: 'Response too long.',
					requestId: requestId,
					serverRequestId: result.requestId.headerRequestId,
					truncatedValue: getTextPart(result.message.content)
				};
			case FinishedCompletionReason.ServerError:
				return {
					type: ChatFetchResponseType.Failed,
					reason: 'Server error. Stream terminated',
					requestId: requestId,
					serverRequestId: result.requestId.headerRequestId,
					streamError: result.error
				};
		}
		// No completions at all (or an unrecognized finish reason).
		return {
			type: ChatFetchResponseType.Unknown,
			reason: RESPONSE_CONTAINED_NO_CHOICES,
			requestId: requestId,
			serverRequestId: result?.requestId.headerRequestId,
		};
	}

	/**
	 * Returns true when any message carries image content (an `imageUrl` part).
	 * NOTE(review): despite the name this is a predicate, not a filter.
	 */
	private filterImageMessages(messages: Raw.ChatMessage[]): boolean {
		return messages?.some(m => Array.isArray(m.content) ? m.content.some(c => 'imageUrl' in c) : false);
	}

	/**
	 * Detects repetition in a completion and reports it via telemetry.
	 * Token-level repetition (the returned flag) triggers enhanced GH telemetry;
	 * 10+ repeated lines additionally trigger an MSFT telemetry event.
	 *
	 * @returns true when token-level repetition was detected.
	 */
	private isRepetitive(chatCompletion: ChatCompletion, telemetryProperties?: TelemetryProperties) {
		const lineRepetitionStats = calculateLineRepetitionStats(getTextPart(chatCompletion.message.content));
		const hasRepetition = isRepetitive(chatCompletion.tokens);
		if (hasRepetition) {
			const telemetryData = TelemetryData.createAndMarkAsIssued();
			telemetryData.extendWithRequestId(chatCompletion.requestId);
			const extended = telemetryData.extendedBy(telemetryProperties);
			this._telemetryService.sendEnhancedGHTelemetryEvent('conversation.repetition.detected', extended.properties, extended.measurements);
		}
		if (lineRepetitionStats.numberOfRepetitions >= 10) {
			/* __GDPR__
				"conversation.repetition.detected" : {
					"owner": "lramos15",
					"comment": "Calculates the number of repetitions in a response. Useful for loop detection",
					"finishReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished. Helps identify cancellation vs length limits" },
					"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id for this message request." },
					"lengthOfLine": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Length of the repeating line, in characters."
					},
					"numberOfRepetitions": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Number of times the line repeats." },
					"totalLines": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Number of total lines in the response." }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('conversation.repetition.detected', {
				requestId: chatCompletion.requestId.headerRequestId,
				finishReason: chatCompletion.finishReason,
			}, {
				numberOfRepetitions: lineRepetitionStats.numberOfRepetitions,
				lengthOfLine: lineRepetitionStats.mostRepeatedLine.length,
				totalLines: lineRepetitionStats.totalLines
			});
		}
		return hasRepetition;
	}

	/**
	 * Check for repetition in partial response deltas from a cancelled request.
	 *
	 * This method performs the same repetition detection as the `isRepetitive` method,
	 * but operates on partial response data collected before the request was cancelled.
	 *
	 * Key differences from completed requests:
	 * - Text is reconstructed from delta.text values instead of message.content
	 * - Tokens are approximated by splitting text on whitespace instead of using
	 *   the actual token array (which is only available in completed responses)
	 * - Enhanced telemetry won't include RequestId fields since we only have the
	 *   headerRequestId string, not the full RequestId object
	 * - The finishReason is marked as 'canceled' to distinguish from server-generated
	 *   finish reasons
	 */
	private checkRepetitionInDeltas(
		deltas: IResponseDelta[],
		requestId: string,
		telemetryProperties?: TelemetryProperties
	): void {
		// Reconstruct the text content from deltas (filter out null, undefined, and empty text values)
		const textContent = deltas.filter(delta => delta.text?.length > 0).map(delta => delta.text).join('');

		// Early exit if no content
		if (!textContent || textContent.trim().length === 0) {
			return;
		}

		// For cancelled requests, we don't have the actual token array (only available in ChatCompletion),
		// so we approximate by splitting text content on whitespace. This is less precise than actual
		// tokenization but sufficient for detecting obvious repetition patterns.
		const tokens = textContent.split(/\s+/).filter(t => t.length > 0);

		// Check for line repetition
		const lineRepetitionStats = calculateLineRepetitionStats(textContent);

		// Check for token-level repetition
		const hasRepetition = isRepetitive(tokens);

		// Send telemetry if repetition is detected
		if (hasRepetition) {
			const telemetryData = TelemetryData.createAndMarkAsIssued();
			const extended = telemetryData.extendedBy(telemetryProperties);
			// Note: For cancelled requests, we don't have a full RequestId object,
			// so we can't use extendWithRequestId like the non-cancelled path does.
			// This means enhanced telemetry for cancelled requests won't include
			// completionId, created, deploymentId, or serverExperiments fields.
			this._telemetryService.sendEnhancedGHTelemetryEvent('conversation.repetition.detected', extended.properties, extended.measurements);
		}

		if (lineRepetitionStats.numberOfRepetitions >= 10) {
			this._telemetryService.sendMSFTTelemetryEvent('conversation.repetition.detected', {
				requestId: requestId,
				finishReason: 'canceled', // Client-side finish reason to distinguish from server-generated reasons
			}, {
				numberOfRepetitions: lineRepetitionStats.numberOfRepetitions,
				lengthOfLine: lineRepetitionStats.mostRepeatedLine.length,
				totalLines: lineRepetitionStats.totalLines
			});
		}
	}

	/**
	 * Maps a cancelled request to a Canceled {@link ChatResponses}, first running
	 * repetition detection over any partial deltas captured before cancellation.
	 */
	private processCanceledResponse(
		response: ChatRequestCanceled,
		requestId: string,
		streamRecorder?: FetchStreamRecorder,
		telemetryProperties?: TelemetryProperties
	): ChatResponses {
		// Check for repetition in the partial response before cancellation
		if
		(streamRecorder && streamRecorder.deltas.length > 0) {
			this.checkRepetitionInDeltas(streamRecorder.deltas, requestId, telemetryProperties);
		}

		return {
			type: ChatFetchResponseType.Canceled,
			reason: response.reason,
			requestId: requestId,
			serverRequestId: undefined,
		};
	}

	/**
	 * Maps an internal {@link ChatRequestFailed} (produced by _handleError and the
	 * WebSocket error handler) to the public {@link ChatFetchError} shape, branching
	 * on failKind. Unmatched kinds fall through to a generic Failed result.
	 */
	private processFailedResponse(response: ChatRequestFailed, requestId: string, isAuto: boolean): ChatFetchError {
		const serverRequestId = response.modelRequestId?.gitHubRequestId;
		const reason = response.reason;
		if (response.failKind === ChatFailKind.RateLimited) {
			return { type: ChatFetchResponseType.RateLimited, reason, requestId, serverRequestId, retryAfter: response.data?.retryAfter, rateLimitKey: (response.data?.rateLimitKey || ''), isAuto, capiError: response.data?.capiError };
		}
		if (response.failKind === ChatFailKind.QuotaExceeded) {
			return { type: ChatFetchResponseType.QuotaExceeded, reason, requestId, serverRequestId, retryAfter: response.data?.retryAfter, capiError: response.data?.capiError };
		}
		if (response.failKind === ChatFailKind.OffTopic) {
			return { type: ChatFetchResponseType.OffTopic, reason, requestId, serverRequestId };
		}
		// NOTE(review): the reason-string sniff ('Bad request: ') also routes
		// non-token failures into BadRequest — intentional per the visible code.
		if (response.failKind === ChatFailKind.TokenExpiredOrInvalid || response.failKind === ChatFailKind.ClientNotSupported || reason.includes('Bad request: ')) {
			return { type: ChatFetchResponseType.BadRequest, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.ServerError) {
			return { type: ChatFetchResponseType.Failed, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.ContentFilter) {
			return { type: ChatFetchResponseType.PromptFiltered, reason, category: FilterReason.Prompt, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.AgentUnauthorized) {
			return { type: ChatFetchResponseType.AgentUnauthorized, reason, authorizationUrl: response.data!.authorize_url, requestId, serverRequestId
		};
		}
		if (response.failKind === ChatFailKind.AgentFailedDependency) {
			return { type: ChatFetchResponseType.AgentFailedDependency, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.ExtensionBlocked) {
			// Default to a 300s retry window when the server did not provide one.
			const retryAfter = typeof response.data?.retryAfter === 'number' ? response.data.retryAfter : 300;
			return { type: ChatFetchResponseType.ExtensionBlocked, reason, requestId, retryAfter, learnMoreLink: response.data?.learnMoreLink ?? '', serverRequestId };
		}
		if (response.failKind === ChatFailKind.NotFound) {
			return { type: ChatFetchResponseType.NotFound, reason, requestId, serverRequestId };
		}
		if (response.failKind === ChatFailKind.InvalidPreviousResponseId) {
			return { type: ChatFetchResponseType.InvalidStatefulMarker, reason, requestId, serverRequestId };
		}

		return { type: ChatFetchResponseType.Failed, reason, requestId, serverRequestId };
	}

	/**
	 * Maps a thrown error (network failure, cancellation, fetcher error, or a
	 * CAPI WebSocket error attached to the thrown object) to a {@link ChatFetchError}.
	 * Cancellation-like errors are returned without logging; everything else is
	 * logged and reported via GH exception telemetry.
	 */
	private processError(err: unknown, requestId: string, gitHubRequestId: string | undefined, usernameToScrub: string | undefined, isAuto: boolean): ChatFetchError {
		// WebSocket transport surfaces CAPI errors as a property on the thrown object.
		const capiWebSocketError = (err as any)?.capiWebSocketError as CAPIWebSocketErrorEvent | undefined;
		if (capiWebSocketError) {
			return this._handleWebSocketError(capiWebSocketError, requestId, gitHubRequestId, isAuto);
		}

		const fetcher = this._fetcherService;
		// If we cancelled a network request, we don't want to log an error
		if (fetcher.isAbortError(err)) {
			return {
				type: ChatFetchResponseType.Canceled,
				reason: 'network request aborted',
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
		if (isCancellationError(err)) {
			return {
				type: ChatFetchResponseType.Canceled,
				reason: 'Got a cancellation error',
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
		// Premature stream close is also treated as a cancellation, matched both by
		// message and by Node's error code.
		if (err && (
			(err instanceof Error && err.message === 'Premature close') ||
			(typeof err === 'object' && (err as any).code ===
				'ERR_STREAM_PREMATURE_CLOSE') /* to be extra sure */)
		) {
			return {
				type: ChatFetchResponseType.Canceled,
				reason: 'Stream closed prematurely',
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
		// Non-cancellation errors: log, report, and classify by fetcher capabilities.
		this._logService.error(ErrorUtils.fromUnknown(err), `Error on conversation request`);
		this._telemetryService.sendGHTelemetryException(err, 'Error on conversation request');
		const userMessage = fetcher.getUserMessageForFetcherError(err);
		const errorDetail = collectSingleLineErrorMessage(err, true);
		// Scrub the local username from the detail string before it leaves this layer.
		const scrubbedErrorDetail = this.scrubErrorDetail(errorDetail, usernameToScrub);
		if (fetcher.isInternetDisconnectedError(err)) {
			return {
				type: ChatFetchResponseType.NetworkError,
				reason: `It appears you're not connected to the internet, please check your network connection and try again.`,
				reasonDetail: scrubbedErrorDetail,
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		} else if (fetcher.isFetcherError(err)) {
			const isNetworkProcessCrash = fetcher.isNetworkProcessCrashedError(err);
			return {
				type: ChatFetchResponseType.NetworkError,
				reason: userMessage,
				reasonDetail: scrubbedErrorDetail,
				requestId: requestId,
				serverRequestId: gitHubRequestId,
				// Only set the flag when true, keeping the property absent otherwise.
				...(isNetworkProcessCrash ? { isNetworkProcessCrash: true } : {}),
			};
		} else {
			return {
				type: ChatFetchResponseType.Failed,
				reason: 'Error on conversation request. Check the log for more details.',
				reasonDetail: scrubbedErrorDetail,
				requestId: requestId,
				serverRequestId: gitHubRequestId,
			};
		}
	}

	/**
	 * Maps a CAPI WebSocket error event to an internal {@link ChatRequestFailed}.
	 * Rate-limit and quota codes are matched by prefix (text before the first ':'),
	 * the remaining codes by exact match; quota errors also refresh the Copilot token.
	 */
	private async _handleWebSocketCAPIError(event: CAPIWebSocketErrorEvent, modelRequestId: RequestId): Promise<ChatRequestFailed> {
		const { code, message } = event.error;
		const capiError = { code, message };
		const codePrefix = code.split(':')[0];

		this._logService.error(`WebSocket CAPI error: ${message} (${code})`);

		if (codePrefix === 'rate_limited' || codePrefix === 'user_model_rate_limited' || codePrefix === 'user_global_rate_limited' || codePrefix === 'integration_rate_limited' || codePrefix === 'model_overloaded' || codePrefix === 'agent_mode_limit_exceeded') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.RateLimited,
				reason: message,
				data: { capiError },
			};
		}
		if (codePrefix === 'quota_exceeded' || codePrefix === 'free_quota_exceeded' || codePrefix === 'overage_limit_reached' || codePrefix === 'billing_not_configured') {
			// Refresh the copilot token so isChatQuotaExceeded reflects the new state,
			// matching the HTTP 402 handler behavior.
			if (!this._authenticationService.copilotToken?.isChatQuotaExceeded) {
				this._authenticationService.resetCopilotToken(402);
				await this._authenticationService.getCopilotToken();
			}
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.QuotaExceeded,
				reason: message,
				data: { capiError },
			};
		}
		if (code === 'content_filter') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.ContentFilter,
				reason: message,
			};
		}
		if (code === 'not_found') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.NotFound,
				reason: message,
			};
		}
		if (code === 'request_too_large') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.Unknown,
				reason: `Request Failed: ${code} ${message}`,
			};
		}
		if (code === 'service_unavailable') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.ServerError,
				reason: `Request Failed: ${code} ${message}`,
			};
		}
		if (code === 'bad_request') {
			return {
				type: FetchResponseKind.Failed,
				modelRequestId,
				failKind: ChatFailKind.Unknown,
				reason: `Request Failed: ${code} ${message}`,
			};
		}

		// internal_error, session_expired, or any unknown code
		return {
			type: FetchResponseKind.Failed,
			modelRequestId,
			failKind: ChatFailKind.ServerError,
			reason: `Request Failed: ${code} ${message || 'WebSocket server error'}`,
		};
	}

	/**
	 * Maps a CAPI WebSocket error event directly to a public {@link ChatFetchError}
	 * (used when the error surfaces as a thrown object in processError). Mirrors the
	 * classification in _handleWebSocketCAPIError but without token refresh side effects.
	 */
	private _handleWebSocketError(event: CAPIWebSocketErrorEvent, requestId: string, serverRequestId: string | undefined, isAuto: boolean): ChatFetchError {
		const { code, message } = event.error;
		const capiError = { code, message };
		const codePrefix = code.split(':')[0];

		if (codePrefix === 'rate_limited' || codePrefix === 'user_model_rate_limited' || codePrefix === 'user_global_rate_limited' || codePrefix === 'integration_rate_limited' || codePrefix === 'model_overloaded' || codePrefix === 'agent_mode_limit_exceeded') {
			return { type: ChatFetchResponseType.RateLimited, reason: message, requestId, serverRequestId, retryAfter: undefined, rateLimitKey: '', isAuto, capiError };
		}
		if (codePrefix === 'quota_exceeded' || codePrefix === 'free_quota_exceeded' || codePrefix === 'overage_limit_reached' || codePrefix === 'billing_not_configured') {
			return { type: ChatFetchResponseType.QuotaExceeded, reason: message, requestId, serverRequestId, capiError, retryAfter: undefined };
		}
		if (code === 'content_filter') {
			return { type: ChatFetchResponseType.PromptFiltered, reason: message, category: FilterReason.Prompt, requestId, serverRequestId };
		}
		if
		(code === 'not_found') {
			return { type: ChatFetchResponseType.NotFound, reason: message, requestId, serverRequestId };
		}
		if (code === 'bad_request') {
			return { type: ChatFetchResponseType.BadRequest, reason: message, requestId, serverRequestId };
		}

		// internal_error, session_expired, service_unavailable, request_too_large, or any unknown code
		return { type: ChatFetchResponseType.Failed, reason: `Request Failed: ${code} ${message || 'WebSocket server error'}`, requestId, serverRequestId };
	}

	/**
	 * Replaces occurrences of the local username in an error-detail string with
	 * '<login>' and marks any residual "logged in as <name>" value with '!<login>!'
	 * as a scrubbing fallback, so usernames never leave this layer.
	 */
	private scrubErrorDetail(errorDetail: string, usernameToScrub: string | undefined) {
		if (usernameToScrub) {
			const regex = new RegExp(escapeRegExpCharacters(usernameToScrub), 'ig');
			errorDetail = errorDetail.replaceAll(regex, '<login>');
		}
		return errorDetail.replaceAll(/(?<=logged in as )(?!<login>)[^\s]+/ig, '!<login>!'); // marking fallback with !
	}
}

/**
 * Validates a chat request payload to ensure it is valid
 * @param params The params being sent in the chat request
 * @returns Whether the chat payload is valid
 */
// NOTE(review): configurationService and experimentationService are accepted but
// unused in this body — possibly kept for signature stability; confirm with callers.
function isValidChatPayload(messages: Raw.ChatMessage[], postOptions: OptionalChatRequestParams, endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): { isValid: boolean; reason: string } {
	if (messages.length === 0) {
		return { isValid: false, reason: asUnexpected('No messages provided') };
	}
	if (postOptions?.max_tokens && postOptions?.max_tokens < 1) {
		return { isValid: false, reason: asUnexpected('Invalid response token parameter') };
	}

	// Function/tool names must be safe identifiers.
	const functionNamePattern = /^[a-zA-Z0-9_-]+$/;
	if (
		postOptions?.functions?.some(f => !f.name.match(functionNamePattern)) ||
		postOptions?.function_call?.name && !postOptions.function_call.name.match(functionNamePattern)
	) {
		return { isValid: false, reason: asUnexpected('Function names must match ^[a-zA-Z0-9_-]+$') };
	}

	// The hard tool limit is waived for endpoints that can search over tools.
	if (postOptions?.tools && postOptions.tools.length > HARD_TOOL_LIMIT && !endpoint.supportsToolSearch) {
		return { isValid: false, reason: `Tool limit exceeded (${postOptions.tools.length}/${HARD_TOOL_LIMIT}). Click "Configure Tools" in the chat input to disable ${postOptions.tools.length - HARD_TOOL_LIMIT} tools and retry.` };
	}

	return { isValid: true, reason: '' };
}

// Wraps a validation failure message in the standard user-facing prefix/suffix.
function asUnexpected(reason: string) {
	return `Prompt failed validation with the reason: ${reason}. Please file an issue.`;
}

// Creates the base telemetry payload for a chat request.
// NOTE(review): chatEndpointInfo is accepted but unused in this body — confirm.
export function createTelemetryData(chatEndpointInfo: IChatEndpoint, location: ChatLocation, headerRequestId: string) {
	return TelemetryData.createAndMarkAsIssued({
		endpoint: 'completions',
		engineName: 'chat',
		uiKind: ChatLocation.toString(location),
		headerRequestId
	});
}

/**
 * WARNING: The value that is returned from this function drives the disablement of RAI for full-file rewrite requests
 * in Copilot Edits, Copilot Chat, Agent Mode, and Inline Chat.
 * If your chat location generates full-file rewrite requests and you are unsure if changing something here will cause problems, please talk to @roblourens
 */

export function locationToIntent(location: ChatLocation): string {
	switch (location) {
		case ChatLocation.Panel:
			return 'conversation-panel';
		case ChatLocation.Editor:
			return 'conversation-inline';
		case ChatLocation.EditingSession:
			return 'conversation-edits';
		case ChatLocation.Notebook:
			return 'conversation-notebook';
		case ChatLocation.Terminal:
			return 'conversation-terminal';
		case ChatLocation.Other:
			return 'conversation-other';
		case ChatLocation.Agent:
			return 'conversation-agent';
		case ChatLocation.ResponsesProxy:
			return 'responses-proxy';
		case ChatLocation.MessagesProxy:
			return 'messages-proxy';
	}
}