GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import type { OpenAI } from 'openai';
import type { CancellationToken } from 'vscode';
import { IAuthenticationService } from '../../../platform/authentication/common/authentication';
import { CopilotToken } from '../../../platform/authentication/common/copilotToken';
import { FetchStreamRecorder, IChatMLFetcher, IFetchMLOptions, Source } from '../../../platform/chat/common/chatMLFetcher';
import { IChatQuotaService } from '../../../platform/chat/common/chatQuotaService';
import { ChatFetchError, ChatFetchResponseType, ChatFetchRetriableError, ChatLocation, ChatResponse, ChatResponses, RESPONSE_CONTAINED_NO_CHOICES } from '../../../platform/chat/common/commonTypes';
import { IConversationOptions } from '../../../platform/chat/common/conversationOptions';
import { getTextPart, toTextParts } from '../../../platform/chat/common/globalStringUtils';
import { IInteractionService } from '../../../platform/chat/common/interactionService';
import { ConfigKey, HARD_TOOL_LIMIT, IConfigurationService } from '../../../platform/configuration/common/configurationService';
import { ICAPIClientService } from '../../../platform/endpoint/common/capiClient';
import { isAutoModel } from '../../../platform/endpoint/node/autoChatEndpoint';
import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi';
import { collectSingleLineErrorMessage, ILogService } from '../../../platform/log/common/logService';
import { FinishedCallback, getRequestId, IResponseDelta, OptionalChatRequestParams, RequestId } from '../../../platform/networking/common/fetch';
import { FetcherId, IFetcherService, Response } from '../../../platform/networking/common/fetcherService';
import { IBackgroundRequestOptions, IChatEndpoint, IEndpointBody, ISubagentRequestOptions, postRequest, stringifyUrlOrRequestMetadata } from '../../../platform/networking/common/networking';
import { CAPIChatMessage, ChatCompletion, FilterReason, FinishedCompletionReason, rawMessageToCAPI } from '../../../platform/networking/common/openai';
import { sendEngineMessagesTelemetry } from '../../../platform/networking/node/chatStream';
import { CAPIWebSocketErrorEvent, IChatWebSocketManager, isCAPIWebSocketError } from '../../../platform/networking/node/chatWebSocketManager';
import { sendCommunicationErrorTelemetry } from '../../../platform/networking/node/stream';
import { ChatFailKind, ChatRequestCanceled, ChatRequestFailed, ChatResults, FetchResponseKind } from '../../../platform/openai/node/fetch';
import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, normalizeProviderMessages, StdAttr, toSystemInstructions, toToolDefinitions, truncateForOTel } from '../../../platform/otel/common/index';
import { IOTelService, ISpanHandle, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';
import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';
import { getCurrentCapturingToken } from '../../../platform/requestLogger/node/requestLogger';
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
import { ITelemetryService, TelemetryProperties } from '../../../platform/telemetry/common/telemetry';
import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';
import { isEncryptedThinkingDelta } from '../../../platform/thinking/common/thinking';
import { calculateLineRepetitionStats, isRepetitive } from '../../../util/common/anomalyDetection';
import { ErrorUtils } from '../../../util/common/errors';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { isCancellationError } from '../../../util/vs/base/common/errors';
import { Emitter } from '../../../util/vs/base/common/event';
import { Disposable } from '../../../util/vs/base/common/lifecycle';
import { escapeRegExpCharacters } from '../../../util/vs/base/common/strings';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { isBYOKModel } from '../../byok/node/openAIEndpoint';
import { EXTENSION_ID } from '../../common/constants';
import { IPowerService } from '../../power/common/powerService';
import { ChatMLFetcherTelemetrySender as Telemetry } from './chatMLFetcherTelemetry';
export interface IMadeChatRequestEvent {
	readonly messages: Raw.ChatMessage[];
	readonly model: string;
	readonly source?: Source;
	readonly tokenCount?: number;
}

export abstract class AbstractChatMLFetcher extends Disposable implements IChatMLFetcher {

	declare _serviceBrand: undefined;

	constructor(
		protected readonly options: IConversationOptions,
	) {
		super();
	}

	protected preparePostOptions(requestOptions: OptionalChatRequestParams): OptionalChatRequestParams {
		return {
			temperature: this.options.temperature,
			top_p: this.options.topP,
			// we disallow `stream=false` because we don't support non-streamed responses
			...requestOptions,
			stream: true
		};
	}
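
	// Illustrative example of the merge order above: caller options can override the
	// conversation defaults, but `stream: true` is spread last and always wins.
	// (Assumes hypothetical conversation defaults of temperature 0.1 and topP 1.)
	//
	//   this.preparePostOptions({ temperature: 0, stream: false });
	//   // => { temperature: 0, top_p: 1, stream: true }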

	protected readonly _onDidMakeChatMLRequest = this._register(new Emitter<IMadeChatRequestEvent>());
	readonly onDidMakeChatMLRequest = this._onDidMakeChatMLRequest.event;

	public async fetchOne(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponse> {
		const resp = await this.fetchMany({
			...opts,
			requestOptions: { ...opts.requestOptions, n: 1 }
		}, token);
		if (resp.type === ChatFetchResponseType.Success) {
			return { ...resp, value: resp.value[0] };
		}
		return resp;
	}

	/**
	 * Note: the returned array of strings may contain fewer than `n` entries (e.g., if errors occurred during streaming)
	 */
	public abstract fetchMany(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponses>;
}
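
// Illustrative usage sketch of the fetchOne/fetchMany API above. `fetcher` and `fetchOptions`
// are hypothetical placeholders (an IChatMLFetcher instance and a populated IFetchMLOptions),
// not identifiers defined in this module:
//
//   const response = await fetcher.fetchOne(fetchOptions, CancellationToken.None);
//   if (response.type === ChatFetchResponseType.Success) {
//   	console.log(response.value); // the single completion string (n is forced to 1)
//   } // other response.type values indicate filtering, cancellation, or errors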

export class ChatMLFetcherImpl extends AbstractChatMLFetcher {

	private static readonly _maxConsecutiveWebSocketFallbacks = 3;

	/**
	 * Delays (in ms) between connectivity check attempts before retrying a failed request.
	 * Configurable for testing purposes.
	 */
	public connectivityCheckDelays = [1000, 10000, 10000];

	/**
	 * Tracks consecutive WebSocket request failures where the HTTP retry succeeded.
	 * After {@link _maxConsecutiveWebSocketFallbacks} such failures, WebSocket requests are disabled entirely.
	 */
	private _consecutiveWebSocketRetryFallbacks = 0;

	constructor(
		@IFetcherService private readonly _fetcherService: IFetcherService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
		@ILogService private readonly _logService: ILogService,
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IInteractionService private readonly _interactionService: IInteractionService,
		@IChatQuotaService private readonly _chatQuotaService: IChatQuotaService,
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IConversationOptions options: IConversationOptions,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IExperimentationService private readonly _experimentationService: IExperimentationService,
		@IPowerService private readonly _powerService: IPowerService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IChatWebSocketManager private readonly _webSocketManager: IChatWebSocketManager,
		@IOTelService private readonly _otelService: IOTelService,
	) {
		super(options);
}
134
135
/**
136
* Note: the returned array of strings may contain fewer than `n` entries (e.g., if errors occurred during streaming)
137
*/
138
public async fetchMany(opts: IFetchMLOptions, token: CancellationToken): Promise<ChatResponses> {
139
let { debugName, endpoint: chatEndpoint, finishedCb, location, messages, requestOptions, source, telemetryProperties, userInitiatedRequest, requestKindOptions, conversationId, turnId, useWebSocket, ignoreStatefulMarker } = opts;
140
if (useWebSocket && this._consecutiveWebSocketRetryFallbacks >= ChatMLFetcherImpl._maxConsecutiveWebSocketFallbacks) {
141
this._logService.debug(`[ChatWebSocketManager] Disabling WebSocket for request due to ${this._consecutiveWebSocketRetryFallbacks} consecutive WebSocket failures with successful HTTP fallback.`);
142
useWebSocket = false;
143
ignoreStatefulMarker = true;
144
}
145
if (!telemetryProperties) {
146
telemetryProperties = {};
147
}
148
149
if (!telemetryProperties.messageSource) {
150
telemetryProperties.messageSource = debugName;
151
}
152
153
const transport = useWebSocket ? 'websocket' : 'http';
154
155
// TODO @lramos15 telemetry should not drive request ids
156
const ourRequestId = telemetryProperties.requestId ?? telemetryProperties.messageId ?? generateUuid();
157
158
const maxResponseTokens = chatEndpoint.maxOutputTokens;
159
if (!requestOptions?.prediction) {
160
requestOptions = { max_tokens: maxResponseTokens, ...requestOptions };
161
}
162
// Avoid sending a prediction with no content as this will yield a 400 Bad Request
163
if (!requestOptions.prediction?.content) {
164
delete requestOptions['prediction'];
165
}
166
167
const postOptions = this.preparePostOptions(requestOptions);
168
const requestBody = chatEndpoint.createRequestBody({
169
...opts,
170
ignoreStatefulMarker,
171
requestId: ourRequestId,
172
postOptions
173
});
174
175
176
const baseTelemetry = TelemetryData.createAndMarkAsIssued({
177
...telemetryProperties,
178
...(conversationId ? { conversationId } : {}),
179
headerRequestId: ourRequestId,
180
baseModel: chatEndpoint.model,
181
uiKind: ChatLocation.toString(location)
182
});
183
184
const pendingLoggedChatRequest = this._requestLogger.logChatRequest(debugName, chatEndpoint, {
185
messages: opts.messages,
186
model: chatEndpoint.model,
187
ourRequestId,
188
location: opts.location,
189
body: requestBody,
190
ignoreStatefulMarker,
191
isConversationRequest: opts.isConversationRequest,
192
customMetadata: opts.customMetadata
193
});
194
let tokenCount = -1;
195
const streamRecorder = new FetchStreamRecorder(finishedCb);
196
const enableRetryOnError = opts.enableRetryOnError ?? opts.enableRetryOnFilter;
197
const canRetryOnce = opts.canRetryOnceWithoutRollback ?? !(opts.enableRetryOnFilter || opts.enableRetryOnError);
198
let usernameToScrub: string | undefined;
199
let actualFetcher: FetcherId | undefined;
200
let actualBytesReceived: number | undefined;
201
let actualStatusCode: number | undefined;
202
let suspendEventSeen: boolean | undefined;
203
let resumeEventSeen: boolean | undefined;
204
let otelInferenceSpan: ISpanHandle | undefined;
205
try {
206
let response: ChatResults | ChatRequestFailed | ChatRequestCanceled;
207
const payloadValidationResult = isValidChatPayload(opts.messages, postOptions, chatEndpoint, this._configurationService, this._experimentationService);
208
if (!payloadValidationResult.isValid) {
209
response = {
210
type: FetchResponseKind.Failed,
211
modelRequestId: undefined,
212
failKind: ChatFailKind.ValidationFailed,
213
reason: payloadValidationResult.reason,
214
};
215
} else {
216
let tokenCountPromise: Promise<number> | undefined;
217
const countTokens = () => tokenCountPromise ??= chatEndpoint.acquireTokenizer().countMessagesTokens(messages);
218
const copilotToken = await this._authenticationService.getCopilotToken();
219
usernameToScrub = copilotToken.username;
220
221
const fetchResult = await this._fetchAndStreamChat(
222
chatEndpoint,
223
requestBody,
224
baseTelemetry,
225
streamRecorder.callback,
226
requestOptions.secretKey,
227
copilotToken,
228
opts.location,
229
ourRequestId,
230
postOptions.n,
231
token,
232
countTokens,
233
userInitiatedRequest,
234
useWebSocket,
235
turnId,
236
conversationId,
237
telemetryProperties,
238
opts.useFetcher,
239
canRetryOnce,
240
requestKindOptions,
241
opts.summarizedAtRoundId,
242
opts.modeChanged,
243
);
244
response = fetchResult.result;
245
actualFetcher = fetchResult.fetcher;
246
actualBytesReceived = fetchResult.bytesReceived;
247
actualStatusCode = fetchResult.statusCode;
248
suspendEventSeen = fetchResult.suspendEventSeen;
249
resumeEventSeen = fetchResult.resumeEventSeen;
250
otelInferenceSpan = fetchResult.otelSpan;
251
// Tag span with debug name so orphaned spans (title, progressMessages, etc.) are identifiable
252
otelInferenceSpan?.setAttribute(GenAiAttr.AGENT_NAME, debugName);
253
254
// Extract and set structured prompt sections for the debug panel
255
if (otelInferenceSpan) {
256
// Support both Chat Completions API (messages) and Responses API (input) formats
257
const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<{ role?: string; content?: string | unknown[] }> | undefined;
258
// User request: last user-role message
259
const userMessages = capiMessages?.filter(m => m.role === 'user');
260
const lastUserMsg = userMessages?.[userMessages.length - 1];
261
if (lastUserMsg?.content) {
262
const userContent = typeof lastUserMsg.content === 'string'
263
? lastUserMsg.content
264
: JSON.stringify(lastUserMsg.content);
265
otelInferenceSpan.setAttribute(CopilotChatAttr.USER_REQUEST, truncateForOTel(userContent));
266
}
267
// System instructions — check messages array, top-level system (Anthropic), or instructions (Responses API)
268
const systemMsg = capiMessages?.find(m => m.role === 'system');
269
const systemContent = systemMsg?.content
270
?? (requestBody as Record<string, unknown>).system
271
?? (requestBody as Record<string, unknown>).instructions;
272
if (systemContent) {
273
let systemText: string;
274
if (typeof systemContent === 'string') {
275
systemText = systemContent;
276
} else if (Array.isArray(systemContent)) {
277
// Anthropic format: array of content blocks — extract text only,
278
// dropping metadata like cache_control so the value is stable across turns.
279
systemText = (systemContent as Array<{ text?: string }>)
280
.map(b => b.text ?? '')
281
.join('\n');
282
} else {
283
systemText = JSON.stringify(systemContent);
284
}
285
// Format as OTel GenAI system instruction JSON schema
286
const systemInstructions = toSystemInstructions(systemText);
287
if (systemInstructions) {
288
otelInferenceSpan.setAttribute(GenAiAttr.SYSTEM_INSTRUCTIONS, JSON.stringify(systemInstructions));
289
}
290
}
291
}
292
293
// Always capture full request content for the debug panel
294
if (otelInferenceSpan) {
295
const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<Record<string, unknown>> | undefined;
296
if (capiMessages) {
297
// Normalize provider-specific content (Anthropic tool_use/tool_result, OpenAI tool messages) to OTel schema
298
otelInferenceSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(normalizeProviderMessages(capiMessages))));
299
}
300
// Tool definitions: emit on every chat span so trace viewers can render the
301
// tool catalog per LLM call (issue #299934). Includes `parameters` per
302
// OTel GenAI semantic conventions (issue #300318).
303
const toolDefs = toToolDefinitions(requestBody.tools);
304
if (toolDefs) {
305
otelInferenceSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));
306
}
307
}
308
tokenCount = await countTokens();
309
const extensionId = source?.extensionId ?? EXTENSION_ID;
310
this._onDidMakeChatMLRequest.fire({
311
messages,
312
model: chatEndpoint.model,
313
source: { extensionId },
314
tokenCount
315
});
316
}
317
const timeToFirstToken = Date.now() - baseTelemetry.issuedTime;
318
pendingLoggedChatRequest?.markTimeToFirstToken(timeToFirstToken);
319
switch (response.type) {
320
case FetchResponseKind.Success: {
321
const result = await this.processSuccessfulResponse(response, messages, requestBody, ourRequestId, maxResponseTokens, tokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpoint, userInitiatedRequest, transport, actualFetcher, actualBytesReceived, suspendEventSeen, resumeEventSeen);
322
323
// Handle FilteredRetry case with augmented messages
324
if (result.type === ChatFetchResponseType.FilteredRetry) {
325
326
if (opts.enableRetryOnFilter) {
327
streamRecorder.callback('', 0, { text: '', retryReason: result.category });
328
329
const filteredContent = result.value[0];
330
if (filteredContent) {
331
const retryMessage = (result.category === FilterReason.Copyright) ?
332
`The previous response (copied below) was filtered due to being too similar to existing public code. Please suggest something similar in function that does not match public code. Here's the previous response: ${filteredContent}\n\n` :
333
`The previous response (copied below) was filtered due to triggering our content safety filters, which looks for hateful, self-harm, sexual, or violent content. Please suggest something similar in content that does not trigger these filters. Here's the previous response: ${filteredContent}\n\n`;
334
const augmentedMessages: Raw.ChatMessage[] = [
335
...messages,
336
{
337
role: Raw.ChatRole.User,
338
content: toTextParts(retryMessage)
339
}
340
];
341
342
// Retry with augmented messages
343
const retryResult = await this.fetchMany({
344
...opts,
345
debugName: 'retry-' + debugName,
346
messages: augmentedMessages,
347
finishedCb,
348
location,
349
endpoint: chatEndpoint,
350
source,
351
requestOptions,
352
userInitiatedRequest: false, // do not mark the retry as user initiated
353
telemetryProperties: { ...telemetryProperties, retryAfterFilterCategory: result.category ?? 'uncategorized' },
354
enableRetryOnFilter: false,
355
canRetryOnceWithoutRollback: false,
356
enableRetryOnError,
357
}, token);
358
359
pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);
360
if (retryResult.type === ChatFetchResponseType.Success) {
361
return retryResult;
362
}
363
}
364
}
365
366
return {
367
type: ChatFetchResponseType.Filtered,
368
category: result.category,
369
reason: 'Response got filtered.',
370
requestId: result.requestId,
371
serverRequestId: result.serverRequestId
372
};
373
}
374
375
pendingLoggedChatRequest?.resolve(result, streamRecorder.deltas);
376
377
// Record OTel token usage metrics if available
378
if (result.type === ChatFetchResponseType.Success && result.usage) {
379
const metricAttrs = {
380
operationName: GenAiOperationName.CHAT,
381
providerName: GenAiProviderName.GITHUB,
382
requestModel: chatEndpoint.model,
383
responseModel: result.resolvedModel,
384
};
385
if (result.usage.prompt_tokens) {
386
GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs);
387
}
388
if (result.usage.completion_tokens) {
389
GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs);
390
}
391
392
// Set token usage and response details on the chat span before ending it
393
otelInferenceSpan?.setAttributes({
394
[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,
395
[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,
396
[GenAiAttr.RESPONSE_MODEL]: result.resolvedModel ?? chatEndpoint.model,
397
[GenAiAttr.RESPONSE_ID]: result.requestId,
398
[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],
399
...(result.usage.prompt_tokens_details?.cached_tokens
400
? { [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }
401
: {}),
402
...(result.usage.prompt_tokens_details?.cache_creation_input_tokens
403
? { [GenAiAttr.USAGE_CACHE_CREATION_INPUT_TOKENS]: result.usage.prompt_tokens_details.cache_creation_input_tokens }
404
: {}),
405
[CopilotChatAttr.TIME_TO_FIRST_TOKEN]: timeToFirstToken,
406
...(result.serverRequestId ? { [CopilotChatAttr.SERVER_REQUEST_ID]: result.serverRequestId } : {}),
407
...(result.usage.completion_tokens_details?.reasoning_tokens
408
? { [GenAiAttr.USAGE_REASONING_TOKENS]: result.usage.completion_tokens_details.reasoning_tokens }
409
: {}),
410
});
411
}
412
// Always capture response content for the debug panel
413
if (otelInferenceSpan && result.type === ChatFetchResponseType.Success) {
414
const responseText = streamRecorder.deltas.map(d => d.text).join('');
415
const toolCalls = streamRecorder.deltas
416
.filter(d => d.copilotToolCalls?.length)
417
.flatMap(d => d.copilotToolCalls!.map(tc => ({
418
type: 'tool_call' as const, id: tc.id, name: tc.name, arguments: tc.arguments
419
})));
420
const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];
421
if (responseText) {
422
parts.push({ type: 'text', content: responseText });
423
}
424
parts.push(...toolCalls);
425
if (parts.length > 0) {
426
otelInferenceSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));
427
}
428
// Capture reasoning/thinking text if present
429
const hasThinking = streamRecorder.deltas.some(d => d.thinking);
430
if (hasThinking) {
431
const thinkingTexts = streamRecorder.deltas
432
.filter(d => d.thinking && !isEncryptedThinkingDelta(d.thinking) && d.thinking.text)
433
.map(d => {
434
const t = d.thinking!;
435
if ('encrypted' in t) { return ''; }
436
return Array.isArray(t.text) ? t.text.join('') : (t.text ?? '');
437
});
438
const reasoningText = thinkingTexts.join('');
439
otelInferenceSpan.setAttribute(CopilotChatAttr.REASONING_CONTENT, truncateForOTel(reasoningText || '[encrypted]'));
440
}
441
}
442
443
// Emit OTel inference details event BEFORE ending the span
444
// so the log record inherits the active trace context
445
emitInferenceDetailsEvent(
446
this._otelService,
447
{
448
model: chatEndpoint.model,
449
temperature: requestOptions?.temperature,
450
maxTokens: requestOptions?.max_tokens,
451
},
452
result.type === ChatFetchResponseType.Success ? {
453
id: result.requestId,
454
model: result.resolvedModel,
455
finishReasons: ['stop'],
456
inputTokens: result.usage?.prompt_tokens,
457
outputTokens: result.usage?.completion_tokens,
458
} : undefined,
459
);
460
461
otelInferenceSpan?.end();
462
otelInferenceSpan = undefined;
463
464
// Record OTel time-to-first-token metric
465
if (timeToFirstToken > 0) {
466
GenAiMetrics.recordTimeToFirstToken(this._otelService, chatEndpoint.model, timeToFirstToken / 1000);
467
}
468
469
if (useWebSocket && result.type === ChatFetchResponseType.Success) {
470
this._consecutiveWebSocketRetryFallbacks = 0;
471
}
472
473
return result;
474
}
475
case FetchResponseKind.Canceled:
476
Telemetry.sendCancellationTelemetry(
477
this._telemetryService,
478
{
479
source: telemetryProperties.messageSource ?? 'unknown',
480
requestId: ourRequestId,
481
model: chatEndpoint.model,
482
apiType: chatEndpoint.apiType,
483
transport,
484
associatedRequestId: telemetryProperties.associatedRequestId,
485
retryAfterError: telemetryProperties.retryAfterError,
486
retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,
487
connectivityTestError: telemetryProperties.connectivityTestError,
488
connectivityTestErrorGitHubRequestId: telemetryProperties.connectivityTestErrorGitHubRequestId,
489
retryAfterFilterCategory: telemetryProperties.retryAfterFilterCategory,
490
fetcher: actualFetcher,
491
suspendEventSeen,
492
resumeEventSeen,
493
},
494
{
495
totalTokenMax: chatEndpoint.modelMaxPromptTokens ?? -1,
496
promptTokenCount: tokenCount,
497
tokenCountMax: maxResponseTokens,
498
timeToFirstToken,
499
timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,
500
timeToCancelled: Date.now() - baseTelemetry.issuedTime,
501
isVisionRequest: this.filterImageMessages(messages) ? 1 : -1,
502
isBYOK: isBYOKModel(chatEndpoint),
503
isAuto: isAutoModel(chatEndpoint),
504
bytesReceived: actualBytesReceived,
505
issuedTime: baseTelemetry.issuedTime,
506
});
507
pendingLoggedChatRequest?.resolveWithCancelation();
508
// Set canceled status on OTel span
509
otelInferenceSpan?.setAttributes({
510
[GenAiAttr.RESPONSE_FINISH_REASONS]: ['cancelled'],
511
[CopilotChatAttr.CANCELED]: true,
512
});
513
otelInferenceSpan?.end();
514
otelInferenceSpan = undefined;
515
return this.processCanceledResponse(response, ourRequestId, streamRecorder, telemetryProperties);
516
case FetchResponseKind.Failed: {
517
const processed = this.processFailedResponse(response, ourRequestId, isAutoModel(chatEndpoint) === 1);
518
// Retry on server errors based on configured status codes
519
const retryServerErrorStatusCodes = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.RetryServerErrorStatusCodes, this._experimentationService);
520
const statusCodesToRetry = retryServerErrorStatusCodes
521
.split(',')
522
.map(s => parseInt(s.trim(), 10));
523
const retryAfterServerError = enableRetryOnError && actualStatusCode !== undefined && statusCodesToRetry.includes(actualStatusCode);
524
const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (response.failKind === ChatFailKind.ServerError || response.failKind === ChatFailKind.Unknown);
525
if (retryAfterServerError || retryWithoutWebSocket) {
526
const { retryResult } = await this._retryAfterError({
527
opts,
528
processed,
529
telemetryProperties,
530
requestBody,
531
tokenCount,
532
maxResponseTokens,
533
timeToError: timeToFirstToken,
534
transport,
535
actualFetcher,
536
bytesReceived: actualBytesReceived,
537
baseTelemetry,
538
streamRecorder,
539
retryReason: 'server_error',
540
debugNamePrefix: 'retry-server-error-',
541
pendingLoggedChatRequest,
542
token,
543
usernameToScrub,
544
suspendEventSeen,
545
resumeEventSeen,
546
});
547
if (retryResult) {
548
return retryResult;
549
}
550
}
551
Telemetry.sendResponseErrorTelemetry(this._telemetryService, {
552
processed,
553
telemetryProperties,
554
chatEndpointInfo: chatEndpoint,
555
requestBody,
556
tokenCount,
557
maxResponseTokens,
558
timeToFirstToken,
559
isVisionRequest: this.filterImageMessages(messages),
560
transport,
561
fetcher: actualFetcher,
562
bytesReceived: actualBytesReceived,
563
issuedTime: baseTelemetry.issuedTime,
564
wasRetried: false,
565
suspendEventSeen,
566
resumeEventSeen,
567
});
568
pendingLoggedChatRequest?.resolve(processed);
569
return processed;
570
}
571
}
572
} catch (err) {
573
// End OTel inference span on error if not already ended
574
if (otelInferenceSpan) {
575
otelInferenceSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));
576
otelInferenceSpan.setAttribute(StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');
577
otelInferenceSpan.setAttribute(GenAiAttr.RESPONSE_FINISH_REASONS, ['error']);
578
otelInferenceSpan.recordException(err);
579
otelInferenceSpan.end();
580
}
581
const timeToError = Date.now() - baseTelemetry.issuedTime;
582
if (err.fetcherId) {
583
actualFetcher = err.fetcherId;
584
}
585
if (err.suspendEventSeen) {
586
suspendEventSeen = true;
587
}
588
if (err.resumeEventSeen) {
589
resumeEventSeen = true;
590
}
591
const processed = this.processError(err, ourRequestId, err.gitHubRequestId, usernameToScrub, isAutoModel(chatEndpoint) === 1);
592
const retryNetworkError = enableRetryOnError && processed.type === ChatFetchResponseType.NetworkError && this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.RetryNetworkErrors, this._experimentationService);
593
const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (processed.type === ChatFetchResponseType.NetworkError || processed.type === ChatFetchResponseType.Failed);
594
if (retryNetworkError || retryWithoutWebSocket) {
595
const { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId } = await this._retryAfterError({
596
opts,
597
processed,
598
telemetryProperties,
599
requestBody,
600
tokenCount,
601
maxResponseTokens,
602
timeToError,
603
transport,
604
actualFetcher,
605
bytesReceived: err.bytesReceived,
606
baseTelemetry,
607
streamRecorder,
608
retryReason: 'network_error',
609
debugNamePrefix: 'retry-error-',
610
pendingLoggedChatRequest,
611
token,
612
usernameToScrub,
613
suspendEventSeen,
614
resumeEventSeen,
615
});
616
if (retryResult) {
617
return retryResult;
618
}
619
telemetryProperties = { ...telemetryProperties, connectivityTestError, connectivityTestErrorGitHubRequestId };
620
}
621
if (processed.type === ChatFetchResponseType.Canceled) {
622
Telemetry.sendCancellationTelemetry(
623
this._telemetryService,
624
{
625
source: telemetryProperties.messageSource ?? 'unknown',
626
requestId: ourRequestId,
627
model: chatEndpoint.model,
628
apiType: chatEndpoint.apiType,
629
transport,
630
associatedRequestId: telemetryProperties.associatedRequestId,
631
retryAfterError: telemetryProperties.retryAfterError,
632
retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,
633
connectivityTestError: telemetryProperties.connectivityTestError,
634
connectivityTestErrorGitHubRequestId: telemetryProperties.connectivityTestErrorGitHubRequestId,
635
retryAfterFilterCategory: telemetryProperties.retryAfterFilterCategory,
636
fetcher: actualFetcher,
637
suspendEventSeen,
638
resumeEventSeen,
639
},
640
{
641
totalTokenMax: chatEndpoint.modelMaxPromptTokens ?? -1,
642
promptTokenCount: tokenCount,
643
tokenCountMax: maxResponseTokens,
644
timeToFirstToken: undefined,
645
timeToCancelled: timeToError,
646
isVisionRequest: this.filterImageMessages(messages) ? 1 : -1,
647
isBYOK: isBYOKModel(chatEndpoint),
648
isAuto: isAutoModel(chatEndpoint),
649
bytesReceived: err.bytesReceived,
650
issuedTime: baseTelemetry.issuedTime,
651
}
652
);
653
} else {
654
Telemetry.sendResponseErrorTelemetry(this._telemetryService, {
655
processed,
656
telemetryProperties,
657
chatEndpointInfo: chatEndpoint,
658
requestBody,
659
tokenCount,
660
maxResponseTokens,
661
timeToFirstToken: timeToError,
662
isVisionRequest: this.filterImageMessages(messages),
663
transport,
664
fetcher: actualFetcher,
665
bytesReceived: err.bytesReceived,
666
issuedTime: baseTelemetry.issuedTime,
667
wasRetried: false,
668
suspendEventSeen,
669
resumeEventSeen,
670
});
671
}
672
pendingLoggedChatRequest?.resolve(processed);
673
return processed;
674
}
675
}
676
677
private async _checkNetworkConnectivity(useFetcher?: FetcherId): Promise<{ retryRequest: boolean; connectivityTestError?: string; connectivityTestErrorGitHubRequestId?: string }> {
678
// Ping CAPI to check network connectivity before retrying
679
const delays = this.connectivityCheckDelays;
680
let connectivityTestError: string | undefined = undefined;
681
let connectivityTestErrorGitHubRequestId: string | undefined = undefined;
682
for (const delay of delays) {
683
this._logService.info(`Waiting ${delay}ms before pinging CAPI to check network connectivity...`);
684
await new Promise(resolve => setTimeout(resolve, delay));
685
try {
686
const isGHEnterprise = this._capiClientService.dotcomAPIURL !== 'https://api.github.com';
687
const url = this._capiClientService.capiPingURL;
688
const headers = await this._getAuthHeaders(isGHEnterprise, url);
689
const res = await this._fetcherService.fetch(url, {
690
headers,
691
useFetcher,
692
callSite: 'capi-ping',
693
});
694
if (res.status >= 200 && res.status < 300) {
695
this._logService.info(`CAPI ping successful, proceeding with chat request retry...`);
696
return { retryRequest: true, connectivityTestError, connectivityTestErrorGitHubRequestId };
697
} else {
698
connectivityTestError = `Status ${res.status}: ${res.statusText}`;
699
connectivityTestErrorGitHubRequestId = res.headers.get('x-github-request-id') ?? '';
700
this._logService.info(`CAPI ping returned status ${res.status}, retrying ping...`);
701
}
702
} catch (err) {
703
connectivityTestError = collectSingleLineErrorMessage(err, true);
704
connectivityTestErrorGitHubRequestId = undefined; // no response headers yet
705
this._logService.info(`CAPI ping failed with error, retrying ping: ${connectivityTestError}`);
706
}
707
}
708
return { retryRequest: false, connectivityTestError, connectivityTestErrorGitHubRequestId };
709
}
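
// Hypothetical test-only sketch: `connectivityCheckDelays` is public so tests can shorten
// the waits between the CAPI pings above, e.g.:
//
//   fetcher.connectivityCheckDelays = [1, 1, 1]; // milliseconds; avoids multi-second sleeps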
710
711
private async _getAuthHeaders(isGHEnterprise: boolean, url: string) {
712
const authHeaders: Record<string, string> = {};
713
if (isGHEnterprise) {
714
let token = '';
715
if (url === this._capiClientService.dotcomAPIURL) {
716
token = this._authenticationService.anyGitHubSession?.accessToken || '';
717
} else {
718
try {
719
token = (await this._authenticationService.getCopilotToken()).token;
720
} catch (_err) {
721
// Ignore error
722
token = '';
723
}
724
}
725
authHeaders['Authorization'] = `Bearer ${token}`;
726
}
727
return authHeaders;
728
}
729
730
private async _retryAfterError(params: {
731
opts: IFetchMLOptions;
732
processed: ChatFetchError;
733
telemetryProperties: TelemetryProperties;
734
requestBody: IEndpointBody;
735
tokenCount: number;
736
maxResponseTokens: number;
737
timeToError: number;
738
transport: string;
739
actualFetcher: FetcherId | undefined;
740
bytesReceived: number | undefined;
741
baseTelemetry: TelemetryData;
742
streamRecorder: FetchStreamRecorder;
743
retryReason: 'network_error' | 'server_error';
744
debugNamePrefix: string;
745
pendingLoggedChatRequest: ReturnType<IRequestLogger['logChatRequest']>;
746
token: CancellationToken;
747
usernameToScrub: string | undefined;
748
suspendEventSeen: boolean | undefined;
749
resumeEventSeen: boolean | undefined;
750
}): Promise<{ retryResult?: ChatResponses; connectivityTestError?: string; connectivityTestErrorGitHubRequestId?: string }> {
751
const {
752
opts,
753
processed,
754
telemetryProperties,
755
requestBody,
756
tokenCount,
757
maxResponseTokens,
758
timeToError,
759
transport,
760
actualFetcher,
761
bytesReceived,
762
baseTelemetry,
763
streamRecorder,
764
retryReason,
765
debugNamePrefix,
766
pendingLoggedChatRequest,
767
token,
768
usernameToScrub,
769
suspendEventSeen,
770
resumeEventSeen,
771
} = params;
772
773
// net::ERR_NETWORK_CHANGED: https://github.com/microsoft/vscode/issues/260297
774
const isNetworkChangedError = ['darwin', 'linux'].includes(process.platform) && processed.reason.indexOf('net::ERR_NETWORK_CHANGED') !== -1;
775
// When Electron's network process crashes, all requests through it fail permanently.
776
// Fall back to node-fetch which bypasses Electron's network stack entirely.
777
const fallbackEnabled = this._configurationService.getExperimentBasedConfig(
778
ConfigKey.TeamInternal.FallbackNodeFetchOnNetworkProcessCrash, this._experimentationService);
779
const isNetworkProcessCrash = processed.type === ChatFetchResponseType.NetworkError
780
&& processed.isNetworkProcessCrash === true
781
&& fallbackEnabled;
782
const useFetcher = (isNetworkChangedError || isNetworkProcessCrash) ? 'node-fetch' : opts.useFetcher;
783
this._logService.info(`Retrying chat request with ${useFetcher || 'default'} fetcher after: ${processed.reasonDetail || processed.reason}`);
784
const connectivity = await this._checkNetworkConnectivity(useFetcher);
785
const connectivityTestError = connectivity.connectivityTestError ? this.scrubErrorDetail(connectivity.connectivityTestError, usernameToScrub) : undefined;
786
const connectivityTestErrorGitHubRequestId = connectivity.connectivityTestErrorGitHubRequestId;
787
if (!connectivity.retryRequest) {
788
this._logService.info(`Not retrying chat request as network connectivity could not be re-established.`);
789
return { connectivityTestError, connectivityTestErrorGitHubRequestId };
790
}
791
792
Telemetry.sendResponseErrorTelemetry(
793
this._telemetryService,
794
{
795
processed,
796
telemetryProperties,
797
chatEndpointInfo: opts.endpoint,
798
requestBody,
799
tokenCount,
800
maxResponseTokens,
801
timeToFirstToken: timeToError,
802
isVisionRequest: this.filterImageMessages(opts.messages),
803
transport,
804
fetcher: actualFetcher,
805
bytesReceived,
806
issuedTime: baseTelemetry.issuedTime,
807
wasRetried: true,
808
suspendEventSeen,
809
resumeEventSeen,
810
},
811
);
812
813
streamRecorder.callback('', 0, { text: '', retryReason });
814
815
const retryResult = await this.fetchMany({
816
...opts,
817
useWebSocket: false,
818
ignoreStatefulMarker: opts.useWebSocket || opts.ignoreStatefulMarker,
819
debugName: debugNamePrefix + opts.debugName,
820
userInitiatedRequest: false, // do not mark the retry as user initiated
821
telemetryProperties: {
822
...telemetryProperties,
823
retryAfterError: processed.reasonDetail || processed.reason,
824
retryAfterErrorGitHubRequestId: processed.serverRequestId,
825
connectivityTestError,
826
connectivityTestErrorGitHubRequestId,
827
},
828
enableRetryOnError: false,
829
useFetcher,
830
}, token);
831
832
pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);
833
if (opts.useWebSocket && retryResult.type === ChatFetchResponseType.Success) {
834
this._consecutiveWebSocketRetryFallbacks++;
835
this._logService.info(`[ChatWebSocketManager] WebSocket request failed with successful HTTP fallback (${this._consecutiveWebSocketRetryFallbacks} consecutive).`);
836
if (opts.conversationId) {
837
// Closing here because the retry is transparent.
838
this._webSocketManager.closeConnection(opts.conversationId);
839
}
840
}
841
return { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId };
842
}
843
844
private async _fetchAndStreamChat(
845
chatEndpointInfo: IChatEndpoint,
846
request: IEndpointBody,
847
baseTelemetryData: TelemetryData,
848
finishedCb: FinishedCallback,
849
secretKey: string | undefined,
850
copilotToken: CopilotToken,
851
location: ChatLocation,
852
ourRequestId: string,
853
nChoices: number | undefined,
854
cancellationToken: CancellationToken,
855
countTokens: () => Promise<number>,
856
userInitiatedRequest?: boolean,
857
useWebSocket?: boolean,
858
turnId?: string,
859
conversationId?: string,
860
telemetryProperties?: TelemetryProperties | undefined,
861
useFetcher?: FetcherId,
862
canRetryOnce?: boolean,
863
requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,
864
summarizedAtRoundId?: string,
865
modeChanged?: boolean,
866
): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number; suspendEventSeen?: boolean; resumeEventSeen?: boolean; otelSpan?: ISpanHandle }> {
867
const isPowerSaveBlockerEnabled = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.ChatRequestPowerSaveBlocker, this._experimentationService);
868
const blockerHandle = isPowerSaveBlockerEnabled && location !== ChatLocation.Other ? this._powerService.acquirePowerSaveBlocker() : undefined;
869
870
let suspendEventSeen = false;
871
let resumeEventSeen = false;
872
873
const suspendListener = this._powerService.onDidSuspend(() => {
874
suspendEventSeen = true;
875
this._logService.info(`System suspended during streaming request ${ourRequestId} (${ChatLocation.toString(location)})`);
876
});
877
878
const resumeListener = this._powerService.onDidResume(() => {
879
resumeEventSeen = true;
880
this._logService.info(`System resumed during streaming request ${ourRequestId} (${ChatLocation.toString(location)})`);
881
});
882
883
try {
884
const fetchResult = await this._doFetchAndStreamChat(
885
chatEndpointInfo,
886
request,
887
baseTelemetryData,
888
finishedCb,
889
secretKey,
890
copilotToken,
891
location,
892
ourRequestId,
893
nChoices,
894
cancellationToken,
895
countTokens,
896
userInitiatedRequest,
897
useWebSocket,
898
turnId,
899
conversationId,
900
telemetryProperties,
901
useFetcher,
902
canRetryOnce,
903
requestKindOptions,
904
summarizedAtRoundId,
905
modeChanged,
906
);
907
return { ...fetchResult, suspendEventSeen: suspendEventSeen || undefined, resumeEventSeen: resumeEventSeen || undefined };
908
} catch (err) {
909
if (suspendEventSeen) {
910
err.suspendEventSeen = true;
911
}
912
if (resumeEventSeen) {
913
err.resumeEventSeen = true;
914
}
915
throw err;
916
} finally {
917
suspendListener.dispose();
918
resumeListener.dispose();
919
blockerHandle?.dispose();
920
}
921
}
922
923
private async _doFetchAndStreamChat(
924
chatEndpointInfo: IChatEndpoint,
925
request: IEndpointBody,
926
baseTelemetryData: TelemetryData,
927
finishedCb: FinishedCallback,
928
secretKey: string | undefined,
929
copilotToken: CopilotToken,
930
location: ChatLocation,
931
ourRequestId: string,
932
nChoices: number | undefined,
933
cancellationToken: CancellationToken,
934
countTokens: () => Promise<number>,
935
userInitiatedRequest?: boolean,
936
useWebSocket?: boolean,
937
turnId?: string,
938
conversationId?: string,
939
telemetryProperties?: TelemetryProperties | undefined,
940
useFetcher?: FetcherId,
941
canRetryOnce?: boolean,
942
requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,
943
summarizedAtRoundId?: string,
944
modeChanged?: boolean,
945
): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number; otelSpan?: ISpanHandle }> {
946
947
if (cancellationToken.isCancellationRequested) {
948
return { result: { type: FetchResponseKind.Canceled, reason: 'before fetch request' } };
949
}
950
951
// OTel inference span for this LLM call
952
const serverAddress = typeof chatEndpointInfo.urlOrRequestMetadata === 'string'
953
? (() => { try { return new URL(chatEndpointInfo.urlOrRequestMetadata).hostname; } catch { return undefined; } })()
954
: undefined;
955
const chatSessionId = getCurrentCapturingToken()?.chatSessionId;
956
const parentChatSessionId = getCurrentCapturingToken()?.parentChatSessionId;
957
const debugLogLabel = getCurrentCapturingToken()?.debugLogLabel;
958
const otelSpan = this._otelService.startSpan(`chat ${chatEndpointInfo.model}`, {
959
kind: SpanKind.CLIENT,
960
attributes: {
961
[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,
962
[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GITHUB,
963
[GenAiAttr.REQUEST_MODEL]: chatEndpointInfo.model,
964
[GenAiAttr.CONVERSATION_ID]: telemetryProperties?.requestId ?? ourRequestId,
965
[GenAiAttr.REQUEST_MAX_TOKENS]: request.max_tokens ?? request.max_output_tokens ?? request.max_completion_tokens ?? 2048,
966
...(request.temperature !== undefined ? { [GenAiAttr.REQUEST_TEMPERATURE]: request.temperature } : {}),
967
...(request.top_p !== undefined ? { [GenAiAttr.REQUEST_TOP_P]: request.top_p } : {}),
968
[CopilotChatAttr.MAX_PROMPT_TOKENS]: chatEndpointInfo.modelMaxPromptTokens,
969
...(serverAddress ? { [StdAttr.SERVER_ADDRESS]: serverAddress } : {}),
970
...(conversationId ? { [CopilotChatAttr.SESSION_ID]: conversationId } : {}),
971
...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}),
972
...(parentChatSessionId ? { [CopilotChatAttr.PARENT_CHAT_SESSION_ID]: parentChatSessionId } : {}),
973
...(debugLogLabel ? { [CopilotChatAttr.DEBUG_LOG_LABEL]: debugLogLabel } : {}),
974
},
975
});
976
const otelStartTime = Date.now();
977
978
try {
979
980
this._logService.debug(`modelMaxPromptTokens ${chatEndpointInfo.modelMaxPromptTokens}`);
981
this._logService.debug(`modelMaxResponseTokens ${request.max_tokens ?? 2048}`);
982
this._logService.debug(`chat model ${chatEndpointInfo.model}`);
983
984
secretKey ??= copilotToken.token;
985
if (!secretKey) {
986
// If no key is set, fail the request
987
const urlOrRequestMetadata = stringifyUrlOrRequestMetadata(chatEndpointInfo.urlOrRequestMetadata);
988
this._logService.error(`Failed to send request to ${urlOrRequestMetadata} due to missing key`);
989
sendCommunicationErrorTelemetry(this._telemetryService, `Failed to send request to ${urlOrRequestMetadata} due to missing key`);
990
return {
991
result: {
992
type: FetchResponseKind.Failed,
993
modelRequestId: undefined,
994
failKind: ChatFailKind.TokenExpiredOrInvalid,
995
reason: 'key is missing'
996
}
997
};
998
}
999
1000
// WebSocket path: use persistent WebSocket connection for Responses API endpoints
1001
if (useWebSocket && turnId && conversationId) {
1002
const wsResult = await this._doFetchViaWebSocket(
1003
chatEndpointInfo,
1004
request,
1005
baseTelemetryData,
1006
finishedCb,
1007
secretKey,
1008
location,
1009
ourRequestId,
1010
turnId,
1011
conversationId,
1012
cancellationToken,
1013
countTokens,
1014
userInitiatedRequest,
1015
telemetryProperties,
1016
requestKindOptions,
1017
summarizedAtRoundId,
1018
modeChanged,
1019
);
1020
return { ...wsResult, otelSpan };
1021
}
1022
1023
const httpResult = await this._doFetchViaHttp(
1024
chatEndpointInfo,
1025
request,
1026
baseTelemetryData,
1027
finishedCb,
1028
secretKey,
1029
location,
1030
ourRequestId,
1031
nChoices,
1032
cancellationToken,
1033
userInitiatedRequest,
1034
telemetryProperties,
1035
useFetcher,
1036
canRetryOnce,
1037
requestKindOptions,
1038
);
1039
return { ...httpResult, otelSpan };
1040
1041
} catch (err) {
1042
otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));
1043
otelSpan.setAttribute(StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');
1044
otelSpan.recordException(err);
1045
throw err;
1046
} finally {
1047
const durationSec = (Date.now() - otelStartTime) / 1000;
1048
GenAiMetrics.recordOperationDuration(this._otelService, durationSec, {
1049
operationName: GenAiOperationName.CHAT,
1050
providerName: GenAiProviderName.GITHUB,
1051
requestModel: chatEndpointInfo.model,
1052
});
1053
// Span is NOT ended here — caller (fetchMany) will set token attributes and end it
1054
}
1055
}
1056
1057
/**
1058
* Sends a chat request via a persistent WebSocket connection instead of HTTP POST.
1059
* Events are the same Responses API streaming events, processed by OpenAIResponsesProcessor.
1060
*/
1061
private async _doFetchViaWebSocket(
1062
chatEndpointInfo: IChatEndpoint,
1063
request: IEndpointBody,
1064
baseTelemetryData: TelemetryData,
1065
finishedCb: FinishedCallback,
1066
secretKey: string,
1067
location: ChatLocation,
1068
ourRequestId: string,
1069
turnId: string,
1070
conversationId: string,
1071
cancellationToken: CancellationToken,
1072
countTokens: () => Promise<number>,
1073
userInitiatedRequest: boolean | undefined,
1074
telemetryProperties: TelemetryProperties | undefined,
1075
requestKindOptions: IBackgroundRequestOptions | ISubagentRequestOptions | undefined,
1076
summarizedAtRoundId: string | undefined,
1077
modeChanged: boolean | undefined,
1078
): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled }> {
1079
const intent = locationToIntent(location);
1080
const agentInteractionType = requestKindOptions?.kind === 'subagent' ?
1081
'conversation-subagent' :
1082
requestKindOptions?.kind === 'background' ?
1083
'conversation-background' :
1084
intent === 'conversation-agent' ? intent : undefined;
1085
const additionalHeaders: Record<string, string> = {
1086
'Authorization': `Bearer ${secretKey}`,
1087
'X-Request-Id': ourRequestId,
1088
'OpenAI-Intent': intent,
1089
'X-GitHub-Api-Version': '2025-05-01',
1090
'X-Interaction-Id': this._interactionService.interactionId,
1091
...(chatEndpointInfo.getExtraHeaders ? chatEndpointInfo.getExtraHeaders(location) : {}),
1092
};
1093
if (agentInteractionType) {
1094
additionalHeaders['X-Interaction-Type'] = agentInteractionType;
1095
additionalHeaders['X-Agent-Task-Id'] = ourRequestId;
1096
}
1097
if (request.messages?.some((m: CAPIChatMessage) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpointInfo.supportsVision) {
1098
additionalHeaders['Copilot-Vision-Request'] = 'true';
1099
}
1100
const connection = this._webSocketManager.getOrCreateConnection(conversationId, additionalHeaders, ourRequestId);
1101
try {
1102
await connection.connect();
1103
} catch (err) {
1104
(err as any).gitHubRequestId = connection.gitHubRequestId;
1105
throw err;
1106
}
1107
1108
// Generate unique ID to link input and output messages
1109
const modelCallId = generateUuid();
1110
1111
const telemetryData = TelemetryData.createAndMarkAsIssued({
1112
endpoint: 'completions',
1113
engineName: 'chat',
1114
uiKind: ChatLocation.toString(location),
1115
transport: 'websocket',
1116
...{ ...telemetryProperties, modelCallId },
1117
}, {
1118
maxTokenWindow: chatEndpointInfo.modelMaxPromptTokens
1119
});
1120
1121
const modelRequestId = getRequestId(connection.responseHeaders);
1122
// Request id changes over the lifetime of the connection.
1123
modelRequestId.headerRequestId = ourRequestId;
1124
telemetryData.extendWithRequestId(modelRequestId);
1125
if (modelRequestId.serverExperiments) {
1126
this._telemetryService.setSharedProperty('capi.assignmentcontext', modelRequestId.serverExperiments);
1127
}
1128
1129
for (const [key, value] of Object.entries(request)) {
1130
if (key === 'messages' || key === 'input') {
1131
continue;
1132
} // Skip messages (PII)
1133
telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 'undefined';
1134
}
1135
this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);
1136
1137
const requestStart = Date.now();
1138
const handle = connection.sendRequest(request, { userInitiated: !!userInitiatedRequest, turnId, requestId: ourRequestId, model: chatEndpointInfo.model, countTokens, tokenCountMax: chatEndpointInfo.maxOutputTokens, modelMaxPromptTokens: chatEndpointInfo.modelMaxPromptTokens, summarizedAtRoundId, modeChanged }, cancellationToken);
1139
1140
const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });
1141
const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, this._telemetryService, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId, modelRequestId.serverExperiments, getResponsesApiCompactionThresholdFromBody(request));
1142
1143
// Set up streaming first so event listeners are registered before we
1144
// await the first event — AsyncIterableObject runs its executor eagerly.
1145
const chatCompletions = new AsyncIterableObject<ChatCompletion>(async emitter => {
1146
try {
1147
await new Promise<void>((resolve, reject) => {
1148
handle.onEvent(event => {
1149
const completion = processor.push(event, finishedCb);
1150
if (completion) {
1151
sendCompletionOutputTelemetry(this._telemetryService, this._logService, completion, extendedBaseTelemetryData);
1152
emitter.emitOne(completion);
1153
}
1154
1155
if (event.type === 'response.completed') {
1156
const snapshots = (event as any).copilot_quota_snapshots;
1157
if (snapshots && typeof snapshots === 'object') {
1158
this._chatQuotaService.processQuotaSnapshots(snapshots);
1159
}
1160
}
1161
});
1162
1163
handle.onCAPIError(event => {
1164
// Mid-stream CAPI error — throw so the caller can handle it
1165
const error = new Error(`${event.error.message} (${event.error.code})`);
1166
(error as any).gitHubRequestId = modelRequestId.gitHubRequestId;
1167
(error as any).capiWebSocketError = event;
1168
reject(error);
1169
});
1170
1171
handle.onError(error => {
1172
(error as any).gitHubRequestId = modelRequestId.gitHubRequestId;
1173
if (isCancellationError(error)) {
1174
reject(error);
1175
return;
1176
}
1177
1178
const warningTelemetry = telemetryData.extendedBy({ error: error.message });
1179
this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);
1180
1181
const totalTimeMs = Date.now() - requestStart;
1182
telemetryData.measurements.totalTimeMs = totalTimeMs;
1183
telemetryData.properties.error = error.message;
1184
1185
this._logService.debug(`request.error: [websocket], took ${totalTimeMs} ms`);
1186
this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);
1187
1188
reject(error);
1189
});
1190
1191
handle.done.then(resolve, reject);
1192
});
1193
1194
const totalTimeMs = Date.now() - requestStart;
1195
telemetryData.measurements.totalTimeMs = totalTimeMs;
1196
this._logService.debug(`request.response: [websocket], took ${totalTimeMs} ms`);
1197
this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);
1198
} finally {
1199
let messagesToLog = request.messages;
1200
if ((!messagesToLog || messagesToLog.length === 0) && (request as OpenAI.Responses.ResponseCreateParams).input) {
1201
try {
1202
const rawMessages = responseApiInputToRawMessagesForLogging(request as OpenAI.Responses.ResponseCreateParams);
1203
messagesToLog = rawMessageToCAPI(rawMessages);
1204
} catch (e) {
1205
this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);
1206
messagesToLog = [];
1207
}
1208
}
1209
sendEngineMessagesTelemetry(this._telemetryService, messagesToLog ?? [], telemetryData, false, this._logService);
1210
}
1211
});
1212
1213
// Wait for the first event to determine the response type,
1214
// analogous to checking the HTTP status code before streaming the body.
1215
const firstEvent = await handle.firstEvent;
1216
1217
if (cancellationToken.isCancellationRequested) {
1218
return { result: { type: FetchResponseKind.Canceled, reason: 'after first WebSocket event' } };
1219
}
1220
1221
// CAPI error before any stream events — return Failed like HTTP non-200
1222
if (isCAPIWebSocketError(firstEvent)) {
1223
const totalTimeMs = Date.now() - requestStart;
1224
telemetryData.measurements.totalTimeMs = totalTimeMs;
1225
telemetryData.properties.error = `${firstEvent.error.message} (${firstEvent.error.code})`;
1226
this._logService.debug(`request.error: [websocket capi error], took ${totalTimeMs} ms`);
1227
this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);
1228
return { result: await this._handleWebSocketCAPIError(firstEvent, modelRequestId) };
1229
}
1230
1231
// Clear stale quota-exceeded state if the server accepted the request.
1232
if (this._authenticationService.copilotToken?.isFreeUser && this._authenticationService.copilotToken?.isChatQuotaExceeded) {
1233
this._authenticationService.resetCopilotToken();
1234
}
1235
1236
return {
1237
result: {
1238
type: FetchResponseKind.Success,
1239
chatCompletions,
1240
}
1241
};
1242
}
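
// Illustrative sketch of how a successful result's stream is consumed downstream: the
// `chatCompletions` field returned above is an AsyncIterableObject<ChatCompletion>, so a
// caller can iterate it with for-await (names below are placeholders):
//
//   if (result.type === FetchResponseKind.Success) {
//   	for await (const completion of result.chatCompletions) {
//   		handleCompletion(completion); // hypothetical handler
//   	}
//   }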
1243
1244
private async _doFetchViaHttp(
chatEndpointInfo: IChatEndpoint,
request: IEndpointBody,
baseTelemetryData: TelemetryData,
finishedCb: FinishedCallback,
secretKey: string,
location: ChatLocation,
ourRequestId: string,
nChoices: number | undefined,
cancellationToken: CancellationToken,
userInitiatedRequest: boolean | undefined,
telemetryProperties: TelemetryProperties | undefined,
useFetcher: FetcherId | undefined,
canRetryOnce: boolean | undefined,
requestKindOptions: IBackgroundRequestOptions | ISubagentRequestOptions | undefined,
): Promise<{ result: ChatResults | ChatRequestFailed | ChatRequestCanceled; fetcher?: FetcherId; bytesReceived?: number; statusCode?: number }> {
// Generate unique ID to link input and output messages
const modelCallId = generateUuid();

const response = await this._fetchWithInstrumentation(
chatEndpointInfo,
ourRequestId,
request,
secretKey,
location,
cancellationToken,
userInitiatedRequest,
{ ...telemetryProperties, modelCallId },
useFetcher,
canRetryOnce,
requestKindOptions,
);

if (cancellationToken.isCancellationRequested) {
try {
// Destroy the stream so that the server is hopefully notified we don't want any more data
// and can cancel/forget about the request itself.
await response!.body.destroy();
} catch (e) {
this._logService.error(e, `Error destroying stream`);
this._telemetryService.sendGHTelemetryException(e, 'Error destroying stream');
}
return {
result: { type: FetchResponseKind.Canceled, reason: 'after fetch request' },
fetcher: response.fetcher,
bytesReceived: response.bytesReceived
};
}

if (response.status === 200 && this._authenticationService.copilotToken?.isFreeUser && this._authenticationService.copilotToken?.isChatQuotaExceeded) {
this._authenticationService.resetCopilotToken();
}

if (response.status !== 200) {
const telemetryData = createTelemetryData(chatEndpointInfo, location, ourRequestId);
this._logService.info('Request ID for failed request: ' + ourRequestId);
return {
result: await this._handleError(telemetryData, response, ourRequestId),
fetcher: response.fetcher,
bytesReceived: response.bytesReceived,
statusCode: response.status
};
}

// Extend baseTelemetryData with modelCallId for output messages
const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });

let chatCompletions;
const gitHubRequestId = response.headers.get('x-github-request-id') ?? '';
try {
const completions = await chatEndpointInfo.processResponseFromChatEndpoint(
this._telemetryService,
this._logService,
response,
nChoices ?? /* OpenAI's default */ 1,
finishedCb,
extendedBaseTelemetryData,
cancellationToken,
location,
);
chatCompletions = new AsyncIterableObject<ChatCompletion>(async emitter => {
try {
for await (const completion of completions) {
emitter.emitOne(completion);
}
} catch (err) {
err.fetcherId = response.fetcher;
err.gitHubRequestId = gitHubRequestId;
err.bytesReceived = response.bytesReceived;
throw err;
}
});
} catch (err) {
err.fetcherId = response.fetcher;
err.gitHubRequestId = gitHubRequestId;
err.bytesReceived = response.bytesReceived;
throw err;
}

// CAPI will return us a Copilot Edits Session Header which is our token for using the speculative decoding endpoint
// We should store this in the auth service for easy use later
if (response.headers.get('Copilot-Edits-Session')) {
this._authenticationService.speculativeDecodingEndpointToken = response.headers.get('Copilot-Edits-Session') ?? undefined;
}

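// Hand the response headers to the quota service so it can pick up any quota information they carry.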
this._chatQuotaService.processQuotaHeaders(response.headers);

return {
result: {
type: FetchResponseKind.Success,
chatCompletions,
},
fetcher: response.fetcher,
bytesReceived: response.bytesReceived
};
}

private async _fetchWithInstrumentation(
chatEndpoint: IChatEndpoint,
ourRequestId: string,
request: IEndpointBody,
secretKey: string,
location: ChatLocation,
cancellationToken: CancellationToken,
userInitiatedRequest?: boolean,
telemetryProperties?: TelemetryProperties,
useFetcher?: FetcherId,
canRetryOnce?: boolean,
requestKindOptions?: IBackgroundRequestOptions | ISubagentRequestOptions,
): Promise<Response> {

// If request contains an image, we include this header.
const additionalHeaders: Record<string, string> = {
'X-Interaction-Id': this._interactionService.interactionId,
'X-Initiator': userInitiatedRequest ? 'user' : 'agent', // Agent = a system request / not the primary user query.
};
if (request.messages?.some((m: CAPIChatMessage) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpoint.supportsVision) {
additionalHeaders['Copilot-Vision-Request'] = 'true';
}
const telemetryData = TelemetryData.createAndMarkAsIssued({
endpoint: 'completions',
engineName: 'chat',
uiKind: ChatLocation.toString(location),
transport: 'http',
...telemetryProperties // This includes the modelCallId from fetchAndStreamChat
}, {
maxTokenWindow: chatEndpoint.modelMaxPromptTokens
});

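// Record the remaining request options in telemetry; message content is skipped below to avoid logging PII.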
for (const [key, value] of Object.entries(request)) {
if (key === 'messages' || key === 'input') {
continue;
} // Skip messages (PII)
telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 'undefined';
}

// The request ID we are passed in is sent in the request to the proxy, and included in our pre-request telemetry.
// We hope (but do not rely on) that the model will use the same ID in the response, allowing us to correlate
// the request and response.
telemetryData.properties['headerRequestId'] = ourRequestId;

this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);

const requestStart = Date.now();
const intent = locationToIntent(location);

// Wrap the Promise with success/error callbacks so we can log/measure it
return this._instantiationService.invokeFunction(postRequest, {
endpointOrUrl: chatEndpoint,
secretKey,
intent,
requestId: ourRequestId,
body: request,
additionalHeaders,
cancelToken: cancellationToken,
useFetcher,
canRetryOnce,
location,
requestKindOptions,
}).then(response => {
const apim = response.headers.get('apim-request-id');
if (apim) {
this._logService.debug(`APIM request id: ${apim}`);
}
const ghRequestId = response.headers.get('x-github-request-id');
if (ghRequestId) {
this._logService.debug(`GH request id: ${ghRequestId}`);
}
// This ID is hopefully the same as ourRequestId, but it is not guaranteed.
// If they are different then we will override the original one we set in telemetryData above.
const modelRequestId = getRequestId(response.headers);
// Preserve ourRequestId as headerRequestId if the server didn't echo x-request-id
modelRequestId.headerRequestId = modelRequestId.headerRequestId || ourRequestId;
telemetryData.extendWithRequestId(modelRequestId);
if (modelRequestId.serverExperiments) {
this._telemetryService.setSharedProperty('capi.assignmentcontext', modelRequestId.serverExperiments);
}

// TODO: Add response length (requires parsing)
const totalTimeMs = Date.now() - requestStart;
telemetryData.measurements.totalTimeMs = totalTimeMs;

this._logService.debug(`request.response: [${stringifyUrlOrRequestMetadata(chatEndpoint.urlOrRequestMetadata)}], took ${totalTimeMs} ms`);

this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);

return response;
})
.catch(error => {
if (this._fetcherService.isAbortError(error)) {
// If we cancelled a network request, we don't want to log a `request.error`
throw error;
}

const warningTelemetry = telemetryData.extendedBy({ error: 'Network exception' });
this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);

telemetryData.properties.code = String(error.code ?? '');
telemetryData.properties.errno = String(error.errno ?? '');
telemetryData.properties.message = String(error.message ?? '');
telemetryData.properties.type = String(error.type ?? '');

const totalTimeMs = Date.now() - requestStart;
telemetryData.measurements.totalTimeMs = totalTimeMs;

this._logService.debug(`request.response: [${stringifyUrlOrRequestMetadata(chatEndpoint.urlOrRequestMetadata)}] took ${totalTimeMs} ms`);

this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);

throw error;
})
.finally(() => {
let messagesToLog = request.messages;

// For Response API (has input but no messages), convert input to messages for logging
if ((!messagesToLog || messagesToLog.length === 0) && (request as OpenAI.Responses.ResponseCreateParams).input) {
try {
const rawMessages = responseApiInputToRawMessagesForLogging(request as OpenAI.Responses.ResponseCreateParams);
messagesToLog = rawMessageToCAPI(rawMessages);
} catch (e) {
this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);
messagesToLog = [];
}
}

sendEngineMessagesTelemetry(this._telemetryService, messagesToLog ?? [], telemetryData, false, this._logService);
});
}

private async _handleError(
telemetryData: TelemetryData,
response: Response,
requestId: string
): Promise<ChatRequestFailed> {
const modelRequestIdObj = getRequestId(response.headers);
requestId = modelRequestIdObj.headerRequestId || requestId;
modelRequestIdObj.headerRequestId = requestId;

telemetryData.properties.error = `Response status was ${response.status}`;
telemetryData.properties.status = String(response.status);
this._telemetryService.sendGHTelemetryEvent('request.shownWarning', telemetryData.properties, telemetryData.measurements);

const text = await response.text();
let jsonData: Record<string, any> | undefined;
try {
jsonData = JSON.parse(text);
jsonData = jsonData?.error ?? jsonData; // Extract nested error object if it exists
} catch {
// JSON parsing failed, it's not json content.
}

const reasonNoText = `Server error: ${response.status}`;
const reason = `${reasonNoText} ${text}`;
this._logService.error(reason);

if (400 <= response.status && response.status < 500) {

if (response.status === 400 && text.includes('off_topic')) {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.OffTopic,
reason: 'filtered as off_topic by intent classifier: message was not programming related',
};
}

if (response.status === 401 && text.includes('authorize_url') && jsonData?.authorize_url) {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.AgentUnauthorized,
reason: response.statusText,
data: jsonData
};
}

if (response.status === 400 && jsonData?.code === 'previous_response_not_found') {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.InvalidPreviousResponseId,
reason: jsonData.message || 'Invalid previous response ID',
data: jsonData,
};
}

if (response.status === 401 || response.status === 403) {
// Token has expired or is invalid, fetch a new one on next request
// TODO(drifkin): these actions should probably happen in vsc specific code
this._authenticationService.resetCopilotToken(response.status);
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.TokenExpiredOrInvalid,
reason: jsonData?.message || `token expired or invalid: ${response.status}`,
};
}

if (response.status === 402) {
// When we receive a 402, we have exceeded a quota
// This is stored on the token so let's refresh it
if (!this._authenticationService.copilotToken?.isChatQuotaExceeded) {
this._authenticationService.resetCopilotToken(response.status);
await this._authenticationService.getCopilotToken();
}

const retryAfter = response.headers.get('retry-after');

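// retry-after can be either an HTTP date or a number of seconds; normalize it to a Date.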
const convertToDate = (retryAfterString: string | null): Date | undefined => {
if (!retryAfterString) {
return undefined;
}

// Try treating it as a date
const retryAfterDate = new Date(retryAfterString);
if (!isNaN(retryAfterDate.getDate())) {
return retryAfterDate;
}

// It is not a date, try treating it as a duration from the current date
const retryAfterDuration = parseInt(retryAfterString, 10);
if (isNaN(retryAfterDuration)) {
return undefined;
}

return new Date(Date.now() + retryAfterDuration * 1000);
};

const retryAfterDate = convertToDate(retryAfter);

return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.QuotaExceeded,
reason: jsonData?.message ?? 'Free tier quota exceeded',
data: {
capiError: jsonData,
retryAfter: retryAfterDate
}
};
}

if (response.status === 404) {
let errorReason: string;

// Check if response body is valid JSON
if (!jsonData) {
errorReason = text;
} else {
errorReason = JSON.stringify(jsonData);
}

return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.NotFound,
reason: errorReason
};
}

if (response.status === 422) {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.ContentFilter,
reason: 'Filtered by Responsible AI Service\n\n' + text,
};
}

if (response.status === 424) {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.AgentFailedDependency,
reason: text
};
}

if (response.status === 429) {
const rateLimitReason = jsonData?.message ?? jsonData?.code ?? text;

if (text.includes('extension_blocked') && jsonData?.code === 'extension_blocked' && jsonData?.type === 'rate_limit_error') {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.ExtensionBlocked,
reason: 'Extension blocked',
data: {
...jsonData?.message,
retryAfter: response.headers.get('retry-after'),
}
};
}

// HTTP 429 Too Many Requests
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.RateLimited,
reason: rateLimitReason,
data: {
retryAfter: response.headers.get('retry-after'),
rateLimitKey: response.headers.get('x-ratelimit-exceeded'),
capiError: jsonData
}
};
}

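// Non-standard status: the service uses 466 to report that this client version is not supported.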
if (response.status === 466) {
this._logService.info(text);
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.ClientNotSupported,
reason: `client not supported: ${text}`
};
}

if (response.status === 499) {
this._logService.info('Cancelled by server');
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.ServerCanceled,
reason: 'canceled by server'
};
}

} else if (500 <= response.status && response.status < 600) {

if (response.status === 503) {
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.RateLimited,
reason: 'Upstream provider rate limit hit',
data: {
retryAfter: null,
rateLimitKey: null,
capiError: { code: 'upstream_provider_rate_limit', message: text }
}
};
}

// HTTP 5xx Server Error
return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.ServerError,
reason: reasonNoText,
};
}

this._logService.error(`Request Failed: ${response.status} ${text}`);

sendCommunicationErrorTelemetry(this._telemetryService, 'Unhandled status from server: ' + response.status, text);

return {
type: FetchResponseKind.Failed,
modelRequestId: modelRequestIdObj,
failKind: ChatFailKind.Unknown,
reason: `Request Failed: ${response.status} ${text}`
};
}

private async processSuccessfulResponse(
response: ChatResults,
messages: Raw.ChatMessage[],
requestBody: IEndpointBody,
requestId: string,
maxResponseTokens: number,
promptTokenCount: number,
timeToFirstToken: number,
streamRecorder: FetchStreamRecorder,
baseTelemetry: TelemetryData,
chatEndpointInfo: IChatEndpoint,
userInitiatedRequest: boolean | undefined,
transport: string,
fetcher: FetcherId | undefined,
bytesReceived: number | undefined,
suspendEventSeen: boolean | undefined,
resumeEventSeen: boolean | undefined,
): Promise<ChatResponses | ChatFetchRetriableError<string[]>> {

const completions: ChatCompletion[] = [];

for await (const chatCompletion of response.chatCompletions) {
Telemetry.sendSuccessTelemetry(
this._telemetryService,
{
chatCompletion,
baseTelemetry,
userInitiatedRequest,
chatEndpointInfo,
requestBody,
maxResponseTokens,
promptTokenCount,
timeToFirstToken,
timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,
hasImageMessages: this.filterImageMessages(messages),
transport,
fetcher,
bytesReceived,
suspendEventSeen,
resumeEventSeen,
}
);

if (!this.isRepetitive(chatCompletion, baseTelemetry?.properties)) {
completions.push(chatCompletion);
}
}
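// Finish reasons that count as a usable completion; anything else is mapped to an error below.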
const successFinishReasons = new Set([FinishedCompletionReason.Stop, FinishedCompletionReason.ClientTrimmed, FinishedCompletionReason.FunctionCall, FinishedCompletionReason.ToolCalls]);
const successfulCompletions = completions.filter(c => successFinishReasons.has(c.finishReason));
if (successfulCompletions.length >= 1) {
return {
type: ChatFetchResponseType.Success,
resolvedModel: successfulCompletions[0].model,
usage: successfulCompletions.length === 1 ? successfulCompletions[0].usage : undefined,
value: successfulCompletions.map(c => getTextPart(c.message.content)),
requestId,
serverRequestId: successfulCompletions[0].requestId.headerRequestId,
};
}

const result = completions.at(0);

switch (result?.finishReason) {
case FinishedCompletionReason.ContentFilter:
return {
type: ChatFetchResponseType.FilteredRetry,
category: result.filterReason ?? FilterReason.Copyright,
reason: 'Response got filtered.',
value: completions.map(c => getTextPart(c.message.content)),
requestId: requestId,
serverRequestId: result.requestId.headerRequestId,
};
case FinishedCompletionReason.Length:
return {
type: ChatFetchResponseType.Length,
reason: 'Response too long.',
requestId: requestId,
serverRequestId: result.requestId.headerRequestId,
truncatedValue: getTextPart(result.message.content)
};
case FinishedCompletionReason.ServerError:
return {
type: ChatFetchResponseType.Failed,
reason: 'Server error. Stream terminated',
requestId: requestId,
serverRequestId: result.requestId.headerRequestId,
streamError: result.error
};
}
return {
type: ChatFetchResponseType.Unknown,
reason: RESPONSE_CONTAINED_NO_CHOICES,
requestId: requestId,
serverRequestId: result?.requestId.headerRequestId,
};
}

private filterImageMessages(messages: Raw.ChatMessage[]): boolean {
return messages?.some(m => Array.isArray(m.content) ? m.content.some(c => 'imageUrl' in c) : false);
}

private isRepetitive(chatCompletion: ChatCompletion, telemetryProperties?: TelemetryProperties) {
const lineRepetitionStats = calculateLineRepetitionStats(getTextPart(chatCompletion.message.content));
const hasRepetition = isRepetitive(chatCompletion.tokens);
if (hasRepetition) {
const telemetryData = TelemetryData.createAndMarkAsIssued();
telemetryData.extendWithRequestId(chatCompletion.requestId);
const extended = telemetryData.extendedBy(telemetryProperties);
this._telemetryService.sendEnhancedGHTelemetryEvent('conversation.repetition.detected', extended.properties, extended.measurements);
}
if (lineRepetitionStats.numberOfRepetitions >= 10) {
/* __GDPR__
"conversation.repetition.detected" : {
"owner": "lramos15",
"comment": "Calculates the number of repetitions in a response. Useful for loop detection",
"finishReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished. Helps identify cancellation vs length limits" },
"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id for this message request." },
"lengthOfLine": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Length of the repeating line, in characters." },
"numberOfRepetitions": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Number of times the line repeats." },
"totalLines": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Number of total lines in the response." }
}
*/
this._telemetryService.sendMSFTTelemetryEvent('conversation.repetition.detected', {
requestId: chatCompletion.requestId.headerRequestId,
finishReason: chatCompletion.finishReason,
}, {
numberOfRepetitions: lineRepetitionStats.numberOfRepetitions,
lengthOfLine: lineRepetitionStats.mostRepeatedLine.length,
totalLines: lineRepetitionStats.totalLines
});
}
return hasRepetition;
}

/**
* Check for repetition in partial response deltas from a cancelled request.
*
* This method performs the same repetition detection as the `isRepetitive` method,
* but operates on partial response data collected before the request was cancelled.
*
* Key differences from completed requests:
* - Text is reconstructed from delta.text values instead of message.content
* - Tokens are approximated by splitting text on whitespace instead of using
* the actual token array (which is only available in completed responses)
* - Enhanced telemetry won't include RequestId fields since we only have the
* headerRequestId string, not the full RequestId object
* - The finishReason is marked as 'canceled' to distinguish from server-generated
* finish reasons
*/
private checkRepetitionInDeltas(
deltas: IResponseDelta[],
requestId: string,
telemetryProperties?: TelemetryProperties
): void {
// Reconstruct the text content from deltas (filter out null, undefined, and empty text values)
const textContent = deltas.filter(delta => delta.text?.length > 0).map(delta => delta.text).join('');

// Early exit if no content
if (!textContent || textContent.trim().length === 0) {
return;
}

// For cancelled requests, we don't have the actual token array (only available in ChatCompletion),
// so we approximate by splitting text content on whitespace. This is less precise than actual
// tokenization but sufficient for detecting obvious repetition patterns.
const tokens = textContent.split(/\s+/).filter(t => t.length > 0);

// Check for line repetition
const lineRepetitionStats = calculateLineRepetitionStats(textContent);

// Check for token-level repetition
const hasRepetition = isRepetitive(tokens);

// Send telemetry if repetition is detected
if (hasRepetition) {
const telemetryData = TelemetryData.createAndMarkAsIssued();
const extended = telemetryData.extendedBy(telemetryProperties);
// Note: For cancelled requests, we don't have a full RequestId object,
// so we can't use extendWithRequestId like the non-cancelled path does.
// This means enhanced telemetry for cancelled requests won't include
// completionId, created, deploymentId, or serverExperiments fields.
this._telemetryService.sendEnhancedGHTelemetryEvent('conversation.repetition.detected', extended.properties, extended.measurements);
}

if (lineRepetitionStats.numberOfRepetitions >= 10) {
this._telemetryService.sendMSFTTelemetryEvent('conversation.repetition.detected', {
requestId: requestId,
finishReason: 'canceled', // Client-side finish reason to distinguish from server-generated reasons
}, {
numberOfRepetitions: lineRepetitionStats.numberOfRepetitions,
lengthOfLine: lineRepetitionStats.mostRepeatedLine.length,
totalLines: lineRepetitionStats.totalLines
});
}
}

private processCanceledResponse(
response: ChatRequestCanceled,
requestId: string,
streamRecorder?: FetchStreamRecorder,
telemetryProperties?: TelemetryProperties
): ChatResponses {
// Check for repetition in the partial response before cancellation
if (streamRecorder && streamRecorder.deltas.length > 0) {
this.checkRepetitionInDeltas(streamRecorder.deltas, requestId, telemetryProperties);
}

return {
type: ChatFetchResponseType.Canceled,
reason: response.reason,
requestId: requestId,
serverRequestId: undefined,
};
}

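// Map the failure kinds reported by the fetch layer onto the ChatFetchError variants returned to callers.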
private processFailedResponse(response: ChatRequestFailed, requestId: string, isAuto: boolean): ChatFetchError {
const serverRequestId = response.modelRequestId?.gitHubRequestId;
const reason = response.reason;
if (response.failKind === ChatFailKind.RateLimited) {
return { type: ChatFetchResponseType.RateLimited, reason, requestId, serverRequestId, retryAfter: response.data?.retryAfter, rateLimitKey: (response.data?.rateLimitKey || ''), isAuto, capiError: response.data?.capiError };
}
if (response.failKind === ChatFailKind.QuotaExceeded) {
return { type: ChatFetchResponseType.QuotaExceeded, reason, requestId, serverRequestId, retryAfter: response.data?.retryAfter, capiError: response.data?.capiError };
}
if (response.failKind === ChatFailKind.OffTopic) {
return { type: ChatFetchResponseType.OffTopic, reason, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.TokenExpiredOrInvalid || response.failKind === ChatFailKind.ClientNotSupported || reason.includes('Bad request: ')) {
return { type: ChatFetchResponseType.BadRequest, reason, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.ServerError) {
return { type: ChatFetchResponseType.Failed, reason, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.ContentFilter) {
return { type: ChatFetchResponseType.PromptFiltered, reason, category: FilterReason.Prompt, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.AgentUnauthorized) {
return { type: ChatFetchResponseType.AgentUnauthorized, reason, authorizationUrl: response.data!.authorize_url, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.AgentFailedDependency) {
return { type: ChatFetchResponseType.AgentFailedDependency, reason, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.ExtensionBlocked) {
const retryAfter = typeof response.data?.retryAfter === 'number' ? response.data.retryAfter : 300;
return { type: ChatFetchResponseType.ExtensionBlocked, reason, requestId, retryAfter, learnMoreLink: response.data?.learnMoreLink ?? '', serverRequestId };
}
if (response.failKind === ChatFailKind.NotFound) {
return { type: ChatFetchResponseType.NotFound, reason, requestId, serverRequestId };
}
if (response.failKind === ChatFailKind.InvalidPreviousResponseId) {
return { type: ChatFetchResponseType.InvalidStatefulMarker, reason, requestId, serverRequestId };
}

return { type: ChatFetchResponseType.Failed, reason, requestId, serverRequestId };
}

private processError(err: unknown, requestId: string, gitHubRequestId: string | undefined, usernameToScrub: string | undefined, isAuto: boolean): ChatFetchError {
const capiWebSocketError = (err as any)?.capiWebSocketError as CAPIWebSocketErrorEvent | undefined;
if (capiWebSocketError) {
return this._handleWebSocketError(capiWebSocketError, requestId, gitHubRequestId, isAuto);
}

const fetcher = this._fetcherService;
// If we cancelled a network request, we don't want to log an error
if (fetcher.isAbortError(err)) {
return {
type: ChatFetchResponseType.Canceled,
reason: 'network request aborted',
requestId: requestId,
serverRequestId: gitHubRequestId,
};
}
if (isCancellationError(err)) {
return {
type: ChatFetchResponseType.Canceled,
reason: 'Got a cancellation error',
requestId: requestId,
serverRequestId: gitHubRequestId,
};
}
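// A stream that closes prematurely is treated as a cancellation rather than a hard failure.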
if (err && (
(err instanceof Error && err.message === 'Premature close') ||
(typeof err === 'object' && (err as any).code === 'ERR_STREAM_PREMATURE_CLOSE') /* to be extra sure */)
) {
return {
type: ChatFetchResponseType.Canceled,
reason: 'Stream closed prematurely',
requestId: requestId,
serverRequestId: gitHubRequestId,
};
}
this._logService.error(ErrorUtils.fromUnknown(err), `Error on conversation request`);
this._telemetryService.sendGHTelemetryException(err, 'Error on conversation request');
const userMessage = fetcher.getUserMessageForFetcherError(err);
const errorDetail = collectSingleLineErrorMessage(err, true);
const scrubbedErrorDetail = this.scrubErrorDetail(errorDetail, usernameToScrub);
if (fetcher.isInternetDisconnectedError(err)) {
return {
type: ChatFetchResponseType.NetworkError,
reason: `It appears you're not connected to the internet, please check your network connection and try again.`,
reasonDetail: scrubbedErrorDetail,
requestId: requestId,
serverRequestId: gitHubRequestId,
};
} else if (fetcher.isFetcherError(err)) {
const isNetworkProcessCrash = fetcher.isNetworkProcessCrashedError(err);
return {
type: ChatFetchResponseType.NetworkError,
reason: userMessage,
reasonDetail: scrubbedErrorDetail,
requestId: requestId,
serverRequestId: gitHubRequestId,
...(isNetworkProcessCrash ? { isNetworkProcessCrash: true } : {}),
};
} else {
return {
type: ChatFetchResponseType.Failed,
reason: 'Error on conversation request. Check the log for more details.',
reasonDetail: scrubbedErrorDetail,
requestId: requestId,
serverRequestId: gitHubRequestId,
};
}
}

private async _handleWebSocketCAPIError(event: CAPIWebSocketErrorEvent, modelRequestId: RequestId): Promise<ChatRequestFailed> {
const { code, message } = event.error;
const capiError = { code, message };
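// Error codes may carry a detail suffix after a colon; classify on the prefix alone.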
const codePrefix = code.split(':')[0];

this._logService.error(`WebSocket CAPI error: ${message} (${code})`);

if (codePrefix === 'rate_limited' || codePrefix === 'user_model_rate_limited' || codePrefix === 'user_global_rate_limited' || codePrefix === 'integration_rate_limited' || codePrefix === 'model_overloaded' || codePrefix === 'agent_mode_limit_exceeded') {
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.RateLimited,
reason: message,
data: { capiError },
};
}
if (codePrefix === 'quota_exceeded' || codePrefix === 'free_quota_exceeded' || codePrefix === 'overage_limit_reached' || codePrefix === 'billing_not_configured') {
// Refresh the copilot token so isChatQuotaExceeded reflects the new state,
// matching the HTTP 402 handler behavior.
if (!this._authenticationService.copilotToken?.isChatQuotaExceeded) {
this._authenticationService.resetCopilotToken(402);
await this._authenticationService.getCopilotToken();
}
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.QuotaExceeded,
reason: message,
data: { capiError },
};
}
if (code === 'content_filter') {
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.ContentFilter,
reason: message,
};
}
if (code === 'not_found') {
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.NotFound,
reason: message,
};
}
if (code === 'request_too_large') {
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.Unknown,
reason: `Request Failed: ${code} ${message}`,
};
}
if (code === 'service_unavailable') {
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.ServerError,
reason: `Request Failed: ${code} ${message}`,
};
}
if (code === 'bad_request') {
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.Unknown,
reason: `Request Failed: ${code} ${message}`,
};
}

// internal_error, session_expired, or any unknown code
return {
type: FetchResponseKind.Failed,
modelRequestId,
failKind: ChatFailKind.ServerError,
reason: `Request Failed: ${code} ${message || 'WebSocket server error'}`,
};
}

private _handleWebSocketError(event: CAPIWebSocketErrorEvent, requestId: string, serverRequestId: string | undefined, isAuto: boolean): ChatFetchError {
const { code, message } = event.error;
const capiError = { code, message };
const codePrefix = code.split(':')[0];

if (codePrefix === 'rate_limited' || codePrefix === 'user_model_rate_limited' || codePrefix === 'user_global_rate_limited' || codePrefix === 'integration_rate_limited' || codePrefix === 'model_overloaded' || codePrefix === 'agent_mode_limit_exceeded') {
return { type: ChatFetchResponseType.RateLimited, reason: message, requestId, serverRequestId, retryAfter: undefined, rateLimitKey: '', isAuto, capiError };
}
if (codePrefix === 'quota_exceeded' || codePrefix === 'free_quota_exceeded' || codePrefix === 'overage_limit_reached' || codePrefix === 'billing_not_configured') {
return { type: ChatFetchResponseType.QuotaExceeded, reason: message, requestId, serverRequestId, capiError, retryAfter: undefined };
}
if (code === 'content_filter') {
return { type: ChatFetchResponseType.PromptFiltered, reason: message, category: FilterReason.Prompt, requestId, serverRequestId };
}
if (code === 'not_found') {
return { type: ChatFetchResponseType.NotFound, reason: message, requestId, serverRequestId };
}
if (code === 'bad_request') {
return { type: ChatFetchResponseType.BadRequest, reason: message, requestId, serverRequestId };
}

// internal_error, session_expired, service_unavailable, request_too_large, or any unknown code
return { type: ChatFetchResponseType.Failed, reason: `Request Failed: ${code} ${message || 'WebSocket server error'}`, requestId, serverRequestId };
}

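// Redact the signed-in username from error detail strings.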
private scrubErrorDetail(errorDetail: string, usernameToScrub: string | undefined) {
if (usernameToScrub) {
const regex = new RegExp(escapeRegExpCharacters(usernameToScrub), 'ig');
errorDetail = errorDetail.replaceAll(regex, '<login>');
}
return errorDetail.replaceAll(/(?<=logged in as )(?!<login>)[^\s]+/ig, '!<login>!'); // marking fallback with !
}
}

/**
* Validates that a chat request payload is well formed
* @param messages The messages being sent in the chat request
* @param postOptions The request options accompanying the messages
* @returns Whether the chat payload is valid
*/
function isValidChatPayload(messages: Raw.ChatMessage[], postOptions: OptionalChatRequestParams, endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): { isValid: boolean; reason: string } {
if (messages.length === 0) {
return { isValid: false, reason: asUnexpected('No messages provided') };
}
if (postOptions?.max_tokens && postOptions?.max_tokens < 1) {
return { isValid: false, reason: asUnexpected('Invalid response token parameter') };
}

const functionNamePattern = /^[a-zA-Z0-9_-]+$/;
if (
postOptions?.functions?.some(f => !f.name.match(functionNamePattern)) ||
postOptions?.function_call?.name && !postOptions.function_call.name.match(functionNamePattern)
) {
return { isValid: false, reason: asUnexpected('Function names must match ^[a-zA-Z0-9_-]+$') };
}

if (postOptions?.tools && postOptions.tools.length > HARD_TOOL_LIMIT && !endpoint.supportsToolSearch) {
return { isValid: false, reason: `Tool limit exceeded (${postOptions.tools.length}/${HARD_TOOL_LIMIT}). Click "Configure Tools" in the chat input to disable ${postOptions.tools.length - HARD_TOOL_LIMIT} tools and retry.` };
}

return { isValid: true, reason: '' };
}

function asUnexpected(reason: string) {
return `Prompt failed validation with the reason: ${reason}. Please file an issue.`;
}

export function createTelemetryData(chatEndpointInfo: IChatEndpoint, location: ChatLocation, headerRequestId: string) {
return TelemetryData.createAndMarkAsIssued({
endpoint: 'completions',
engineName: 'chat',
uiKind: ChatLocation.toString(location),
headerRequestId
});
}

/**
* WARNING: The value that is returned from this function drives the disablement of RAI for full-file rewrite requests
* in Copilot Edits, Copilot Chat, Agent Mode, and Inline Chat.
* If your chat location generates full-file rewrite requests and you are unsure if changing something here will cause problems, please talk to @roblourens
*/
export function locationToIntent(location: ChatLocation): string {
switch (location) {
case ChatLocation.Panel:
return 'conversation-panel';
case ChatLocation.Editor:
return 'conversation-inline';
case ChatLocation.EditingSession:
return 'conversation-edits';
case ChatLocation.Notebook:
return 'conversation-notebook';
case ChatLocation.Terminal:
return 'conversation-terminal';
case ChatLocation.Other:
return 'conversation-other';
case ChatLocation.Agent:
return 'conversation-agent';
case ChatLocation.ResponsesProxy:
return 'responses-proxy';
case ChatLocation.MessagesProxy:
return 'messages-proxy';
}
}