Path: blob/main/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import { RequestMetadata, RequestType } from '@vscode/copilot-api';
import { OpenAI, Raw } from '@vscode/prompt-tsx';
import type { CancellationToken } from 'vscode';
import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { deepClone, mixin } from '../../../util/vs/base/common/objects';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { IChatMLFetcher, Source } from '../../chat/common/chatMLFetcher';
import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../chat/common/commonTypes';
import { getTextPart } from '../../chat/common/globalStringUtils';
import { CHAT_MODEL, ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
import { ILogService } from '../../log/common/logService';
import { isAnthropicContextEditingEnabled } from '../../networking/common/anthropic';
import { FinishedCallback, getRequestId, ICopilotToolCall, OptionalChatRequestParams } from '../../networking/common/fetch';
import { IFetcherService, Response } from '../../networking/common/fetcherService';
import { createCapiRequestBody, IChatEndpoint, IChatEndpointTokenPricing, ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../networking/common/networking';
import { CAPIChatMessage, ChatCompletion, FinishedCompletionReason, RawMessageConversionCallback } from '../../networking/common/openai';
import { prepareChatCompletionForReturn } from '../../networking/node/chatStream';
import { IChatWebSocketManager } from '../../networking/node/chatWebSocketManager';
import { SSEProcessor } from '../../networking/node/stream';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ITelemetryService, TelemetryProperties } from '../../telemetry/common/telemetry';
import { TelemetryData } from '../../telemetry/common/telemetryData';
import { ITokenizerProvider } from '../../tokenizer/node/tokenizer';
import { ICAPIClientService } from '../common/capiClient';
import { isAnthropicFamily, isGeminiFamily, modelSupportsContextEditing, modelSupportsToolSearch } from '../common/chatModelCapabilities';
import { IDomainService } from '../common/domainService';
import { CustomModel, IChatModelInformation, IModelTokenPrices, ModelSupportedEndpoint } from '../common/endpointProvider';
import { createMessagesRequestBody, processResponseFromMessagesEndpoint } from './messagesApi';
import { createResponsesRequestBody, getResponsesApiCompactionThreshold, processResponseFromChatEndpoint } from './responsesApi';
import { filterHistoryImages } from './imageLimits';

/**
 * The default processor for the stream format from CAPI.
 *
 * Wraps the SSE stream in an {@link SSEProcessor}, sends a
 * `completion.finishReason` telemetry event per finished choice, and maps each
 * finished choice through {@link prepareChatCompletionForReturn}.
 */
export async function defaultChatResponseProcessor(
	telemetryService: ITelemetryService,
	logService: ILogService,
	response: Response,
	expectedNumChoices: number,
	finishCallback: FinishedCallback,
	telemetryData: TelemetryData,
	cancellationToken?: CancellationToken | undefined
) {
	const processor = await SSEProcessor.create(logService, telemetryService, expectedNumChoices, response, cancellationToken);
	const finishedCompletions = processor.processSSE(finishCallback);
	const chatCompletions = AsyncIterableObject.map(finishedCompletions, (solution) => {
		// A missing finish reason means the client stopped consuming the stream early
		const loggedReason = solution.reason ?? 'client-trimmed';
		const dataToSendToTelemetry = telemetryData.extendedBy({
			completionChoiceFinishReason: loggedReason,
			headerRequestId: solution.requestId.headerRequestId
		});
		telemetryService.sendGHTelemetryEvent('completion.finishReason', dataToSendToTelemetry.properties, dataToSendToTelemetry.measurements);
		return prepareChatCompletionForReturn(telemetryService, logService, solution, telemetryData);
	});
	return chatCompletions;
}

/**
 * Processes a non-streamed (single JSON body) chat completions response into
 * the same async-iterable of {@link ChatCompletion}s that the streaming path
 * produces, invoking {@link finishCallback} once per choice.
 */
export async function defaultNonStreamChatResponseProcessor(response: Response, finishCallback: FinishedCallback, telemetryData: TelemetryData) {
	const textResponse = await response.text();
	const jsonResponse = JSON.parse(textResponse);
	const completions: ChatCompletion[] = [];
	for (let i = 0; i < (jsonResponse?.choices?.length || 0); i++) {
		const choice = jsonResponse.choices[i];
		const message: Raw.AssistantChatMessage = {
			role: choice.message.role,
			content: choice.message.content,
			name: choice.message.name,
			// Normalize property name: OpenAI API uses snake_case (tool_calls) but our types expect camelCase (toolCalls)
			// See: https://platform.openai.com/docs/api-reference/chat/object#chat-object-choices-message-tool_calls
			toolCalls: choice.message.toolCalls ?? choice.message.tool_calls,
		};
		const messageText = getTextPart(message.content);
		// Fall back to a locally generated id so the completion always has a request id
		const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
		const ghRequestId = response.headers.get('x-github-request-id') ?? '';
		const { serverExperiments } = getRequestId(response.headers);

		const completion: ChatCompletion = {
			blockFinished: false,
			choiceIndex: i,
			model: jsonResponse.model,
			filterReason: undefined,
			finishReason: choice.finish_reason as FinishedCompletionReason,
			message: message,
			usage: jsonResponse.usage,
			tokens: [], // This is used for repetition detection so not super important to be accurate
			requestId: { headerRequestId: requestId, gitHubRequestId: ghRequestId, completionId: jsonResponse.id, created: jsonResponse.created, deploymentId: '', serverExperiments },
			telemetryData: telemetryData
		};
		const functionCall: ICopilotToolCall[] = [];
		for (const tool of message.toolCalls ?? []) {
			functionCall.push({
				name: tool.function?.name ?? '',
				arguments: tool.function?.arguments ?? '',
				id: tool.id ?? '',
			});
		}
		await finishCallback(messageText, i, {
			text: messageText,
			copilotToolCalls: functionCall,
		});
		completions.push(completion);
	}

	return AsyncIterableObject.fromArray(completions);
}

const AIC_DIVISOR = 1_000_000_000;
const TOKENS_PER_MILLION = 1_000_000;

/**
 * Converts raw billing token prices into normalized AICs per million tokens.
 *
 * Raw prices are divided by {@link AIC_DIVISOR} to get AICs, then scaled
 * so the result is always "per 1M tokens" regardless of the original batch_size.
 */
function normalizeTokenPricing(tokenPrices: IModelTokenPrices | undefined): IChatEndpointTokenPricing | undefined {
	if (!tokenPrices) {
		return undefined;
	}
	const { batch_size, input_price, output_price, cache_price } = tokenPrices;
	const scale = TOKENS_PER_MILLION / batch_size;
	return {
		inputPrice: (input_price / AIC_DIVISOR) * scale,
		outputPrice: (output_price / AIC_DIVISOR) * scale,
		cacheReadTokenPrice: (cache_price / AIC_DIVISOR) * scale,
	};
}

/**
 * A chat endpoint backed by CAPI model metadata. Exposes the model's
 * capabilities/limits as readonly fields and routes requests to the
 * chat-completions, responses, or (Anthropic) messages API shape depending on
 * the model's supported endpoints and configuration.
 */
export class ChatEndpoint implements IChatEndpoint {
	private readonly _maxTokens: number;
	private readonly _maxOutputTokens: number;
	public readonly model: string;
	public readonly name: string;
	public readonly version: string;
	public readonly modelProvider: string;
	public readonly family: string;
	public readonly tokenizer: TokenizerType;
	public readonly showInModelPicker: boolean;
	public readonly isFallback: boolean;
	public readonly supportsToolCalls: boolean;
	public readonly supportsVision: boolean;
	public readonly supportsPrediction: boolean;
	public readonly supportsAdaptiveThinking?: boolean;
	public readonly minThinkingBudget?: number;
	public readonly maxThinkingBudget?: number;
	public readonly supportsReasoningEffort?: string[];
	public readonly supportsToolSearch?: boolean;
	public readonly supportsContextEditing?: boolean;
	public readonly isPremium?: boolean | undefined;
	public readonly multiplier?: number | undefined;
	public readonly restrictedToSkus?: string[] | undefined;
	public readonly tokenPricing?: IChatEndpointTokenPricing | undefined;
	public readonly customModel?: CustomModel | undefined;
	public readonly maxPromptImages?: number | undefined;

	private readonly _supportsStreaming: boolean;

	constructor(
		public readonly modelMetadata: IChatModelInformation,
		@IDomainService protected readonly _domainService: IDomainService,
		@IChatMLFetcher private readonly _chatMLFetcher: IChatMLFetcher,
		@ITokenizerProvider private readonly _tokenizerProvider: ITokenizerProvider,
		@IInstantiationService protected readonly _instantiationService: IInstantiationService,
		@IConfigurationService protected readonly _configurationService: IConfigurationService,
		@IExperimentationService private readonly _expService: IExperimentationService,
		@IChatWebSocketManager private readonly _chatWebSocketService: IChatWebSocketManager,
		@ILogService _logService: ILogService,
	) {
		// This metadata should always be present, but if not we will default to 8192 tokens
		this._maxTokens = modelMetadata.capabilities.limits?.max_prompt_tokens ?? 8192;
		// This metadata should always be present, but if not we will default to 4096 tokens
		this._maxOutputTokens = modelMetadata.capabilities.limits?.max_output_tokens ?? 4096;
		this.model = modelMetadata.id;
		this.modelProvider = modelMetadata.vendor;
		this.name = modelMetadata.name;
		this.version = modelMetadata.version;
		this.family = modelMetadata.capabilities.family;
		this.tokenizer = modelMetadata.capabilities.tokenizer;
		this.showInModelPicker = modelMetadata.model_picker_enabled;
		this.isPremium = modelMetadata.billing?.is_premium;
		this.multiplier = modelMetadata.billing?.multiplier;
		this.restrictedToSkus = modelMetadata.billing?.restricted_to;
		this.tokenPricing = normalizeTokenPricing(modelMetadata.billing?.token_prices);
		this.isFallback = modelMetadata.is_chat_fallback;
		this.supportsToolCalls = !!modelMetadata.capabilities.supports.tool_calls;
		this.supportsVision = !!modelMetadata.capabilities.supports.vision;
		this.supportsPrediction = !!modelMetadata.capabilities.supports.prediction;
		this.supportsAdaptiveThinking = modelMetadata.capabilities.supports.adaptive_thinking;
		this.minThinkingBudget = modelMetadata.capabilities.supports.min_thinking_budget;
		this.maxThinkingBudget = modelMetadata.capabilities.supports.max_thinking_budget;
		this.supportsReasoningEffort = modelMetadata.capabilities.supports.reasoning_effort;
		// Server-provided capability wins; otherwise fall back to client-side model checks
		this.supportsToolSearch = modelMetadata.capabilities.supports.tool_search ?? modelSupportsToolSearch(this.model, this._configurationService, this._expService);
		this.supportsContextEditing = modelMetadata.capabilities.supports.context_editing ?? modelSupportsContextEditing(this.model);
		this._supportsStreaming = !!modelMetadata.capabilities.supports.streaming;
		this.customModel = modelMetadata.custom_model;
		this.maxPromptImages = modelMetadata.capabilities.limits?.vision?.max_prompt_images;
	}

	// TODO: Thread enableThinking through the fetch pipeline (INetworkRequestOptions / chatMLFetcher positional params)
	// so getExtraHeaders can gate the interleaved-thinking header on whether thinking is actually enabled for the
	// request, rather than using the location check. Once plumbed, replace isAllowedConversationAgentModel with
	// an enableThinking check for the thinking header (keep location gate for context management / tool search).
	public getExtraHeaders(_location?: ChatLocation): Record<string, string> {
		const headers: Record<string, string> = { ...this.modelMetadata.requestHeaders };

		// Anthropic-specific headers only apply when routing through the Messages API
		if (this.useMessagesApi) {

			const modelProviderPreference = this._configurationService.getConfig(ConfigKey.TeamInternal.ModelProviderPreference);
			if (modelProviderPreference) {
				headers['X-Model-Provider-Preference'] = modelProviderPreference;
			}

			const betas: string[] = [];

			if (!this.supportsAdaptiveThinking) {
				betas.push('interleaved-thinking-2025-05-14');
			}
			if (this.supportsToolSearch) {
				betas.push('advanced-tool-use-2025-11-20');
			}
			if (isAnthropicContextEditingEnabled(this, this._configurationService, this._expService)) {
				betas.push('context-management-2025-06-27');
			}
			if (betas.length > 0) {
				headers['anthropic-beta'] = betas.join(',');
			}
		}

		return headers;
	}

	public get modelMaxPromptTokens(): number {
		return this._maxTokens;
	}

	public get maxOutputTokens(): number {
		return this._maxOutputTokens;
	}

	public get urlOrRequestMetadata(): string | RequestMetadata {
		// Use override or respect setting.
		// TODO unlikely but would break if it changes in the middle of a request being constructed
		return this.modelMetadata.urlOrRequestMetadata ??
			(this.useResponsesApi ? { type: RequestType.ChatResponses } :
				this.useMessagesApi ? { type: RequestType.ChatMessages } : { type: RequestType.ChatCompletions });
	}

	protected get useResponsesApi(): boolean {
		// NOTE(review): this early-return branch is subsumed by the final return below —
		// both reduce to "supported_endpoints includes Responses". Kept as-is to preserve behavior.
		if (this.modelMetadata.supported_endpoints
			&& !this.modelMetadata.supported_endpoints.includes(ModelSupportedEndpoint.ChatCompletions)
			&& this.modelMetadata.supported_endpoints.includes(ModelSupportedEndpoint.Responses)
		) {
			return true;
		}

		return !!this.modelMetadata.supported_endpoints?.includes(ModelSupportedEndpoint.Responses);
	}

	protected get useWebSocketResponsesApi(): boolean {
		return !!this.modelMetadata.supported_endpoints?.includes(ModelSupportedEndpoint.WebSocketResponses);
	}

	protected get useMessagesApi(): boolean {
		// Gated on both the experiment flag and the model advertising Messages support
		const enableMessagesApi = this._configurationService.getExperimentBasedConfig(ConfigKey.UseAnthropicMessagesApi, this._expService);
		return !!(enableMessagesApi && this.modelMetadata.supported_endpoints?.includes(ModelSupportedEndpoint.Messages));
	}

	public get degradationReason(): string | undefined {
		// Warnings take precedence over informational messages
		return this.modelMetadata.warning_messages?.at(0)?.message ?? this.modelMetadata.info_messages?.at(0)?.message;
	}

	public get apiType(): string {
		return this.useResponsesApi ? 'responses' :
			this.useMessagesApi ? 'messages' : 'chatCompletions';
	}

	/**
	 * Mutates the outgoing request body in place to account for model quirks:
	 * strips tools for models without tool-call support, disables streaming for
	 * non-streaming models, and rewrites system messages to user messages for o1.
	 */
	interceptBody(body: IEndpointBody | undefined): void {
		// Remove tool calls from requests that don't support them
		// We really shouldn't make requests to models that don't support tool calls with tools though
		if (body && !this.supportsToolCalls) {
			delete body['tools'];
		}

		// If the model doesn't support streaming, don't ask for a streamed request
		if (body && !this._supportsStreaming) {
			body.stream = false;
		}

		// If it's o1 we must modify the body significantly as the request is very different
		if (body?.messages && (this.family.startsWith('o1') || this.model === CHAT_MODEL.O1 || this.model === CHAT_MODEL.O1MINI)) {
			const newMessages: CAPIChatMessage[] = body.messages.map((message: CAPIChatMessage): CAPIChatMessage => {
				if (message.role === OpenAI.ChatRole.System) {
					return {
						role: OpenAI.ChatRole.User,
						content: message.content,
					};
				} else {
					return message;
				}
			});
			// Add the messages & model back
			body['messages'] = newMessages;
		}
	}

	/**
	 * Builds the request body for whichever API shape this endpoint uses
	 * (responses / messages / chat completions), after applying any per-model
	 * image limits to the message history.
	 */
	createRequestBody(options: ICreateEndpointBodyOptions): IEndpointBody {
		// Determine per-model image limit for APIs with known restrictions
		const imageLimit = this.getImageLimit();
		if (imageLimit !== undefined) {
			options = { ...options, messages: this.validateAndFilterImages(options.messages, imageLimit) };
		}

		if (this.useResponsesApi) {
			const body = this._instantiationService.invokeFunction(createResponsesRequestBody, options, this.model, this);
			return this.customizeResponsesBody(body);
		} else if (this.useMessagesApi) {
			const body = this._instantiationService.invokeFunction(createMessagesRequestBody, options, this.model, this);
			return this.customizeMessagesBody(body);
		} else {
			const body = createCapiRequestBody(options, this.model, this.getCompletionsCallback());
			return this.customizeCapiBody(body, options);
		}
	}

	/**
	 * Returns the model-specific image limit, or `undefined` if no limit applies.
	 * Anthropic Messages API allows up to 20 images per request; Gemini allows up to 10.
	 * These are hardcoded based on API documentation rather than model metadata to
	 * avoid being clamped by unreliable server-provided values.
	 */
	private getImageLimit(): number | undefined {
		if (this.useMessagesApi && isAnthropicFamily(this)) {
			return 20;
		}
		if (isGeminiFamily(this)) {
			return 10;
		}
		return undefined;
	}

	/**
	 * Thin wrapper around {@link filterHistoryImages} retained for test ergonomics.
	 */
	private validateAndFilterImages(messages: Raw.ChatMessage[], maxImages: number): Raw.ChatMessage[] {
		return filterHistoryImages(messages, maxImages);
	}

	// Subclasses may supply a conversion callback used when building CAPI bodies
	protected getCompletionsCallback(): RawMessageConversionCallback | undefined {
		return undefined;
	}

	// Hook for subclasses to adjust a Messages API body; default is a no-op
	protected customizeMessagesBody(body: IEndpointBody): IEndpointBody {
		return body;
	}

	// Hook for subclasses to adjust a Responses API body; default is a no-op
	protected customizeResponsesBody(body: IEndpointBody): IEndpointBody {
		return body;
	}

	protected customizeCapiBody(body: IEndpointBody, options: ICreateEndpointBodyOptions): IEndpointBody {

		// Apply Gemini function calling mode if configured
		const hasTools = !!options.requestOptions?.tools?.length;
		if (hasTools && this.family.toLowerCase().includes('gemini-3')) {
			const geminiFunctionCallingMode = this._configurationService.getExperimentBasedConfig(
				ConfigKey.TeamInternal.GeminiFunctionCallingMode,
				this._expService
			);
			// Only override tool_choice if experiment provides a value and user hasn't specified a function call
			if (geminiFunctionCallingMode && typeof body.tool_choice !== 'object') {
				body.tool_choice = geminiFunctionCallingMode;
			}
		}

		return body;
	}

	/**
	 * Dispatches the raw HTTP response to the processor matching this endpoint's
	 * API shape: responses API, messages API, non-streamed JSON, or SSE stream.
	 */
	public async processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken | undefined
	): Promise<AsyncIterableObject<ChatCompletion>> {
		if (this.useResponsesApi) {
			const compactionThreshold = getResponsesApiCompactionThreshold(this._configurationService, this._expService, this);
			return processResponseFromChatEndpoint(this._instantiationService, telemetryService, logService, response, expectedNumChoices, finishCallback, telemetryData, compactionThreshold);
		} else if (this.useMessagesApi) {
			return processResponseFromMessagesEndpoint(this._instantiationService, telemetryService, logService, response, finishCallback, telemetryData);
		} else if (!this._supportsStreaming) {
			return defaultNonStreamChatResponseProcessor(response, finishCallback, telemetryData);
		} else {
			return defaultChatResponseProcessor(telemetryService, logService, response, expectedNumChoices, finishCallback, telemetryData, cancellationToken);
		}
	}

	public acquireTokenizer(): ITokenizer {
		return this._tokenizerProvider.acquireTokenizer(this);
	}

	/**
	 * Issues a chat request, deciding whether to use the websocket transport and
	 * whether to honor stateful markers. If the server rejects the stateful
	 * marker, the request is retried once with the marker ignored.
	 */
	public async makeChatRequest2(options: IMakeChatRequestOptions, token: CancellationToken): Promise<ChatResponse> {
		const useWebSocket = options.useWebSocket ?? !!(
			options.turnId
			&& options.conversationId
			&& this.useWebSocketResponsesApi
			&& this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.ResponsesApiWebSocketEnabled, this._expService)
		);
		// Only trust a stateful marker when an active websocket connection exists for this conversation
		const ignoreStatefulMarker = options.ignoreStatefulMarker ?? !(
			useWebSocket
			&& options.conversationId
			&& options.turnId
			&& this._chatWebSocketService.hasActiveConnection(options.conversationId)
		);
		const response = await this._makeChatRequest2({
			...options,
			useWebSocket,
			ignoreStatefulMarker,
		}, token);
		if (response.type === ChatFetchResponseType.InvalidStatefulMarker) {
			// One retry with the marker explicitly ignored
			return this._makeChatRequest2({
				...options,
				useWebSocket,
				ignoreStatefulMarker: true
			}, token);
		}
		return response;
	}

	protected async _makeChatRequest2(options: IMakeChatRequestOptions, token: CancellationToken) {
		return this._chatMLFetcher.fetchOne({
			requestOptions: {},
			...options,
			endpoint: this,
		}, token);
	}

	// Positional-parameter convenience wrapper over makeChatRequest2
	public async makeChatRequest(
		debugName: string,
		messages: Raw.ChatMessage[],
		finishedCb: FinishedCallback | undefined,
		token: CancellationToken,
		location: ChatLocation,
		source?: Source,
		requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
		userInitiatedRequest?: boolean,
		telemetryProperties?: TelemetryProperties,
	): Promise<ChatResponse> {
		return this.makeChatRequest2({
			debugName,
			messages,
			finishedCb,
			location,
			source,
			requestOptions,
			userInitiatedRequest,
			telemetryProperties,
		}, token);
	}

	/**
	 * Creates a copy of this endpoint whose max prompt token budget is
	 * overridden; all other metadata is deep-cloned unchanged.
	 */
	public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
		return this._instantiationService.createInstance(
			ChatEndpoint,
			mixin(deepClone(this.modelMetadata), { capabilities: { limits: { max_prompt_tokens: modelMaxPromptTokens } } }));
	}
}

/**
 * A {@link ChatEndpoint} whose URL/request metadata is fixed at construction
 * time (for remote agents) and which always processes responses as an SSE
 * stream expecting multiple choices.
 */
export class RemoteAgentChatEndpoint extends ChatEndpoint {
	constructor(
		modelMetadata: IChatModelInformation,
		private readonly _requestMetadata: RequestMetadata,
		@IDomainService domainService: IDomainService,
		@ICAPIClientService capiClientService: ICAPIClientService,
		@IFetcherService fetcherService: IFetcherService,
		@ITelemetryService telemetryService: ITelemetryService,
		@IAuthenticationService authService: IAuthenticationService,
		@IChatMLFetcher chatMLFetcher: IChatMLFetcher,
		@ITokenizerProvider tokenizerProvider: ITokenizerProvider,
		@IInstantiationService instantiationService: IInstantiationService,
		@IConfigurationService configService: IConfigurationService,
		@IExperimentationService experimentService: IExperimentationService,
		@IChatWebSocketManager chatWebSocketService: IChatWebSocketManager,
		@ILogService logService: ILogService
	) {
		super(
			modelMetadata,
			domainService,
			chatMLFetcher,
			tokenizerProvider,
			instantiationService,
			configService,
			experimentService,
			chatWebSocketService,
			logService
		);
	}

	override processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken | undefined,
		_location?: ChatLocation,
	): Promise<AsyncIterableObject<ChatCompletion>> {
		// We must override expectedNumChoices to a value > 1 because remote agents can do internal
		// function calls which emit multiple completions even when only one choice was requested
		// It's awful that they do this, but we have to support it
		return defaultChatResponseProcessor(telemetryService, logService, response, 2, finishCallback, telemetryData, cancellationToken);
	}

	public override get urlOrRequestMetadata() {
		return this._requestMetadata;
	}
}