// extensions/copilot/src/platform/endpoint/vscode-node/extChatEndpoint.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { Raw } from '@vscode/prompt-tsx';6import type { CancellationToken } from 'vscode';7import * as vscode from 'vscode';8import { FetchStreamRecorder } from '../../../platform/chat/common/chatMLFetcher';9import { toErrorMessage } from '../../../util/common/errorMessage';10import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';11import { AsyncIterableObject } from '../../../util/vs/base/common/async';12import { generateUuid } from '../../../util/vs/base/common/uuid';13import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';14import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../chat/common/commonTypes';15import { ILogService } from '../../log/common/logService';16import { ContextManagementResponse } from '../../networking/common/anthropic';17import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../networking/common/fetch';18import { Response } from '../../networking/common/fetcherService';19import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../networking/common/networking';20import { ChatCompletion } from '../../networking/common/openai';21import { IOTelService } from '../../otel/common/otelService';22import { retrieveCapturingTokenByCorrelation, storeCapturingTokenForCorrelation } from '../../requestLogger/node/requestLogger';23import { ITelemetryService } from '../../telemetry/common/telemetry';24import { TelemetryData } from '../../telemetry/common/telemetryData';25import { EndpointEditToolName, isEndpointEditToolName } from 
'../common/endpointProvider';26import { CustomDataPartMimeTypes } from '../common/endpointTypes';27import { decodeStatefulMarker, encodeStatefulMarker, rawPartAsStatefulMarker } from '../common/statefulMarkerContainer';28import { rawPartAsThinkingData } from '../common/thinkingDataContainer';29import { ExtensionContributedChatTokenizer } from './extChatTokenizer';3031enum ChatImageMimeType {32PNG = 'image/png',33JPEG = 'image/jpeg',34GIF = 'image/gif',35WEBP = 'image/webp',36BMP = 'image/bmp',37}3839export class ExtensionContributedChatEndpoint implements IChatEndpoint {40private readonly _maxTokens: number;41public readonly isDefault: boolean = false;42public readonly isFallback: boolean = false;43public readonly isPremium: boolean = false;44public readonly multiplier: number = 0;45public readonly isExtensionContributed = true;46public readonly supportedEditTools?: readonly EndpointEditToolName[] | undefined;4748constructor(49private readonly languageModel: vscode.LanguageModelChat,50@IInstantiationService private readonly _instantiationService: IInstantiationService,51@IOTelService private readonly _otelService: IOTelService,52) {53// Initialize with the model's max tokens54this._maxTokens = languageModel.maxInputTokens;55this.supportedEditTools = languageModel.capabilities.editToolsHint?.filter(isEndpointEditToolName);56}5758get modelProvider(): string {59return this.languageModel.vendor;60}6162get modelMaxPromptTokens(): number {63return this._maxTokens;64}6566get maxOutputTokens(): number {67// The VS Code API doesn't expose max output tokens, use a reasonable default68return 8192;69}7071get urlOrRequestMetadata(): string {72// Not used for extension contributed endpoints73return '';74}7576get model(): string {77return this.languageModel.id;78}7980get name(): string {81return this.languageModel.name;82}8384get version(): string {85return this.languageModel.version;86}8788get family(): string {89return this.languageModel.family;90}9192get tokenizer(): 
TokenizerType {93// Most language models use the O200K tokenizer, if they don't they should specify in their metadata94return TokenizerType.O200K;95}9697get showInModelPicker(): boolean {98// TODO @lramos15 - Need some API exposed for this, registration seems to have it99return true;100}101102get supportsToolCalls(): boolean {103return this.languageModel.capabilities?.supportsToolCalling ?? false;104}105106get supportsVision(): boolean {107return this.languageModel?.capabilities?.supportsImageToText ?? false;108}109110get supportsPrediction(): boolean {111return false;112}113114get policy(): 'enabled' | { terms: string } {115return 'enabled';116}117118async processResponseFromChatEndpoint(119telemetryService: ITelemetryService,120logService: ILogService,121response: Response,122expectedNumChoices: number,123finishCallback: FinishedCallback,124telemetryData: TelemetryData,125cancellationToken?: CancellationToken126): Promise<AsyncIterableObject<ChatCompletion>> {127throw new Error('processResponseFromChatEndpoint not supported for extension contributed endpoints');128}129130async acceptChatPolicy(): Promise<boolean> {131return true;132}133134public acquireTokenizer(): ITokenizer {135// Use the extension-contributed tokenizer that leverages the VS Code language model API136return new ExtensionContributedChatTokenizer(this.languageModel);137}138139async makeChatRequest(140debugName: string,141messages: Raw.ChatMessage[],142finishedCb: FinishedCallback | undefined,143token: CancellationToken,144location: ChatLocation,145source?: { extensionId?: string | undefined },146requestOptions?: Omit<OptionalChatRequestParams, 'n'>,147userInitiatedRequest?: boolean,148telemetryProperties?: Record<string, string>,149): Promise<ChatResponse> {150return this.makeChatRequest2({151debugName,152messages,153finishedCb,154location,155source,156requestOptions,157userInitiatedRequest,158telemetryProperties,159}, token);160}161162async 
makeChatRequest2({163debugName,164messages,165requestOptions,166finishedCb,167location,168source,169}: IMakeChatRequestOptions, token: CancellationToken): Promise<ChatResponse> {170const vscodeMessages = convertToApiChatMessage(messages);171const ourRequestId = generateUuid();172173// Capture active OTel trace context to propagate through IPC to the BYOK provider.174// Each provider creates its own chat span with full usage data:175// - OpenAI-compatible (Azure, OpenAI, etc.): via CopilotLanguageModelWrapper → chatMLFetcher176// - Anthropic: inside AnthropicLMProvider177// - Gemini: inside GeminiNativeBYOKLMProvider178const activeTraceCtx = this._otelService.getActiveTraceContext();179180const vscodeOptions: vscode.LanguageModelChatRequestOptions = {181tools: ((requestOptions?.tools ?? []) as OpenAiFunctionTool[]).map(tool => ({182name: tool.function.name,183description: tool.function.description,184inputSchema: tool.function.parameters,185})),186// Pass correlation ID and OTel trace context through modelOptions for cross-IPC restoration.187modelOptions: {188_capturingTokenCorrelationId: ourRequestId,189_otelTraceContext: activeTraceCtx ?? null,190}191};192193// Store current CapturingToken for retrieval by BYOK providers after IPC crossing194//195// Note: We intentionally don't create an OTel chat span here for extension-contributed models.196// The BYOK provider (CopilotLanguageModelWrapper) creates the real chat span via chatMLFetcher197// with full token usage, response model, and cache data. 
Creating a span here would duplicate it.198storeCapturingTokenForCorrelation(ourRequestId);199200const streamRecorder = new FetchStreamRecorder(finishedCb);201202try {203const response = await this.languageModel.sendRequest(vscodeMessages, vscodeOptions, token);204let text = '';205let numToolsCalled = 0;206const requestId = ourRequestId;207208// consume stream209for await (const chunk of response.stream) {210if (chunk instanceof vscode.LanguageModelTextPart) {211text += chunk.value;212if (streamRecorder.callback) {213await streamRecorder.callback(text, 0, { text: chunk.value });214}215} else if (chunk instanceof vscode.LanguageModelToolCallPart) {216if (streamRecorder.callback) {217const functionCalls = [chunk].map(tool => ({218name: tool.name ?? '',219arguments: JSON.stringify(tool.input) ?? '',220id: tool.callId221}));222numToolsCalled++;223await streamRecorder.callback(text, 0, { text: '', copilotToolCalls: functionCalls });224}225} else if (chunk instanceof vscode.LanguageModelDataPart) {226if (chunk.mimeType === CustomDataPartMimeTypes.StatefulMarker) {227const decoded = decodeStatefulMarker(chunk.data);228await streamRecorder.callback?.(text, 0, { text: '', statefulMarker: decoded.marker });229} else if (chunk.mimeType === CustomDataPartMimeTypes.ContextManagement) {230const contextManagement = JSON.parse(new TextDecoder().decode(chunk.data)) as ContextManagementResponse;231await streamRecorder.callback?.(text, 0, { text: '', contextManagement });232}233} else if (chunk instanceof vscode.LanguageModelThinkingPart) {234if (streamRecorder.callback) {235await streamRecorder.callback(text, 0, {236text: '',237thinking: {238text: chunk.value,239id: chunk.id || '',240metadata: chunk.metadata241}242});243}244}245}246247if (text || numToolsCalled > 0) {248return {249type: ChatFetchResponseType.Success,250requestId,251serverRequestId: requestId,252usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },253value: 
text,254resolvedModel: this.languageModel.id255};256} else {257return {258type: ChatFetchResponseType.Unknown,259reason: 'No response from language model',260requestId: requestId,261serverRequestId: undefined262};263}264} catch (e) {265return {266type: ChatFetchResponseType.Failed,267reason: toErrorMessage(e, true),268requestId: generateUuid(),269serverRequestId: undefined270};271} finally {272retrieveCapturingTokenByCorrelation(ourRequestId);273}274}275276createRequestBody(options: ICreateEndpointBodyOptions): IEndpointBody {277throw new Error('unreachable'); // this endpoint does not call into fetchers278}279280cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {281return this._instantiationService.createInstance(ExtensionContributedChatEndpoint, {282...this.languageModel,283maxInputTokens: modelMaxPromptTokens284});285}286}287288export function convertToApiChatMessage(messages: Raw.ChatMessage[]): Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2> {289const apiMessages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2> = [];290for (const message of messages) {291const apiContent: Array<vscode.LanguageModelTextPart | vscode.LanguageModelToolResultPart2 | vscode.LanguageModelToolCallPart | vscode.LanguageModelDataPart | vscode.LanguageModelThinkingPart> = [];292// Easier to work with arrays everywhere, rather than string in some cases. 
So convert to a single text content part293for (const contentPart of message.content) {294if (contentPart.type === Raw.ChatCompletionContentPartKind.Text) {295apiContent.push(new vscode.LanguageModelTextPart(contentPart.text));296} else if (contentPart.type === Raw.ChatCompletionContentPartKind.Image) {297// Handle base64 encoded images298if (contentPart.imageUrl.url.startsWith('data:')) {299const dataUrlRegex = /^data:([^;]+);base64,(.*)$/;300const match = contentPart.imageUrl.url.match(dataUrlRegex);301302if (match) {303const [, mimeType, base64Data] = match;304apiContent.push(new vscode.LanguageModelDataPart(Buffer.from(base64Data, 'base64'), mimeType as ChatImageMimeType));305}306} else {307// Not a base64 image308continue;309}310} else if (contentPart.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint) {311apiContent.push(new vscode.LanguageModelDataPart(new TextEncoder().encode('ephemeral'), CustomDataPartMimeTypes.CacheControl));312} else if (contentPart.type === Raw.ChatCompletionContentPartKind.Opaque) {313const statefulMarker = rawPartAsStatefulMarker(contentPart);314if (statefulMarker) {315apiContent.push(new vscode.LanguageModelDataPart(encodeStatefulMarker(statefulMarker.modelId, statefulMarker.marker), CustomDataPartMimeTypes.StatefulMarker));316}317const thinkingData = rawPartAsThinkingData(contentPart);318if (thinkingData) {319apiContent.push(new vscode.LanguageModelThinkingPart(thinkingData.text, thinkingData.id, thinkingData.metadata));320}321}322}323324if (message.role === Raw.ChatRole.System || message.role === Raw.ChatRole.User) {325apiMessages.push({326role: message.role === Raw.ChatRole.System ? 
vscode.LanguageModelChatMessageRole.System : vscode.LanguageModelChatMessageRole.User,327name: message.name,328content: apiContent329});330} else if (message.role === Raw.ChatRole.Assistant) {331if (message.toolCalls) {332for (const toolCall of message.toolCalls) {333apiContent.push(new vscode.LanguageModelToolCallPart(toolCall.id, toolCall.function.name, JSON.parse(toolCall.function.arguments)));334}335}336apiMessages.push({337role: vscode.LanguageModelChatMessageRole.Assistant,338name: message.name,339content: apiContent340});341} else if (message.role === Raw.ChatRole.Tool) {342const toolResultPart: vscode.LanguageModelToolResultPart2 = new vscode.LanguageModelToolResultPart2(343message.toolCallId ?? '',344apiContent345);346apiMessages.push({347role: vscode.LanguageModelChatMessageRole.User,348name: '',349content: [toolResultPart]350});351}352}353return apiMessages;354}355356357