Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/vscode-node/extChatEndpoint.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Raw } from '@vscode/prompt-tsx';
7
import type { CancellationToken } from 'vscode';
8
import * as vscode from 'vscode';
9
import { FetchStreamRecorder } from '../../../platform/chat/common/chatMLFetcher';
10
import { toErrorMessage } from '../../../util/common/errorMessage';
11
import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';
12
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
13
import { generateUuid } from '../../../util/vs/base/common/uuid';
14
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
15
import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../chat/common/commonTypes';
16
import { ILogService } from '../../log/common/logService';
17
import { ContextManagementResponse } from '../../networking/common/anthropic';
18
import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../networking/common/fetch';
19
import { Response } from '../../networking/common/fetcherService';
20
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../networking/common/networking';
21
import { ChatCompletion } from '../../networking/common/openai';
22
import { IOTelService } from '../../otel/common/otelService';
23
import { retrieveCapturingTokenByCorrelation, storeCapturingTokenForCorrelation } from '../../requestLogger/node/requestLogger';
24
import { ITelemetryService } from '../../telemetry/common/telemetry';
25
import { TelemetryData } from '../../telemetry/common/telemetryData';
26
import { EndpointEditToolName, isEndpointEditToolName } from '../common/endpointProvider';
27
import { CustomDataPartMimeTypes } from '../common/endpointTypes';
28
import { decodeStatefulMarker, encodeStatefulMarker, rawPartAsStatefulMarker } from '../common/statefulMarkerContainer';
29
import { rawPartAsThinkingData } from '../common/thinkingDataContainer';
30
import { ExtensionContributedChatTokenizer } from './extChatTokenizer';
31
32
// Image MIME types accepted when converting base64 `data:` URLs from Raw image
// content parts into `vscode.LanguageModelDataPart` (see convertToApiChatMessage).
enum ChatImageMimeType {
	PNG = 'image/png',
	JPEG = 'image/jpeg',
	GIF = 'image/gif',
	WEBP = 'image/webp',
	BMP = 'image/bmp',
}
39
40
export class ExtensionContributedChatEndpoint implements IChatEndpoint {
41
private readonly _maxTokens: number;
42
public readonly isDefault: boolean = false;
43
public readonly isFallback: boolean = false;
44
public readonly isPremium: boolean = false;
45
public readonly multiplier: number = 0;
46
public readonly isExtensionContributed = true;
47
public readonly supportedEditTools?: readonly EndpointEditToolName[] | undefined;
48
49
constructor(
50
private readonly languageModel: vscode.LanguageModelChat,
51
@IInstantiationService private readonly _instantiationService: IInstantiationService,
52
@IOTelService private readonly _otelService: IOTelService,
53
) {
54
// Initialize with the model's max tokens
55
this._maxTokens = languageModel.maxInputTokens;
56
this.supportedEditTools = languageModel.capabilities.editToolsHint?.filter(isEndpointEditToolName);
57
}
58
59
get modelProvider(): string {
60
return this.languageModel.vendor;
61
}
62
63
get modelMaxPromptTokens(): number {
64
return this._maxTokens;
65
}
66
67
get maxOutputTokens(): number {
68
// The VS Code API doesn't expose max output tokens, use a reasonable default
69
return 8192;
70
}
71
72
get urlOrRequestMetadata(): string {
73
// Not used for extension contributed endpoints
74
return '';
75
}
76
77
get model(): string {
78
return this.languageModel.id;
79
}
80
81
get name(): string {
82
return this.languageModel.name;
83
}
84
85
get version(): string {
86
return this.languageModel.version;
87
}
88
89
get family(): string {
90
return this.languageModel.family;
91
}
92
93
get tokenizer(): TokenizerType {
94
// Most language models use the O200K tokenizer, if they don't they should specify in their metadata
95
return TokenizerType.O200K;
96
}
97
98
get showInModelPicker(): boolean {
99
// TODO @lramos15 - Need some API exposed for this, registration seems to have it
100
return true;
101
}
102
103
get supportsToolCalls(): boolean {
104
return this.languageModel.capabilities?.supportsToolCalling ?? false;
105
}
106
107
get supportsVision(): boolean {
108
return this.languageModel?.capabilities?.supportsImageToText ?? false;
109
}
110
111
get supportsPrediction(): boolean {
112
return false;
113
}
114
115
get policy(): 'enabled' | { terms: string } {
116
return 'enabled';
117
}
118
119
async processResponseFromChatEndpoint(
120
telemetryService: ITelemetryService,
121
logService: ILogService,
122
response: Response,
123
expectedNumChoices: number,
124
finishCallback: FinishedCallback,
125
telemetryData: TelemetryData,
126
cancellationToken?: CancellationToken
127
): Promise<AsyncIterableObject<ChatCompletion>> {
128
throw new Error('processResponseFromChatEndpoint not supported for extension contributed endpoints');
129
}
130
131
async acceptChatPolicy(): Promise<boolean> {
132
return true;
133
}
134
135
public acquireTokenizer(): ITokenizer {
136
// Use the extension-contributed tokenizer that leverages the VS Code language model API
137
return new ExtensionContributedChatTokenizer(this.languageModel);
138
}
139
140
async makeChatRequest(
141
debugName: string,
142
messages: Raw.ChatMessage[],
143
finishedCb: FinishedCallback | undefined,
144
token: CancellationToken,
145
location: ChatLocation,
146
source?: { extensionId?: string | undefined },
147
requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
148
userInitiatedRequest?: boolean,
149
telemetryProperties?: Record<string, string>,
150
): Promise<ChatResponse> {
151
return this.makeChatRequest2({
152
debugName,
153
messages,
154
finishedCb,
155
location,
156
source,
157
requestOptions,
158
userInitiatedRequest,
159
telemetryProperties,
160
}, token);
161
}
162
163
async makeChatRequest2({
164
debugName,
165
messages,
166
requestOptions,
167
finishedCb,
168
location,
169
source,
170
}: IMakeChatRequestOptions, token: CancellationToken): Promise<ChatResponse> {
171
const vscodeMessages = convertToApiChatMessage(messages);
172
const ourRequestId = generateUuid();
173
174
// Capture active OTel trace context to propagate through IPC to the BYOK provider.
175
// Each provider creates its own chat span with full usage data:
176
// - OpenAI-compatible (Azure, OpenAI, etc.): via CopilotLanguageModelWrapper → chatMLFetcher
177
// - Anthropic: inside AnthropicLMProvider
178
// - Gemini: inside GeminiNativeBYOKLMProvider
179
const activeTraceCtx = this._otelService.getActiveTraceContext();
180
181
const vscodeOptions: vscode.LanguageModelChatRequestOptions = {
182
tools: ((requestOptions?.tools ?? []) as OpenAiFunctionTool[]).map(tool => ({
183
name: tool.function.name,
184
description: tool.function.description,
185
inputSchema: tool.function.parameters,
186
})),
187
// Pass correlation ID and OTel trace context through modelOptions for cross-IPC restoration.
188
modelOptions: {
189
_capturingTokenCorrelationId: ourRequestId,
190
_otelTraceContext: activeTraceCtx ?? null,
191
}
192
};
193
194
// Store current CapturingToken for retrieval by BYOK providers after IPC crossing
195
//
196
// Note: We intentionally don't create an OTel chat span here for extension-contributed models.
197
// The BYOK provider (CopilotLanguageModelWrapper) creates the real chat span via chatMLFetcher
198
// with full token usage, response model, and cache data. Creating a span here would duplicate it.
199
storeCapturingTokenForCorrelation(ourRequestId);
200
201
const streamRecorder = new FetchStreamRecorder(finishedCb);
202
203
try {
204
const response = await this.languageModel.sendRequest(vscodeMessages, vscodeOptions, token);
205
let text = '';
206
let numToolsCalled = 0;
207
const requestId = ourRequestId;
208
209
// consume stream
210
for await (const chunk of response.stream) {
211
if (chunk instanceof vscode.LanguageModelTextPart) {
212
text += chunk.value;
213
if (streamRecorder.callback) {
214
await streamRecorder.callback(text, 0, { text: chunk.value });
215
}
216
} else if (chunk instanceof vscode.LanguageModelToolCallPart) {
217
if (streamRecorder.callback) {
218
const functionCalls = [chunk].map(tool => ({
219
name: tool.name ?? '',
220
arguments: JSON.stringify(tool.input) ?? '',
221
id: tool.callId
222
}));
223
numToolsCalled++;
224
await streamRecorder.callback(text, 0, { text: '', copilotToolCalls: functionCalls });
225
}
226
} else if (chunk instanceof vscode.LanguageModelDataPart) {
227
if (chunk.mimeType === CustomDataPartMimeTypes.StatefulMarker) {
228
const decoded = decodeStatefulMarker(chunk.data);
229
await streamRecorder.callback?.(text, 0, { text: '', statefulMarker: decoded.marker });
230
} else if (chunk.mimeType === CustomDataPartMimeTypes.ContextManagement) {
231
const contextManagement = JSON.parse(new TextDecoder().decode(chunk.data)) as ContextManagementResponse;
232
await streamRecorder.callback?.(text, 0, { text: '', contextManagement });
233
}
234
} else if (chunk instanceof vscode.LanguageModelThinkingPart) {
235
if (streamRecorder.callback) {
236
await streamRecorder.callback(text, 0, {
237
text: '',
238
thinking: {
239
text: chunk.value,
240
id: chunk.id || '',
241
metadata: chunk.metadata
242
}
243
});
244
}
245
}
246
}
247
248
if (text || numToolsCalled > 0) {
249
return {
250
type: ChatFetchResponseType.Success,
251
requestId,
252
serverRequestId: requestId,
253
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },
254
value: text,
255
resolvedModel: this.languageModel.id
256
};
257
} else {
258
return {
259
type: ChatFetchResponseType.Unknown,
260
reason: 'No response from language model',
261
requestId: requestId,
262
serverRequestId: undefined
263
};
264
}
265
} catch (e) {
266
return {
267
type: ChatFetchResponseType.Failed,
268
reason: toErrorMessage(e, true),
269
requestId: generateUuid(),
270
serverRequestId: undefined
271
};
272
} finally {
273
retrieveCapturingTokenByCorrelation(ourRequestId);
274
}
275
}
276
277
createRequestBody(options: ICreateEndpointBodyOptions): IEndpointBody {
278
throw new Error('unreachable'); // this endpoint does not call into fetchers
279
}
280
281
cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
282
return this._instantiationService.createInstance(ExtensionContributedChatEndpoint, {
283
...this.languageModel,
284
maxInputTokens: modelMaxPromptTokens
285
});
286
}
287
}
288
289
export function convertToApiChatMessage(messages: Raw.ChatMessage[]): Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2> {
290
const apiMessages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2> = [];
291
for (const message of messages) {
292
const apiContent: Array<vscode.LanguageModelTextPart | vscode.LanguageModelToolResultPart2 | vscode.LanguageModelToolCallPart | vscode.LanguageModelDataPart | vscode.LanguageModelThinkingPart> = [];
293
// Easier to work with arrays everywhere, rather than string in some cases. So convert to a single text content part
294
for (const contentPart of message.content) {
295
if (contentPart.type === Raw.ChatCompletionContentPartKind.Text) {
296
apiContent.push(new vscode.LanguageModelTextPart(contentPart.text));
297
} else if (contentPart.type === Raw.ChatCompletionContentPartKind.Image) {
298
// Handle base64 encoded images
299
if (contentPart.imageUrl.url.startsWith('data:')) {
300
const dataUrlRegex = /^data:([^;]+);base64,(.*)$/;
301
const match = contentPart.imageUrl.url.match(dataUrlRegex);
302
303
if (match) {
304
const [, mimeType, base64Data] = match;
305
apiContent.push(new vscode.LanguageModelDataPart(Buffer.from(base64Data, 'base64'), mimeType as ChatImageMimeType));
306
}
307
} else {
308
// Not a base64 image
309
continue;
310
}
311
} else if (contentPart.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint) {
312
apiContent.push(new vscode.LanguageModelDataPart(new TextEncoder().encode('ephemeral'), CustomDataPartMimeTypes.CacheControl));
313
} else if (contentPart.type === Raw.ChatCompletionContentPartKind.Opaque) {
314
const statefulMarker = rawPartAsStatefulMarker(contentPart);
315
if (statefulMarker) {
316
apiContent.push(new vscode.LanguageModelDataPart(encodeStatefulMarker(statefulMarker.modelId, statefulMarker.marker), CustomDataPartMimeTypes.StatefulMarker));
317
}
318
const thinkingData = rawPartAsThinkingData(contentPart);
319
if (thinkingData) {
320
apiContent.push(new vscode.LanguageModelThinkingPart(thinkingData.text, thinkingData.id, thinkingData.metadata));
321
}
322
}
323
}
324
325
if (message.role === Raw.ChatRole.System || message.role === Raw.ChatRole.User) {
326
apiMessages.push({
327
role: message.role === Raw.ChatRole.System ? vscode.LanguageModelChatMessageRole.System : vscode.LanguageModelChatMessageRole.User,
328
name: message.name,
329
content: apiContent
330
});
331
} else if (message.role === Raw.ChatRole.Assistant) {
332
if (message.toolCalls) {
333
for (const toolCall of message.toolCalls) {
334
apiContent.push(new vscode.LanguageModelToolCallPart(toolCall.id, toolCall.function.name, JSON.parse(toolCall.function.arguments)));
335
}
336
}
337
apiMessages.push({
338
role: vscode.LanguageModelChatMessageRole.Assistant,
339
name: message.name,
340
content: apiContent
341
});
342
} else if (message.role === Raw.ChatRole.Tool) {
343
const toolResultPart: vscode.LanguageModelToolResultPart2 = new vscode.LanguageModelToolResultPart2(
344
message.toolCallId ?? '',
345
apiContent
346
);
347
apiMessages.push({
348
role: vscode.LanguageModelChatMessageRole.User,
349
name: '',
350
content: [toolResultPart]
351
});
352
}
353
}
354
return apiMessages;
355
}
356
357