Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/conversation/vscode-node/languageModelAccess.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
7
import { Raw } from '@vscode/prompt-tsx';
8
import * as vscode from 'vscode';
9
import { IAuthenticationService } from '../../../platform/authentication/common/authentication';
10
import { CopilotToken } from '../../../platform/authentication/common/copilotToken';
11
import { IBlockedExtensionService } from '../../../platform/chat/common/blockedExtensionService';
12
import { ChatFetchResponseType, ChatLocation, getErrorDetailsFromChatFetchError } from '../../../platform/chat/common/commonTypes';
13
import { getTextPart } from '../../../platform/chat/common/globalStringUtils';
14
import { EmbeddingType, getWellKnownEmbeddingTypeInfo, IEmbeddingsComputer } from '../../../platform/embeddings/common/embeddingsComputer';
15
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
16
import { CustomDataPartMimeTypes } from '../../../platform/endpoint/common/endpointTypes';
17
import { ModelAliasRegistry } from '../../../platform/endpoint/common/modelAliasRegistry';
18
import { encodeStatefulMarker } from '../../../platform/endpoint/common/statefulMarkerContainer';
19
import { isGeminiFamily } from '../../../platform/endpoint/common/chatModelCapabilities';
20
import { AutoChatEndpoint } from '../../../platform/endpoint/node/autoChatEndpoint';
21
import { IAutomodeService } from '../../../platform/endpoint/node/automodeService';
22
import { IEnvService, isScenarioAutomation } from '../../../platform/env/common/envService';
23
import { IVSCodeExtensionContext } from '../../../platform/extContext/common/extensionContext';
24
import { IOctoKitService } from '../../../platform/github/common/githubService';
25
import { ILogService } from '../../../platform/log/common/logService';
26
import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../../platform/networking/common/fetch';
27
import { IChatEndpoint, IEndpoint } from '../../../platform/networking/common/networking';
28
import { IOTelService, type OTelModelOptions } from '../../../platform/otel/common/otelService';
29
import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';
30
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
31
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
32
import { isEncryptedThinkingDelta } from '../../../platform/thinking/common/thinking';
33
import { BaseTokensPerCompletion } from '../../../platform/tokenizer/node/tokenizer';
34
import { TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
35
import { Emitter } from '../../../util/vs/base/common/event';
36
import { Disposable, MutableDisposable } from '../../../util/vs/base/common/lifecycle';
37
import { isBoolean, isDefined, isNumber, isString, isStringArray } from '../../../util/vs/base/common/types';
38
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
39
import { ChatLocation as ApiChatLocation, ExtensionMode } from '../../../vscodeTypes';
40
import type { LMResponsePart } from '../../byok/common/byokProvider';
41
import { IExtensionContribution } from '../../common/contributions';
42
import { PromptRenderer } from '../../prompts/node/base/promptRenderer';
43
import { isImageDataPart } from '../common/languageModelChatMessageHelpers';
44
import { LanguageModelAccessPrompt } from './languageModelAccessPrompt';
45
import { formatPricingLabel, getModelCapabilitiesDescription } from '../common/languageModelAccess';
46
47
/**
 * Markers in the autoModelHint experiment variable that indicate the auto model
 * is routing to an experimental or evaluation model.
 */
// NOTE(review): these are opaque experiment bucket ids ('mp3yn0h7', 'yaqq2gxh') plus a
// model-name marker ('minimax'); matched by substring in _provideLanguageModelChatInfo.
const experimentalAutoModelHintMarkers = ['minimax', 'mp3yn0h7', 'yaqq2gxh'];
52
53
/**
54
* Builds a configurationSchema for the model picker based on the endpoint's supported capabilities.
55
* Models that support reasoning_effort get a "Thinking Effort" dropdown in the model picker UI.
56
*/
57
function buildConfigurationSchema(endpoint: IChatEndpoint): { configurationSchema?: vscode.LanguageModelConfigurationSchema } {
58
const effortLevels = endpoint.supportsReasoningEffort;
59
if (!effortLevels || effortLevels.length <= 1) {
60
return {};
61
}
62
63
// Auto model delegates to different backends, so don't expose effort picker
64
if (endpoint instanceof AutoChatEndpoint) {
65
return {};
66
}
67
68
const family = endpoint.family.toLowerCase();
69
if (isGeminiFamily(endpoint)) {
70
return {};
71
}
72
73
let defaultEffort: string | undefined;
74
if (family.startsWith('claude')) {
75
defaultEffort = effortLevels.includes('high') ? 'high' : undefined;
76
} else if (family.startsWith('gpt-')) {
77
defaultEffort = effortLevels.includes('medium') ? 'medium' : undefined;
78
}
79
80
return {
81
configurationSchema: {
82
properties: {
83
reasoningEffort: {
84
type: 'string',
85
title: vscode.l10n.t('Thinking Effort'),
86
enum: effortLevels,
87
enumItemLabels: effortLevels.map(level => level.charAt(0).toUpperCase() + level.slice(1)),
88
enumDescriptions: effortLevels.map(level => {
89
switch (level) {
90
case 'none': return vscode.l10n.t('No reasoning applied');
91
case 'low': return vscode.l10n.t('Faster responses with less reasoning');
92
case 'medium': return vscode.l10n.t('Balanced reasoning and speed');
93
case 'high': return vscode.l10n.t('Greater reasoning depth but slower');
94
case 'xhigh': return vscode.l10n.t('Maximum reasoning depth but slower');
95
default: return level;
96
}
97
}),
98
default: defaultEffort,
99
group: 'navigation',
100
}
101
}
102
}
103
};
104
}
105
106
/**
 * Extension contribution that exposes Copilot chat models and embeddings to VS Code
 * via the `vscode.lm` API. Registers a `LanguageModelChatProvider` under the
 * `copilot` vendor and an `EmbeddingsProvider`, and keeps both in sync with
 * authentication state and model-list refreshes.
 */
export class LanguageModelAccess extends Disposable implements IExtensionContribution {

	readonly id = 'languageModelAccess';

	/** Resolves once the chat-provider and embeddings registrations have completed. Unset in test mode. */
	readonly activationBlocker?: Promise<void>;

	// Fired whenever the set of available models may have changed (auth change or CAPI refresh).
	private readonly _onDidChange = this._register(new Emitter<void>());
	private _currentModels: vscode.LanguageModelChatInformation[] = []; // Store current models for reference
	private _chatEndpoints: IChatEndpoint[] = [];
	private _lmWrapper: CopilotLanguageModelWrapper;
	private _promptBaseCountCache: LanguageModelAccessPromptBaseCountCache;

	constructor(
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IEndpointProvider private readonly _endpointProvider: IEndpointProvider,
		@IEmbeddingsComputer private readonly _embeddingsComputer: IEmbeddingsComputer,
		@IVSCodeExtensionContext private readonly _vsCodeExtensionContext: IVSCodeExtensionContext,
		@IAutomodeService private readonly _automodeService: IAutomodeService,
		@IExperimentationService private readonly _expService: IExperimentationService,
	) {
		super();

		this._lmWrapper = this._instantiationService.createInstance(CopilotLanguageModelWrapper);
		this._promptBaseCountCache = this._instantiationService.createInstance(LanguageModelAccessPromptBaseCountCache);

		// In test mode (unless running scenario automation) nothing is registered at all.
		if (this._vsCodeExtensionContext.extensionMode === ExtensionMode.Test && !isScenarioAutomation) {
			this._logService.warn('[LanguageModelAccess] LanguageModels and Embeddings are NOT AVAILABLE in test mode.');
			return;
		}

		// initial
		this.activationBlocker = Promise.all([
			this._registerChatProvider(),
			this._registerEmbeddings(),
		]).then(() => { });
	}

	override dispose(): void {
		super.dispose();
	}

	/** The most recently published model list (may be stale while auth is being reacquired). */
	get currentModels(): vscode.LanguageModelChatInformation[] {
		return this._currentModels;
	}

	/**
	 * Registers the `copilot` chat provider and wires model-list invalidation to
	 * authentication changes and CAPI model refreshes.
	 */
	private async _registerChatProvider(): Promise<void> {
		const provider: vscode.LanguageModelChatProvider = {
			onDidChangeLanguageModelChatInformation: this._onDidChange.event,
			provideLanguageModelChatInformation: this._provideLanguageModelChatInfo.bind(this),
			provideLanguageModelChatResponse: this._provideLanguageModelChatResponse.bind(this),
			provideTokenCount: this._provideTokenCount.bind(this)
		};
		this._register(vscode.lm.registerLanguageModelChatProvider('copilot', provider));
		this._register(this._authenticationService.onDidAuthenticationChange(() => {
			if (!this._authenticationService.anyGitHubSession) {
				this._currentModels = [];
			}
			// Auth changed which means models could've changed. Fire the event
			this._onDidChange.fire();
		}));
		this._register(this._endpointProvider.onDidModelsRefresh(() => {
			// Models have been refreshed from CAPI so we should requery them
			this._onDidChange.fire();
		}));
	}

	/**
	 * Computes the `LanguageModelChatInformation` list shown in the model picker.
	 * Builds tooltips, categories, pricing details, and per-location defaults for
	 * every visible endpoint plus the synthetic "Auto" endpoint, and registers
	 * non-selectable alias entries for each model.
	 */
	private async _provideLanguageModelChatInfo(options: { silent: boolean }, token: vscode.CancellationToken): Promise<vscode.LanguageModelChatInformation[]> {
		const session = await this._getToken();
		if (!session) {
			// Return cached models until we have auth reacquired
			// We clear this list in onDidAuthenticationChange so signed out should still have model picker clear
			return this._currentModels;
		}

		const models: vscode.LanguageModelChatInformation[] = [];
		const allEndpoints = await this._endpointProvider.getAllChatEndpoints();
		// NOTE(review): 'gpt-4o-mini' is included even when not flagged for the picker —
		// presumably needed as an API-accessible model; confirm intent.
		const chatEndpoints = allEndpoints.filter(e => e.showInModelPicker || e.model === 'gpt-4o-mini');
		const autoEndpoint = await this._automodeService.resolveAutoModeEndpoint(undefined, allEndpoints);
		chatEndpoints.push(autoEndpoint);
		let defaultChatEndpoint: IChatEndpoint;
		const defaultExpModel = this._expService.getTreatmentVariable<string>('chat.defaultLanguageModel')?.replace('copilot/', '');
		if (this._authenticationService.copilotToken?.isNoAuthUser || !defaultExpModel || defaultExpModel === AutoChatEndpoint.pseudoModelId) {
			// No auth, no experiment, and exp that sets auto to default all get default model
			defaultChatEndpoint = autoEndpoint;
		} else {
			// Find exp default
			defaultChatEndpoint = chatEndpoints.find(e => e.model === defaultExpModel) || autoEndpoint;
		}

		// Used to collapse duplicate families: later endpoints of an already-seen family are
		// skipped unless they are explicitly marked for the picker.
		const seenFamilies = new Set<string>();

		for (const endpoint of chatEndpoints) {
			if (seenFamilies.has(endpoint.family) && !endpoint.showInModelPicker) {
				continue;
			}
			seenFamilies.add(endpoint.family);

			const sanitizedModelName = endpoint.name.replace(/\(Preview\)/g, '').trim();
			let modelTooltip: string | undefined;
			if (endpoint.degradationReason) {
				modelTooltip = endpoint.degradationReason;
			} else if (endpoint instanceof AutoChatEndpoint) {
				modelTooltip = vscode.l10n.t('Auto selects the best model for your request based on capacity and performance.');
				const plan = this._authenticationService.copilotToken?.copilotPlan;
				const isOrgManaged = plan === 'business' || plan === 'enterprise';
				const autoModeHint = this._expService.getTreatmentVariable<string>('copilotchat.autoModelHint');
				// Only surface the "experimental" hint to individual users, never org-managed plans.
				const showExperimentalHint = !isOrgManaged && !!autoModeHint && experimentalAutoModelHintMarkers.some(marker => autoModeHint.includes(marker));
				if (showExperimentalHint) {
					modelTooltip = `${modelTooltip} ${vscode.l10n.t('This model may be experimental or in evaluation.')}`;
				}
			} else {
				modelTooltip = getModelCapabilitiesDescription(endpoint);
			}

			// Picker grouping: Auto floats to the very top; free users see a single
			// "Copilot Models" group; paid users see Standard vs Premium.
			let modelCategory: { label: string; order: number } | undefined;
			if (endpoint instanceof AutoChatEndpoint) {
				modelCategory = { label: '', order: Number.MIN_SAFE_INTEGER };
			} else if (endpoint.isPremium === undefined || this._authenticationService.copilotToken?.isFreeUser) {
				modelCategory = { label: vscode.l10n.t("Copilot Models"), order: 0 };
			} else if (endpoint.isPremium) {
				modelCategory = { label: vscode.l10n.t("Premium Models"), order: 1 };
			} else {
				modelCategory = { label: vscode.l10n.t("Standard Models"), order: 0 };
			}

			// Counting tokens requires instantiating the tokenizers, which makes this process use a lot of memory.
			// Let's cache the results across extension activations
			const baseCount = await this._promptBaseCountCache.getBaseCount(endpoint);
			const multiplier = endpoint.multiplier !== undefined ? `${endpoint.multiplier}x` : undefined;
			let modelDetail: string | undefined;

			// Auto shows its discount range (flat "N%" when both ends match and are non-zero).
			if (endpoint instanceof AutoChatEndpoint) {
				if (endpoint.discountRange.high === endpoint.discountRange.low && endpoint.discountRange.low !== 0) {
					modelDetail = `${endpoint.discountRange.low * 100}% discount`;
				} else if (endpoint.discountRange.high !== endpoint.discountRange.low) {
					modelDetail = `${endpoint.discountRange.low * 100}% to ${endpoint.discountRange.high * 100}% discount`;
				}
			}
			// Custom (bring-your-own) models override detail/tooltip/category entirely.
			if (endpoint.customModel) {
				const customModel = endpoint.customModel;
				modelDetail = customModel.owner_name;
				modelTooltip = vscode.l10n.t('{0} is contributed by {1} using {2}.', sanitizedModelName, customModel.owner_name, customModel.key_name);
				modelCategory = { label: vscode.l10n.t("Custom Models"), order: 2 };
			}

			const session = this._authenticationService.anyGitHubSession;
			const isDefault = endpoint === defaultChatEndpoint;

			const model: vscode.LanguageModelChatInformation = {
				id: endpoint instanceof AutoChatEndpoint ? AutoChatEndpoint.pseudoModelId : endpoint.model,
				name: endpoint instanceof AutoChatEndpoint ? 'Auto' : endpoint.name,
				family: endpoint.family,
				tooltip: modelTooltip,
				pricing: endpoint instanceof AutoChatEndpoint ? undefined : (multiplier ?? (endpoint.tokenPricing ? formatPricingLabel(endpoint.tokenPricing) : undefined)),
				multiplierNumeric: endpoint instanceof AutoChatEndpoint ? undefined : endpoint.multiplier,
				detail: modelDetail,
				category: modelCategory,
				statusIcon: endpoint.degradationReason ? new vscode.ThemeIcon('warning') : undefined,
				version: endpoint.version,
				// Advertised input budget excludes the base prompt and completion reserve.
				maxInputTokens: endpoint.modelMaxPromptTokens - baseCount - BaseTokensPerCompletion,
				maxOutputTokens: endpoint.maxOutputTokens,
				requiresAuthorization: session && { label: session.account.label },
				isDefault: {
					[ApiChatLocation.Panel]: isDefault,
					[ApiChatLocation.Terminal]: isDefault,
					[ApiChatLocation.Notebook]: isDefault,
					[ApiChatLocation.Editor]: endpoint instanceof AutoChatEndpoint, // inline chat gets 'Auto' by default
				},
				isUserSelectable: endpoint.showInModelPicker,
				capabilities: {
					imageInput: endpoint instanceof AutoChatEndpoint ? true : endpoint.supportsVision,
					toolCalling: endpoint.supportsToolCalls,
				},
				...buildConfigurationSchema(endpoint),
			};

			models.push(model);

			// Register aliases for this model
			const aliases = ModelAliasRegistry.getAliases(model.id);
			for (const alias of aliases) {
				models.push({
					...model,
					id: alias,
					family: alias,
					isUserSelectable: false,
				});
			}
		}

		this._currentModels = models;
		this._chatEndpoints = chatEndpoints;
		return models;
	}

	/**
	 * Maps a picker model back to its chat endpoint. The Auto pseudo-model is re-resolved
	 * on every call; other ids are resolved through the alias registry against the
	 * endpoints captured by the last _provideLanguageModelChatInfo run.
	 */
	private async _getEndpointForModel(model: vscode.LanguageModelChatInformation) {
		if (model.id === AutoChatEndpoint.pseudoModelId) {
			const allEndpoints = await this._endpointProvider.getAllChatEndpoints();
			return await this._automodeService.resolveAutoModeEndpoint(undefined, allEndpoints);
		}
		return this._chatEndpoints.find(e => e.model === ModelAliasRegistry.resolveAlias(model.id));
	}

	/**
	 * `LanguageModelChatProvider.provideLanguageModelChatResponse` implementation —
	 * resolves the endpoint and delegates to the wrapper.
	 * @throws {Error} When no endpoint matches the requested model id.
	 */
	private async _provideLanguageModelChatResponse(
		model: vscode.LanguageModelChatInformation,
		messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>,
		options: vscode.ProvideLanguageModelChatResponseOptions,
		progress: vscode.Progress<vscode.LanguageModelResponsePart2>,
		token: vscode.CancellationToken
	): Promise<void> {
		const endpoint = await this._getEndpointForModel(model);
		if (!endpoint) {
			throw new Error(`Endpoint not found for model ${model.id}`);
		}

		return this._lmWrapper.provideLanguageModelResponse(endpoint, messages, {
			...options,
			modelOptions: options.modelOptions
		}, options.requestInitiator, progress, token);
	}

	/**
	 * `LanguageModelChatProvider.provideTokenCount` implementation — resolves the
	 * endpoint and delegates to the wrapper's tokenizer.
	 * @throws {Error} When no endpoint matches the requested model id.
	 */
	private async _provideTokenCount(
		model: vscode.LanguageModelChatInformation,
		text: string | vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2,
		token: vscode.CancellationToken
	): Promise<number> {
		const endpoint = await this._getEndpointForModel(model);
		if (!endpoint) {
			throw new Error(`Endpoint not found for model ${model.id}`);
		}

		return this._lmWrapper.provideTokenCount(endpoint, text);
	}

	/**
	 * Registers (and on auth changes re-registers) the embeddings provider for the
	 * well-known text3small_512 embedding model. The provider is torn down whenever
	 * no Copilot token is available.
	 */
	private async _registerEmbeddings(): Promise<void> {

		const dispo = this._register(new MutableDisposable());

		const update = async () => {

			if (!await this._getToken()) {
				dispo.clear();
				return;
			}

			const embeddingsComputer = this._embeddingsComputer;
			const embeddingType = EmbeddingType.text3small_512;
			const model = getWellKnownEmbeddingTypeInfo(embeddingType)?.model;
			if (!model) {
				throw new Error(`No model found for embedding type ${embeddingType.id}`);
			}

			dispo.clear();
			dispo.value = vscode.lm.registerEmbeddingsProvider(`copilot.${model}`, new class implements vscode.EmbeddingsProvider {
				async provideEmbeddings(input: string[], token: vscode.CancellationToken): Promise<vscode.Embedding[]> {
					const result = await embeddingsComputer.computeEmbeddings(embeddingType, input, {}, new TelemetryCorrelationId('EmbeddingsProvider::provideEmbeddings'), token);
					// slice(0) copies each vector so callers can't mutate cached embeddings
					return result.values.map(embedding => ({ values: embedding.value.slice(0) }));
				}
			});
		};

		this._register(this._authenticationService.onDidAuthenticationChange(() => update()));
		await update();
	}

	/**
	 * Fetches the current Copilot token, returning undefined (after logging) when
	 * authentication is unavailable rather than propagating the error.
	 */
	private async _getToken(): Promise<CopilotToken | undefined> {
		try {
			const copilotToken = await this._authenticationService.getCopilotToken();
			return copilotToken;
		} catch (e) {
			this._logService.warn('[LanguageModelAccess] LanguageModel/Embeddings are not available without auth token');
			this._logService.error(e);
			return undefined;
		}
	}
}
385
386
class LanguageModelAccessPromptBaseCountCache {
387
constructor(
388
@IVSCodeExtensionContext private readonly _extensionContext: IVSCodeExtensionContext,
389
@IInstantiationService private readonly _instantiationService: IInstantiationService,
390
@IEnvService private readonly _envService: IEnvService
391
) { }
392
393
public async getBaseCount(endpoint: IChatEndpoint): Promise<number> {
394
const key = `lmBaseCount/${endpoint.model}`;
395
const cached = this._extensionContext.globalState.get<{ extensionVersion: string; baseCount: number }>(key);
396
if (cached && cached.extensionVersion === this._envService.getVersion() && typeof cached.baseCount === 'number') {
397
return cached.baseCount;
398
}
399
400
const baseCount = await this._computeBaseCount(endpoint);
401
// Store the computed value along with the extension version so we can
402
// invalidate the cache when the extension is updated.
403
try {
404
await this._extensionContext.globalState.update(key, { extensionVersion: this._envService.getVersion(), baseCount });
405
} catch (err) {
406
// Best-effort cache update — don't fail the caller if persisting the
407
// cache entry fails for any reason.
408
}
409
410
return baseCount;
411
}
412
413
private async _computeBaseCount(endpoint: IChatEndpoint): Promise<number> {
414
const baseCount = await PromptRenderer.create(this._instantiationService, endpoint, LanguageModelAccessPrompt, { noSafety: false, messages: [] }).countTokens();
415
return baseCount;
416
}
417
418
}
419
420
/**
421
* Exported for test
422
*/
423
export class CopilotLanguageModelWrapper extends Disposable {
424
425
constructor(
426
@ITelemetryService private readonly _telemetryService: ITelemetryService,
427
@IBlockedExtensionService private readonly _blockedExtensionService: IBlockedExtensionService,
428
@IInstantiationService private readonly _instantiationService: IInstantiationService,
429
@ILogService private readonly _logService: ILogService,
430
@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
431
@IEnvService private readonly _envService: IEnvService,
432
@IOTelService private readonly _otelService: IOTelService,
433
@IOctoKitService private readonly _octoKitService: IOctoKitService,
434
) {
435
super();
436
}
437
438
private async _provideLanguageModelResponse(_endpoint: IChatEndpoint, _messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>, _options: vscode.ProvideLanguageModelChatResponseOptions, extensionId: string | undefined, callback: FinishedCallback, token: vscode.CancellationToken): Promise<void> {
439
if (extensionId === 'core') {
440
extensionId = undefined;
441
}
442
443
const extensionInfo = !extensionId ? { packageJSON: { version: this._envService.vscodeVersion } } : vscode.extensions.getExtension(extensionId, true);
444
if (!extensionInfo || typeof extensionInfo.packageJSON.version !== 'string') {
445
throw new Error('Invalid extension information');
446
}
447
const extensionVersion = <string>extensionInfo.packageJSON.version;
448
449
const blockedExtensionMessage = vscode.l10n.t('The extension has been temporarily blocked due to making too many requests. Please try again later.');
450
if (extensionId && this._blockedExtensionService.isExtensionBlocked(extensionId)) {
451
throw vscode.LanguageModelError.Blocked(blockedExtensionMessage);
452
}
453
454
const toolTokenCount = _options.tools ? await this.countToolTokens(_endpoint, _options.tools) : 0;
455
const baseCount = await PromptRenderer.create(this._instantiationService, _endpoint, LanguageModelAccessPrompt, { noSafety: false, messages: [] }).countTokens();
456
const tokenLimit = _endpoint.modelMaxPromptTokens - baseCount - BaseTokensPerCompletion - toolTokenCount;
457
458
this.validateRequest(_messages);
459
if (_options.tools) {
460
this.validateTools(_options.tools);
461
}
462
// Add safety rules to the prompt if it originates from outside the Copilot Chat extension, otherwise they already exist in the prompt.
463
const { messages, tokenCount } = await PromptRenderer.create(this._instantiationService, {
464
..._endpoint,
465
modelMaxPromptTokens: tokenLimit
466
}, LanguageModelAccessPrompt, { noSafety: extensionId === this._envService.extensionId, messages: _messages }).render();
467
468
/* __GDPR__
469
"languagemodelrequest" : {
470
"owner": "jrieken",
471
"comment": "Data about extensions using the language model",
472
"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model that is being used" },
473
"extensionId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The extension identifier for which we make the request" },
474
"extensionVersion": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The extension version for which we make the request" },
475
"tokenCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tokens" },
476
"tokenLimit": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tokens that can be used" }
477
}
478
*/
479
this._telemetryService.sendMSFTTelemetryEvent(
480
'languagemodelrequest',
481
{
482
extensionId,
483
extensionVersion,
484
model: _endpoint.model
485
},
486
{
487
tokenCount,
488
tokenLimit
489
}
490
);
491
492
// If no messages they got rendered out due to token limit
493
if (messages.length === 0 || tokenCount > tokenLimit) {
494
throw new Error('Message exceeds token limit.');
495
}
496
497
if (_options.tools && _options.tools.length > 128 && !_endpoint.supportsToolSearch) {
498
throw new Error('Cannot have more than 128 tools per request.');
499
}
500
501
const endpoint: IChatEndpoint = new Proxy(_endpoint, {
502
get: function (target, prop, receiver) {
503
if (prop === 'getExtraHeaders') {
504
return function () {
505
const extraHeaders = target.getExtraHeaders?.() ?? {};
506
if (!extensionId) {
507
return extraHeaders;
508
}
509
return {
510
...extraHeaders,
511
'x-onbehalf-extension-id': `${extensionId}/${extensionVersion}`,
512
};
513
};
514
}
515
if (prop === 'acquireTokenizer') {
516
return target.acquireTokenizer.bind(target);
517
}
518
return Reflect.get(target, prop, receiver);
519
}
520
});
521
522
523
const options: OptionalChatRequestParams = LanguageModelOptions.Default.convert(_options.modelOptions ?? {});
524
const telemetryProperties = { messageSource: `api.${extensionId}` };
525
526
options.tools = _options.tools?.map((tool): OpenAiFunctionTool => {
527
return {
528
type: 'function',
529
function: {
530
name: tool.name,
531
description: tool.description,
532
parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
533
}
534
};
535
});
536
if (_options.toolMode === vscode.LanguageModelChatToolMode.Required && _options.tools?.length && _options.tools.length > 1) {
537
throw new Error('LanguageModelChatToolMode.Required is not supported with more than one tool');
538
}
539
540
options.tool_choice = _options.toolMode === vscode.LanguageModelChatToolMode.Required && _options.tools?.length ?
541
{ type: 'function', function: { name: _options.tools[0].name } } :
542
undefined;
543
544
// Restore CapturingToken context if correlation ID was passed through modelOptions.
545
// This handles BYOK providers where the original AsyncLocalStorage context was lost
546
// when crossing the VS Code IPC boundary.
547
const correlationId = (_options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;
548
const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;
549
550
// Restore OTel trace context if passed through modelOptions.
551
// This links the wrapper's chat span back to the original invoke_agent trace.
552
const parentTraceContext = (_options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;
553
554
const makeRequest = () => endpoint.makeChatRequest2({
555
debugName: 'copilotLanguageModelWrapper',
556
messages,
557
finishedCb: callback,
558
location: ChatLocation.Other,
559
source: { extensionId },
560
requestOptions: options,
561
userInitiatedRequest: !!extensionId,
562
telemetryProperties,
563
modelCapabilities: {
564
reasoningEffort: typeof _options.modelConfiguration?.reasoningEffort === 'string' ? _options.modelConfiguration.reasoningEffort : undefined,
565
},
566
}, token);
567
568
// Run request within the parent OTel context (no extra span) so chat spans in chatMLFetcher inherit the agent trace
569
const wrappedRequest = parentTraceContext
570
? () => this._otelService.runWithTraceContext(parentTraceContext, async () => {
571
return capturingToken
572
? await runWithCapturingToken(capturingToken, makeRequest)
573
: await makeRequest();
574
})
575
: () => capturingToken
576
? runWithCapturingToken(capturingToken, makeRequest)
577
: makeRequest();
578
579
const result = await wrappedRequest();
580
581
if (result.type !== ChatFetchResponseType.Success) {
582
if (result.type === ChatFetchResponseType.ExtensionBlocked) {
583
if (extensionId) {
584
this._blockedExtensionService.reportBlockedExtension(extensionId, result.retryAfter);
585
}
586
587
throw vscode.LanguageModelError.Blocked(blockedExtensionMessage);
588
} else if (result.type === ChatFetchResponseType.QuotaExceeded) {
589
const outageStatus = await this._octoKitService.getGitHubOutageStatus();
590
const details = getErrorDetailsFromChatFetchError(result, (await this._authenticationService.getCopilotToken()).copilotPlan, outageStatus);
591
const err = new vscode.LanguageModelError(details.message);
592
err.name = 'ChatQuotaExceeded';
593
throw err;
594
} else if (result.type === ChatFetchResponseType.RateLimited) {
595
const err = new Error(result.reason);
596
err.name = 'ChatRateLimited';
597
throw err;
598
}
599
600
throw new Error(result.reason);
601
}
602
603
this._telemetryService.sendInternalMSFTTelemetryEvent(
604
'languagemodelrequest',
605
{
606
extensionId,
607
extensionVersion,
608
requestid: result.requestId,
609
query: getTextPart(messages[messages.length - 1].content),
610
model: _endpoint.model
611
},
612
{
613
tokenCount,
614
tokenLimit
615
}
616
);
617
}
618
619
async provideLanguageModelResponse(endpoint: IChatEndpoint, messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>, options: vscode.ProvideLanguageModelChatResponseOptions, extensionId: string | undefined, progress: vscode.Progress<LMResponsePart>, token: vscode.CancellationToken): Promise<void> {
620
let thinkingActive = false;
621
const finishCallback: FinishedCallback = async (_text, index, delta): Promise<undefined> => {
622
if (delta.thinking) {
623
// Show thinking progress for unencrypted thinking deltas
624
if (!isEncryptedThinkingDelta(delta.thinking)) {
625
const text = delta.thinking.text ?? '';
626
progress.report(new vscode.LanguageModelThinkingPart(text, delta.thinking.id, delta.thinking.metadata));
627
thinkingActive = true;
628
}
629
} else if (thinkingActive) {
630
progress.report(new vscode.LanguageModelThinkingPart('', '', { vscode_reasoning_done: true }));
631
thinkingActive = false;
632
}
633
if (delta.text) {
634
progress.report(new vscode.LanguageModelTextPart(delta.text));
635
}
636
if (delta.copilotToolCalls) {
637
for (const call of delta.copilotToolCalls) {
638
try {
639
// Anthropic models send "" (empty string) for tools with no parameters.
640
const parameters = JSON.parse(call.arguments || '{}');
641
progress.report(new vscode.LanguageModelToolCallPart(call.id, call.name, parameters));
642
} catch (err) {
643
this._logService.error(err, `Got invalid JSON for tool call: ${call.arguments}`);
644
throw new Error('Invalid JSON for tool call');
645
}
646
}
647
}
648
649
if (delta.statefulMarker) {
650
progress.report(
651
new vscode.LanguageModelDataPart(encodeStatefulMarker(endpoint.model, delta.statefulMarker), CustomDataPartMimeTypes.StatefulMarker)
652
);
653
}
654
655
return undefined;
656
};
657
return this._provideLanguageModelResponse(endpoint, messages, options, extensionId, finishCallback, token);
658
}
659
660
async provideTokenCount(endpoint: IEndpoint, message: string | vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2): Promise<number> {
661
if (typeof message === 'string') {
662
return endpoint.acquireTokenizer().tokenLength(message);
663
} else {
664
let raw: Raw.ChatMessage;
665
666
const content = message.content.map((part): Raw.ChatCompletionContentPart | undefined => {
667
if (part instanceof vscode.LanguageModelTextPart) {
668
return { type: Raw.ChatCompletionContentPartKind.Text, text: part.value };
669
} else if (part instanceof vscode.LanguageModelDataPart && part.mimeType === 'application/pdf') {
670
return { type: Raw.ChatCompletionContentPartKind.Document, documentData: { data: Buffer.from(part.data).toString('base64'), mediaType: part.mimeType } };
671
} else if (isImageDataPart(part)) {
672
return { type: Raw.ChatCompletionContentPartKind.Image, imageUrl: { url: `data:${part.mimeType};base64,${Buffer.from(part.data).toString('base64url')}` } };
673
} else {
674
return undefined;
675
}
676
}).filter(isDefined);
677
switch (message.role) {
678
case vscode.LanguageModelChatMessageRole.User:
679
raw = { role: Raw.ChatRole.User, content, name: message.name };
680
break;
681
case vscode.LanguageModelChatMessageRole.System:
682
raw = { role: Raw.ChatRole.Assistant, content, name: message.name };
683
break;
684
case vscode.LanguageModelChatMessageRole.Assistant:
685
raw = {
686
role: Raw.ChatRole.Assistant,
687
content,
688
name: message.name,
689
toolCalls: message.content
690
.filter(part => part instanceof vscode.LanguageModelToolCallPart)
691
.map(part => part as vscode.LanguageModelToolCallPart)
692
.map(part => ({ function: { name: part.name, arguments: JSON.stringify(part.input) }, id: part.callId, type: 'function' })),
693
};
694
break;
695
default:
696
return 0;
697
}
698
699
return endpoint.acquireTokenizer().countMessageTokens(raw);
700
}
701
}
702
703
private validateTools(tools: readonly vscode.LanguageModelChatTool[]): void {
704
for (const tool of tools) {
705
if (!tool.name.match(/^[\w-]+$/)) {
706
throw new Error(`Invalid tool name "${tool.name}": only alphanumeric characters, hyphens, and underscores are allowed.`);
707
}
708
}
709
}
710
711
private async countToolTokens(endpoint: IChatEndpoint, tools: readonly vscode.LanguageModelChatTool[]): Promise<number> {
712
return await endpoint.acquireTokenizer().countToolTokens(tools);
713
}
714
715
private validateRequest(_messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>): void {
716
const lastMessage = _messages.at(-1);
717
if (!lastMessage) {
718
throw new Error('Invalid request: no messages.');
719
}
720
721
_messages.forEach((message, i) => {
722
if (message.role === vscode.LanguageModelChatMessageRole.Assistant) {
723
// Filter out DataPart since it does not share the same value type and does not have callId, function, etc.
724
const filteredContent = message.content.filter(part => part instanceof vscode.LanguageModelDataPart);
725
const toolCallIds = new Set(filteredContent
726
.filter(part => part instanceof vscode.LanguageModelToolCallPart)
727
.map(part => part.callId));
728
let nextMessageIdx = i + 1;
729
const errMsg = 'Invalid request: Tool call part must be followed by a User message with a LanguageModelToolResultPart with a matching callId.';
730
while (toolCallIds.size > 0) {
731
const nextMessage = _messages.at(nextMessageIdx++);
732
if (!nextMessage || nextMessage.role !== vscode.LanguageModelChatMessageRole.User) {
733
throw new Error(errMsg);
734
}
735
736
nextMessage.content.forEach(part => {
737
if (!(part instanceof vscode.LanguageModelToolResultPart2 || part instanceof vscode.LanguageModelToolResultPart)) {
738
throw new Error(errMsg);
739
}
740
741
toolCallIds.delete(part.callId);
742
});
743
}
744
}
745
});
746
}
747
}
748
749
750
/**
 * Combines several predicates into a single predicate that is satisfied when
 * any one of them accepts the value.
 */
function or(...checks: ((value: unknown) => boolean)[]): (value: unknown) => boolean {
	return (value) => {
		for (const check of checks) {
			if (check(value)) {
				return true;
			}
		}
		return false;
	};
}
753
754
class LanguageModelOptions {
755
756
private static _defaultDesc: Record<string, (value: unknown) => boolean> = {
757
stop: or(isStringArray, isString),
758
temperature: isNumber,
759
max_tokens: isNumber,
760
frequency_penalty: isNumber,
761
presence_penalty: isNumber,
762
};
763
764
static Default = new LanguageModelOptions({ ...this._defaultDesc });
765
766
constructor(private _description: Record<string, (value: unknown) => boolean>) { }
767
768
convert(options: { [name: string]: unknown }): Record<string, number | boolean | string> {
769
const result: Record<string, number | boolean | string> = {};
770
for (const key in this._description) {
771
const isValid = this._description[key];
772
const value = options[key];
773
if (value !== null && value !== undefined && isValid(value)) {
774
// Type guards ensure we only add values of the correct type
775
if (isNumber(value) || isBoolean(value) || isString(value)) {
776
result[key] = value;
777
}
778
}
779
}
780
return result;
781
}
782
}
783
784