Path: blob/main/extensions/copilot/src/extension/conversation/vscode-node/languageModelAccess.ts
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import * as vscode from 'vscode';
import { IAuthenticationService } from '../../../platform/authentication/common/authentication';
import { CopilotToken } from '../../../platform/authentication/common/copilotToken';
import { IBlockedExtensionService } from '../../../platform/chat/common/blockedExtensionService';
import { ChatFetchResponseType, ChatLocation, getErrorDetailsFromChatFetchError } from '../../../platform/chat/common/commonTypes';
import { getTextPart } from '../../../platform/chat/common/globalStringUtils';
import { EmbeddingType, getWellKnownEmbeddingTypeInfo, IEmbeddingsComputer } from '../../../platform/embeddings/common/embeddingsComputer';
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { CustomDataPartMimeTypes } from '../../../platform/endpoint/common/endpointTypes';
import { ModelAliasRegistry } from '../../../platform/endpoint/common/modelAliasRegistry';
import { encodeStatefulMarker } from '../../../platform/endpoint/common/statefulMarkerContainer';
import { isGeminiFamily } from '../../../platform/endpoint/common/chatModelCapabilities';
import { AutoChatEndpoint } from '../../../platform/endpoint/node/autoChatEndpoint';
import { IAutomodeService } from '../../../platform/endpoint/node/automodeService';
import { IEnvService, isScenarioAutomation } from '../../../platform/env/common/envService';
import { IVSCodeExtensionContext } from '../../../platform/extContext/common/extensionContext';
import { IOctoKitService } from '../../../platform/github/common/githubService';
import { ILogService } from '../../../platform/log/common/logService';
import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../../platform/networking/common/fetch';
import { IChatEndpoint, IEndpoint } from '../../../platform/networking/common/networking';
import { IOTelService, type OTelModelOptions } from '../../../platform/otel/common/otelService';
import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { isEncryptedThinkingDelta } from '../../../platform/thinking/common/thinking';
import { BaseTokensPerCompletion } from '../../../platform/tokenizer/node/tokenizer';
import { TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
import { Emitter } from '../../../util/vs/base/common/event';
import { Disposable, MutableDisposable } from '../../../util/vs/base/common/lifecycle';
import { isBoolean, isDefined, isNumber, isString, isStringArray } from '../../../util/vs/base/common/types';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { ChatLocation as ApiChatLocation, ExtensionMode } from '../../../vscodeTypes';
import type { LMResponsePart } from '../../byok/common/byokProvider';
import { IExtensionContribution } from '../../common/contributions';
import { PromptRenderer } from '../../prompts/node/base/promptRenderer';
import { isImageDataPart } from '../common/languageModelChatMessageHelpers';
import { LanguageModelAccessPrompt } from './languageModelAccessPrompt';
import { formatPricingLabel, getModelCapabilitiesDescription } from '../common/languageModelAccess';

/**
 * Markers in the autoModelHint experiment variable that indicate the auto model
 * is routing to an experimental or evaluation model.
 */
const experimentalAutoModelHintMarkers = ['minimax', 'mp3yn0h7', 'yaqq2gxh'];

/**
 * Builds a configurationSchema for the model picker based on the endpoint's supported capabilities.
 * Models that support reasoning_effort get a "Thinking Effort" dropdown in the model picker UI.
 */
function buildConfigurationSchema(endpoint: IChatEndpoint): { configurationSchema?: vscode.LanguageModelConfigurationSchema } {
	const effortLevels = endpoint.supportsReasoningEffort;
	if (!effortLevels || effortLevels.length <= 1) {
		return {};
	}

	// Auto model delegates to different backends, so don't expose effort picker
	if (endpoint instanceof AutoChatEndpoint) {
		return {};
	}

	const family = endpoint.family.toLowerCase();
	if (isGeminiFamily(endpoint)) {
		return {};
	}

	let defaultEffort: string | undefined;
	if (family.startsWith('claude')) {
		defaultEffort = effortLevels.includes('high') ? 'high' : undefined;
	} else if (family.startsWith('gpt-')) {
		defaultEffort = effortLevels.includes('medium') ? 'medium' : undefined;
	}

	return {
		configurationSchema: {
			properties: {
				reasoningEffort: {
					type: 'string',
					title: vscode.l10n.t('Thinking Effort'),
					enum: effortLevels,
					enumItemLabels: effortLevels.map(level => level.charAt(0).toUpperCase() + level.slice(1)),
					enumDescriptions: effortLevels.map(level => {
						switch (level) {
							case 'none': return vscode.l10n.t('No reasoning applied');
							case 'low': return vscode.l10n.t('Faster responses with less reasoning');
							case 'medium': return vscode.l10n.t('Balanced reasoning and speed');
							case 'high': return vscode.l10n.t('Greater reasoning depth but slower');
							case 'xhigh': return vscode.l10n.t('Maximum reasoning depth but slower');
							default: return level;
						}
					}),
					default: defaultEffort,
					group: 'navigation',
				}
			}
		}
	};
}
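
// Illustrative only: for a hypothetical endpoint in the 'gpt-*' family that supports
// ['low', 'medium', 'high'], this returns roughly
// { configurationSchema: { properties: { reasoningEffort: { type: 'string', enum: ['low', 'medium', 'high'], default: 'medium', ... } } } }.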

export class LanguageModelAccess extends Disposable implements IExtensionContribution {

	readonly id = 'languageModelAccess';

	readonly activationBlocker?: Promise<void>;

	private readonly _onDidChange = this._register(new Emitter<void>());
	private _currentModels: vscode.LanguageModelChatInformation[] = []; // Store current models for reference
	private _chatEndpoints: IChatEndpoint[] = [];
	private _lmWrapper: CopilotLanguageModelWrapper;
	private _promptBaseCountCache: LanguageModelAccessPromptBaseCountCache;

	constructor(
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IEndpointProvider private readonly _endpointProvider: IEndpointProvider,
		@IEmbeddingsComputer private readonly _embeddingsComputer: IEmbeddingsComputer,
		@IVSCodeExtensionContext private readonly _vsCodeExtensionContext: IVSCodeExtensionContext,
		@IAutomodeService private readonly _automodeService: IAutomodeService,
		@IExperimentationService private readonly _expService: IExperimentationService,
	) {
		super();

		this._lmWrapper = this._instantiationService.createInstance(CopilotLanguageModelWrapper);
		this._promptBaseCountCache = this._instantiationService.createInstance(LanguageModelAccessPromptBaseCountCache);

		if (this._vsCodeExtensionContext.extensionMode === ExtensionMode.Test && !isScenarioAutomation) {
			this._logService.warn('[LanguageModelAccess] LanguageModels and Embeddings are NOT AVAILABLE in test mode.');
			return;
		}

		// initial
		this.activationBlocker = Promise.all([
			this._registerChatProvider(),
			this._registerEmbeddings(),
		]).then(() => { });
	}

	override dispose(): void {
		super.dispose();
	}

	get currentModels(): vscode.LanguageModelChatInformation[] {
		return this._currentModels;
	}

	private async _registerChatProvider(): Promise<void> {
		const provider: vscode.LanguageModelChatProvider = {
			onDidChangeLanguageModelChatInformation: this._onDidChange.event,
			provideLanguageModelChatInformation: this._provideLanguageModelChatInfo.bind(this),
			provideLanguageModelChatResponse: this._provideLanguageModelChatResponse.bind(this),
			provideTokenCount: this._provideTokenCount.bind(this)
		};
		this._register(vscode.lm.registerLanguageModelChatProvider('copilot', provider));
		this._register(this._authenticationService.onDidAuthenticationChange(() => {
			if (!this._authenticationService.anyGitHubSession) {
				this._currentModels = [];
			}
			// Auth changed which means models could've changed. Fire the event
			this._onDidChange.fire();
		}));
		this._register(this._endpointProvider.onDidModelsRefresh(() => {
			// Models have been refreshed from CAPI so we should requery them
			this._onDidChange.fire();
		}));
	}

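	/**
	 * Builds the entries shown in the model picker: all chat endpoints plus the Auto endpoint,
	 * with the default model chosen from the experiment treatment (falling back to Auto) and
	 * registered aliases added as hidden duplicate entries.
	 */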
	private async _provideLanguageModelChatInfo(options: { silent: boolean }, token: vscode.CancellationToken): Promise<vscode.LanguageModelChatInformation[]> {
		const session = await this._getToken();
		if (!session) {
			// Return cached models until we have auth reacquired
			// We clear this list in onDidAuthenticationChange so signed out should still have model picker clear
			return this._currentModels;
		}

		const models: vscode.LanguageModelChatInformation[] = [];
		const allEndpoints = await this._endpointProvider.getAllChatEndpoints();
		const chatEndpoints = allEndpoints.filter(e => e.showInModelPicker || e.model === 'gpt-4o-mini');
		const autoEndpoint = await this._automodeService.resolveAutoModeEndpoint(undefined, allEndpoints);
		chatEndpoints.push(autoEndpoint);
		let defaultChatEndpoint: IChatEndpoint;
		const defaultExpModel = this._expService.getTreatmentVariable<string>('chat.defaultLanguageModel')?.replace('copilot/', '');
		if (this._authenticationService.copilotToken?.isNoAuthUser || !defaultExpModel || defaultExpModel === AutoChatEndpoint.pseudoModelId) {
			// No auth, no experiment, and exp that sets auto to default all get default model
			defaultChatEndpoint = autoEndpoint;
		} else {
			// Find exp default
			defaultChatEndpoint = chatEndpoints.find(e => e.model === defaultExpModel) || autoEndpoint;
		}

		const seenFamilies = new Set<string>();

		for (const endpoint of chatEndpoints) {
			if (seenFamilies.has(endpoint.family) && !endpoint.showInModelPicker) {
				continue;
			}
			seenFamilies.add(endpoint.family);

			const sanitizedModelName = endpoint.name.replace(/\(Preview\)/g, '').trim();
			let modelTooltip: string | undefined;
			if (endpoint.degradationReason) {
				modelTooltip = endpoint.degradationReason;
			} else if (endpoint instanceof AutoChatEndpoint) {
				modelTooltip = vscode.l10n.t('Auto selects the best model for your request based on capacity and performance.');
				const plan = this._authenticationService.copilotToken?.copilotPlan;
				const isOrgManaged = plan === 'business' || plan === 'enterprise';
				const autoModeHint = this._expService.getTreatmentVariable<string>('copilotchat.autoModelHint');
				const showExperimentalHint = !isOrgManaged && !!autoModeHint && experimentalAutoModelHintMarkers.some(marker => autoModeHint.includes(marker));
				if (showExperimentalHint) {
					modelTooltip = `${modelTooltip} ${vscode.l10n.t('This model may be experimental or in evaluation.')}`;
				}
			} else {
				modelTooltip = getModelCapabilitiesDescription(endpoint);
			}

			let modelCategory: { label: string; order: number } | undefined;
			if (endpoint instanceof AutoChatEndpoint) {
				modelCategory = { label: '', order: Number.MIN_SAFE_INTEGER };
			} else if (endpoint.isPremium === undefined || this._authenticationService.copilotToken?.isFreeUser) {
				modelCategory = { label: vscode.l10n.t("Copilot Models"), order: 0 };
			} else if (endpoint.isPremium) {
				modelCategory = { label: vscode.l10n.t("Premium Models"), order: 1 };
			} else {
				modelCategory = { label: vscode.l10n.t("Standard Models"), order: 0 };
			}

			// Counting tokens requires instantiating the tokenizers, which makes this process use a lot of memory.
			// Let's cache the results across extension activations
			const baseCount = await this._promptBaseCountCache.getBaseCount(endpoint);
			const multiplier = endpoint.multiplier !== undefined ? `${endpoint.multiplier}x` : undefined;
			let modelDetail: string | undefined;

			if (endpoint instanceof AutoChatEndpoint) {
				if (endpoint.discountRange.high === endpoint.discountRange.low && endpoint.discountRange.low !== 0) {
					modelDetail = `${endpoint.discountRange.low * 100}% discount`;
				} else if (endpoint.discountRange.high !== endpoint.discountRange.low) {
					modelDetail = `${endpoint.discountRange.low * 100}% to ${endpoint.discountRange.high * 100}% discount`;
				}
			}
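			// For example, a discountRange of { low: 0.1, high: 0.3 } renders as "10% to 30% discount",
			// while equal, non-zero bounds such as { low: 0.1, high: 0.1 } render as "10% discount".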
			if (endpoint.customModel) {
				const customModel = endpoint.customModel;
				modelDetail = customModel.owner_name;
				modelTooltip = vscode.l10n.t('{0} is contributed by {1} using {2}.', sanitizedModelName, customModel.owner_name, customModel.key_name);
				modelCategory = { label: vscode.l10n.t("Custom Models"), order: 2 };
			}

			const session = this._authenticationService.anyGitHubSession;
			const isDefault = endpoint === defaultChatEndpoint;

			const model: vscode.LanguageModelChatInformation = {
				id: endpoint instanceof AutoChatEndpoint ? AutoChatEndpoint.pseudoModelId : endpoint.model,
				name: endpoint instanceof AutoChatEndpoint ? 'Auto' : endpoint.name,
				family: endpoint.family,
				tooltip: modelTooltip,
				pricing: endpoint instanceof AutoChatEndpoint ? undefined : (multiplier ?? (endpoint.tokenPricing ? formatPricingLabel(endpoint.tokenPricing) : undefined)),
				multiplierNumeric: endpoint instanceof AutoChatEndpoint ? undefined : endpoint.multiplier,
				detail: modelDetail,
				category: modelCategory,
				statusIcon: endpoint.degradationReason ? new vscode.ThemeIcon('warning') : undefined,
				version: endpoint.version,
				maxInputTokens: endpoint.modelMaxPromptTokens - baseCount - BaseTokensPerCompletion,
				maxOutputTokens: endpoint.maxOutputTokens,
				requiresAuthorization: session && { label: session.account.label },
				isDefault: {
					[ApiChatLocation.Panel]: isDefault,
					[ApiChatLocation.Terminal]: isDefault,
					[ApiChatLocation.Notebook]: isDefault,
					[ApiChatLocation.Editor]: endpoint instanceof AutoChatEndpoint, // inline chat gets 'Auto' by default
				},
				isUserSelectable: endpoint.showInModelPicker,
				capabilities: {
					imageInput: endpoint instanceof AutoChatEndpoint ? true : endpoint.supportsVision,
					toolCalling: endpoint.supportsToolCalls,
				},
				...buildConfigurationSchema(endpoint),
			};

			models.push(model);

			// Register aliases for this model
			const aliases = ModelAliasRegistry.getAliases(model.id);
			for (const alias of aliases) {
				models.push({
					...model,
					id: alias,
					family: alias,
					isUserSelectable: false,
				});
			}
		}

		this._currentModels = models;
		this._chatEndpoints = chatEndpoints;
		return models;
	}

	private async _getEndpointForModel(model: vscode.LanguageModelChatInformation) {
		if (model.id === AutoChatEndpoint.pseudoModelId) {
			const allEndpoints = await this._endpointProvider.getAllChatEndpoints();
			return await this._automodeService.resolveAutoModeEndpoint(undefined, allEndpoints);
		}
		return this._chatEndpoints.find(e => e.model === ModelAliasRegistry.resolveAlias(model.id));
	}

	private async _provideLanguageModelChatResponse(
		model: vscode.LanguageModelChatInformation,
		messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>,
		options: vscode.ProvideLanguageModelChatResponseOptions,
		progress: vscode.Progress<vscode.LanguageModelResponsePart2>,
		token: vscode.CancellationToken
	): Promise<void> {
		const endpoint = await this._getEndpointForModel(model);
		if (!endpoint) {
			throw new Error(`Endpoint not found for model ${model.id}`);
		}

		return this._lmWrapper.provideLanguageModelResponse(endpoint, messages, {
			...options,
			modelOptions: options.modelOptions
		}, options.requestInitiator, progress, token);
	}

	private async _provideTokenCount(
		model: vscode.LanguageModelChatInformation,
		text: string | vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2,
		token: vscode.CancellationToken
	): Promise<number> {
		const endpoint = await this._getEndpointForModel(model);
		if (!endpoint) {
			throw new Error(`Endpoint not found for model ${model.id}`);
		}

		return this._lmWrapper.provideTokenCount(endpoint, text);
	}

	private async _registerEmbeddings(): Promise<void> {

		const dispo = this._register(new MutableDisposable());

		const update = async () => {

			if (!await this._getToken()) {
				dispo.clear();
				return;
			}

			const embeddingsComputer = this._embeddingsComputer;
			const embeddingType = EmbeddingType.text3small_512;
			const model = getWellKnownEmbeddingTypeInfo(embeddingType)?.model;
			if (!model) {
				throw new Error(`No model found for embedding type ${embeddingType.id}`);
			}

			dispo.clear();
			dispo.value = vscode.lm.registerEmbeddingsProvider(`copilot.${model}`, new class implements vscode.EmbeddingsProvider {
				async provideEmbeddings(input: string[], token: vscode.CancellationToken): Promise<vscode.Embedding[]> {
					const result = await embeddingsComputer.computeEmbeddings(embeddingType, input, {}, new TelemetryCorrelationId('EmbeddingsProvider::provideEmbeddings'), token);
					return result.values.map(embedding => ({ values: embedding.value.slice(0) }));
				}
			});
		};

		this._register(this._authenticationService.onDidAuthenticationChange(() => update()));
		await update();
	}

	private async _getToken(): Promise<CopilotToken | undefined> {
		try {
			const copilotToken = await this._authenticationService.getCopilotToken();
			return copilotToken;
		} catch (e) {
			this._logService.warn('[LanguageModelAccess] LanguageModel/Embeddings are not available without auth token');
			this._logService.error(e);
			return undefined;
		}
	}
}

class LanguageModelAccessPromptBaseCountCache {
	constructor(
		@IVSCodeExtensionContext private readonly _extensionContext: IVSCodeExtensionContext,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IEnvService private readonly _envService: IEnvService
	) { }

	public async getBaseCount(endpoint: IChatEndpoint): Promise<number> {
		const key = `lmBaseCount/${endpoint.model}`;
		const cached = this._extensionContext.globalState.get<{ extensionVersion: string; baseCount: number }>(key);
		if (cached && cached.extensionVersion === this._envService.getVersion() && typeof cached.baseCount === 'number') {
			return cached.baseCount;
		}

		const baseCount = await this._computeBaseCount(endpoint);
		// Store the computed value along with the extension version so we can
		// invalidate the cache when the extension is updated.
		try {
			await this._extensionContext.globalState.update(key, { extensionVersion: this._envService.getVersion(), baseCount });
		} catch (err) {
			// Best-effort cache update: don't fail the caller if persisting the
			// cache entry fails for any reason.
		}

		return baseCount;
	}

	private async _computeBaseCount(endpoint: IChatEndpoint): Promise<number> {
		const baseCount = await PromptRenderer.create(this._instantiationService, endpoint, LanguageModelAccessPrompt, { noSafety: false, messages: [] }).countTokens();
		return baseCount;
	}
}

/**
 * Exported for test
 */
export class CopilotLanguageModelWrapper extends Disposable {

	constructor(
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IBlockedExtensionService private readonly _blockedExtensionService: IBlockedExtensionService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@ILogService private readonly _logService: ILogService,
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IEnvService private readonly _envService: IEnvService,
		@IOTelService private readonly _otelService: IOTelService,
		@IOctoKitService private readonly _octoKitService: IOctoKitService,
	) {
		super();
	}

	private async _provideLanguageModelResponse(_endpoint: IChatEndpoint, _messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>, _options: vscode.ProvideLanguageModelChatResponseOptions, extensionId: string | undefined, callback: FinishedCallback, token: vscode.CancellationToken): Promise<void> {
		if (extensionId === 'core') {
			extensionId = undefined;
		}

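		// With no extensionId the request comes from the editor itself, so it is attributed to the
		// VS Code version; otherwise the calling extension is looked up to record its version.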
		const extensionInfo = !extensionId ? { packageJSON: { version: this._envService.vscodeVersion } } : vscode.extensions.getExtension(extensionId, true);
		if (!extensionInfo || typeof extensionInfo.packageJSON.version !== 'string') {
			throw new Error('Invalid extension information');
		}
		const extensionVersion = <string>extensionInfo.packageJSON.version;

		const blockedExtensionMessage = vscode.l10n.t('The extension has been temporarily blocked due to making too many requests. Please try again later.');
		if (extensionId && this._blockedExtensionService.isExtensionBlocked(extensionId)) {
			throw vscode.LanguageModelError.Blocked(blockedExtensionMessage);
		}

		const toolTokenCount = _options.tools ? await this.countToolTokens(_endpoint, _options.tools) : 0;
		const baseCount = await PromptRenderer.create(this._instantiationService, _endpoint, LanguageModelAccessPrompt, { noSafety: false, messages: [] }).countTokens();
		const tokenLimit = _endpoint.modelMaxPromptTokens - baseCount - BaseTokensPerCompletion - toolTokenCount;

		this.validateRequest(_messages);
		if (_options.tools) {
			this.validateTools(_options.tools);
		}
		// Add safety rules to the prompt if it originates from outside the Copilot Chat extension, otherwise they already exist in the prompt.
		const { messages, tokenCount } = await PromptRenderer.create(this._instantiationService, {
			..._endpoint,
			modelMaxPromptTokens: tokenLimit
		}, LanguageModelAccessPrompt, { noSafety: extensionId === this._envService.extensionId, messages: _messages }).render();

		/* __GDPR__
			"languagemodelrequest" : {
				"owner": "jrieken",
				"comment": "Data about extensions using the language model",
				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model that is being used" },
				"extensionId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The extension identifier for which we make the request" },
				"extensionVersion": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The extension version for which we make the request" },
				"tokenCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tokens" },
				"tokenLimit": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tokens that can be used" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent(
			'languagemodelrequest',
			{
				extensionId,
				extensionVersion,
				model: _endpoint.model
			},
			{
				tokenCount,
				tokenLimit
			}
		);

		// If no messages remain, they were rendered out due to the token limit
		if (messages.length === 0 || tokenCount > tokenLimit) {
			throw new Error('Message exceeds token limit.');
		}

		if (_options.tools && _options.tools.length > 128 && !_endpoint.supportsToolSearch) {
			throw new Error('Cannot have more than 128 tools per request.');
		}

		const endpoint: IChatEndpoint = new Proxy(_endpoint, {
			get: function (target, prop, receiver) {
				if (prop === 'getExtraHeaders') {
					return function () {
						const extraHeaders = target.getExtraHeaders?.() ?? {};
						if (!extensionId) {
							return extraHeaders;
						}
						return {
							...extraHeaders,
							'x-onbehalf-extension-id': `${extensionId}/${extensionVersion}`,
						};
					};
				}
				if (prop === 'acquireTokenizer') {
					return target.acquireTokenizer.bind(target);
				}
				return Reflect.get(target, prop, receiver);
			}
		});
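
		// Illustrative header value: a hypothetical caller 'my.extension' at version '1.2.3' would add
		// 'x-onbehalf-extension-id': 'my.extension/1.2.3' to every request made through this proxy.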

		const options: OptionalChatRequestParams = LanguageModelOptions.Default.convert(_options.modelOptions ?? {});
		const telemetryProperties = { messageSource: `api.${extensionId}` };

		options.tools = _options.tools?.map((tool): OpenAiFunctionTool => {
			return {
				type: 'function',
				function: {
					name: tool.name,
					description: tool.description,
					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
				}
			};
		});
		if (_options.toolMode === vscode.LanguageModelChatToolMode.Required && _options.tools?.length && _options.tools.length > 1) {
			throw new Error('LanguageModelChatToolMode.Required is not supported with more than one tool');
		}

		options.tool_choice = _options.toolMode === vscode.LanguageModelChatToolMode.Required && _options.tools?.length ?
			{ type: 'function', function: { name: _options.tools[0].name } } :
			undefined;

		// Restore CapturingToken context if correlation ID was passed through modelOptions.
		// This handles BYOK providers where the original AsyncLocalStorage context was lost
		// when crossing the VS Code IPC boundary.
		const correlationId = (_options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;
		const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;

		// Restore OTel trace context if passed through modelOptions.
		// This links the wrapper's chat span back to the original invoke_agent trace.
		const parentTraceContext = (_options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;

		const makeRequest = () => endpoint.makeChatRequest2({
			debugName: 'copilotLanguageModelWrapper',
			messages,
			finishedCb: callback,
			location: ChatLocation.Other,
			source: { extensionId },
			requestOptions: options,
			userInitiatedRequest: !!extensionId,
			telemetryProperties,
			modelCapabilities: {
				reasoningEffort: typeof _options.modelConfiguration?.reasoningEffort === 'string' ? _options.modelConfiguration.reasoningEffort : undefined,
			},
		}, token);

		// Run request within the parent OTel context (no extra span) so chat spans in chatMLFetcher inherit the agent trace
		const wrappedRequest = parentTraceContext
			? () => this._otelService.runWithTraceContext(parentTraceContext, async () => {
				return capturingToken
					? await runWithCapturingToken(capturingToken, makeRequest)
					: await makeRequest();
			})
			: () => capturingToken
				? runWithCapturingToken(capturingToken, makeRequest)
				: makeRequest();

		const result = await wrappedRequest();

		if (result.type !== ChatFetchResponseType.Success) {
			if (result.type === ChatFetchResponseType.ExtensionBlocked) {
				if (extensionId) {
					this._blockedExtensionService.reportBlockedExtension(extensionId, result.retryAfter);
				}

				throw vscode.LanguageModelError.Blocked(blockedExtensionMessage);
			} else if (result.type === ChatFetchResponseType.QuotaExceeded) {
				const outageStatus = await this._octoKitService.getGitHubOutageStatus();
				const details = getErrorDetailsFromChatFetchError(result, (await this._authenticationService.getCopilotToken()).copilotPlan, outageStatus);
				const err = new vscode.LanguageModelError(details.message);
				err.name = 'ChatQuotaExceeded';
				throw err;
			} else if (result.type === ChatFetchResponseType.RateLimited) {
				const err = new Error(result.reason);
				err.name = 'ChatRateLimited';
				throw err;
			}

			throw new Error(result.reason);
		}

		this._telemetryService.sendInternalMSFTTelemetryEvent(
			'languagemodelrequest',
			{
				extensionId,
				extensionVersion,
				requestid: result.requestId,
				query: getTextPart(messages[messages.length - 1].content),
				model: _endpoint.model
			},
			{
				tokenCount,
				tokenLimit
			}
		);
	}

	async provideLanguageModelResponse(endpoint: IChatEndpoint, messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>, options: vscode.ProvideLanguageModelChatResponseOptions, extensionId: string | undefined, progress: vscode.Progress<LMResponsePart>, token: vscode.CancellationToken): Promise<void> {
		let thinkingActive = false;
		const finishCallback: FinishedCallback = async (_text, index, delta): Promise<undefined> => {
			if (delta.thinking) {
				// Show thinking progress for unencrypted thinking deltas
				if (!isEncryptedThinkingDelta(delta.thinking)) {
					const text = delta.thinking.text ?? '';
					progress.report(new vscode.LanguageModelThinkingPart(text, delta.thinking.id, delta.thinking.metadata));
					thinkingActive = true;
				}
			} else if (thinkingActive) {
				progress.report(new vscode.LanguageModelThinkingPart('', '', { vscode_reasoning_done: true }));
				thinkingActive = false;
			}
			if (delta.text) {
				progress.report(new vscode.LanguageModelTextPart(delta.text));
			}
			if (delta.copilotToolCalls) {
				for (const call of delta.copilotToolCalls) {
					try {
						// Anthropic models send "" (empty string) for tools with no parameters.
						const parameters = JSON.parse(call.arguments || '{}');
						progress.report(new vscode.LanguageModelToolCallPart(call.id, call.name, parameters));
					} catch (err) {
						this._logService.error(err, `Got invalid JSON for tool call: ${call.arguments}`);
						throw new Error('Invalid JSON for tool call');
					}
				}
			}

			if (delta.statefulMarker) {
				progress.report(
					new vscode.LanguageModelDataPart(encodeStatefulMarker(endpoint.model, delta.statefulMarker), CustomDataPartMimeTypes.StatefulMarker)
				);
			}

			return undefined;
		};
		return this._provideLanguageModelResponse(endpoint, messages, options, extensionId, finishCallback, token);
	}

	async provideTokenCount(endpoint: IEndpoint, message: string | vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2): Promise<number> {
		if (typeof message === 'string') {
			return endpoint.acquireTokenizer().tokenLength(message);
		} else {
			let raw: Raw.ChatMessage;

			const content = message.content.map((part): Raw.ChatCompletionContentPart | undefined => {
				if (part instanceof vscode.LanguageModelTextPart) {
					return { type: Raw.ChatCompletionContentPartKind.Text, text: part.value };
				} else if (part instanceof vscode.LanguageModelDataPart && part.mimeType === 'application/pdf') {
					return { type: Raw.ChatCompletionContentPartKind.Document, documentData: { data: Buffer.from(part.data).toString('base64'), mediaType: part.mimeType } };
				} else if (isImageDataPart(part)) {
					return { type: Raw.ChatCompletionContentPartKind.Image, imageUrl: { url: `data:${part.mimeType};base64,${Buffer.from(part.data).toString('base64url')}` } };
				} else {
					return undefined;
				}
			}).filter(isDefined);
			switch (message.role) {
				case vscode.LanguageModelChatMessageRole.User:
					raw = { role: Raw.ChatRole.User, content, name: message.name };
					break;
				case vscode.LanguageModelChatMessageRole.System:
					raw = { role: Raw.ChatRole.System, content, name: message.name };
					break;
				case vscode.LanguageModelChatMessageRole.Assistant:
					raw = {
						role: Raw.ChatRole.Assistant,
						content,
						name: message.name,
						toolCalls: message.content
							.filter(part => part instanceof vscode.LanguageModelToolCallPart)
							.map(part => part as vscode.LanguageModelToolCallPart)
							.map(part => ({ function: { name: part.name, arguments: JSON.stringify(part.input) }, id: part.callId, type: 'function' })),
					};
					break;
				default:
					return 0;
			}

			return endpoint.acquireTokenizer().countMessageTokens(raw);
		}
	}

	private validateTools(tools: readonly vscode.LanguageModelChatTool[]): void {
		for (const tool of tools) {
			if (!tool.name.match(/^[\w-]+$/)) {
				throw new Error(`Invalid tool name "${tool.name}": only alphanumeric characters, hyphens, and underscores are allowed.`);
			}
		}
	}

	private async countToolTokens(endpoint: IChatEndpoint, tools: readonly vscode.LanguageModelChatTool[]): Promise<number> {
		return await endpoint.acquireTokenizer().countToolTokens(tools);
	}

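	/**
	 * Every LanguageModelToolCallPart in an Assistant message must be answered by a following User
	 * message whose parts are tool results, one of them carrying the same callId. For example, an
	 * Assistant message with a tool call using callId 'call_1' must be followed by a User message
	 * containing a LanguageModelToolResultPart with callId 'call_1'.
	 */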
	private validateRequest(_messages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2>): void {
		const lastMessage = _messages.at(-1);
		if (!lastMessage) {
			throw new Error('Invalid request: no messages.');
		}

		_messages.forEach((message, i) => {
			if (message.role === vscode.LanguageModelChatMessageRole.Assistant) {
				// Filter out DataPart since it does not share the same value type and does not have callId, function, etc.
				const filteredContent = message.content.filter(part => !(part instanceof vscode.LanguageModelDataPart));
				const toolCallIds = new Set(filteredContent
					.filter(part => part instanceof vscode.LanguageModelToolCallPart)
					.map(part => part.callId));
				let nextMessageIdx = i + 1;
				const errMsg = 'Invalid request: Tool call part must be followed by a User message with a LanguageModelToolResultPart with a matching callId.';
				while (toolCallIds.size > 0) {
					const nextMessage = _messages.at(nextMessageIdx++);
					if (!nextMessage || nextMessage.role !== vscode.LanguageModelChatMessageRole.User) {
						throw new Error(errMsg);
					}

					nextMessage.content.forEach(part => {
						if (!(part instanceof vscode.LanguageModelToolResultPart2 || part instanceof vscode.LanguageModelToolResultPart)) {
							throw new Error(errMsg);
						}

						toolCallIds.delete(part.callId);
					});
				}
			}
		});
	}
}

function or(...checks: ((value: unknown) => boolean)[]): (value: unknown) => boolean {
	return (value) => checks.some(check => check(value));
}

class LanguageModelOptions {

	private static _defaultDesc: Record<string, (value: unknown) => boolean> = {
		stop: or(isStringArray, isString),
		temperature: isNumber,
		max_tokens: isNumber,
		frequency_penalty: isNumber,
		presence_penalty: isNumber,
	};

	static Default = new LanguageModelOptions({ ...this._defaultDesc });

	constructor(private _description: Record<string, (value: unknown) => boolean>) { }

	convert(options: { [name: string]: unknown }): Record<string, number | boolean | string> {
		const result: Record<string, number | boolean | string> = {};
		for (const key in this._description) {
			const isValid = this._description[key];
			const value = options[key];
			if (value !== null && value !== undefined && isValid(value)) {
				// Type guards ensure we only add values of the correct type
				if (isNumber(value) || isBoolean(value) || isString(value)) {
					result[key] = value;
				}
			}
		}
		return result;
	}
}
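
// Illustrative use of the filter above: LanguageModelOptions.Default.convert({ temperature: 0.2, stop: '\n', top_p: 0.9, max_tokens: '100' })
// keeps only { temperature: 0.2, stop: '\n' }: top_p is not a known option and max_tokens has the
// wrong type, so both are dropped.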