// extensions/copilot/src/platform/endpoint/vscode-node/extChatEndpoint.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { Raw } from '@vscode/prompt-tsx';6import type { CancellationToken } from 'vscode';7import * as vscode from 'vscode';8import { FetchStreamRecorder } from '../../../platform/chat/common/chatMLFetcher';9import { toErrorMessage } from '../../../util/common/errorMessage';10import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';11import { AsyncIterableObject } from '../../../util/vs/base/common/async';12import { generateUuid } from '../../../util/vs/base/common/uuid';13import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';14import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../chat/common/commonTypes';15import { ILogService } from '../../log/common/logService';16import { ContextManagementResponse } from '../../networking/common/anthropic';17import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../networking/common/fetch';18import { Response } from '../../networking/common/fetcherService';19import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../networking/common/networking';20import { ChatCompletion } from '../../networking/common/openai';21import { IOTelService } from '../../otel/common/otelService';22import { retrieveCapturingTokenByCorrelation, storeCapturingTokenForCorrelation } from '../../requestLogger/node/requestLogger';23import { ITelemetryService } from '../../telemetry/common/telemetry';24import { TelemetryData } from '../../telemetry/common/telemetryData';25import { EndpointEditToolName, isEndpointEditToolName } from 
'../common/endpointProvider';26import { CustomDataPartMimeTypes } from '../common/endpointTypes';27import { decodeStatefulMarker, encodeStatefulMarker, rawPartAsStatefulMarker } from '../common/statefulMarkerContainer';28import { rawPartAsThinkingData } from '../common/thinkingDataContainer';29import { ExtensionContributedChatTokenizer } from './extChatTokenizer';3031enum ChatImageMimeType {32PNG = 'image/png',33JPEG = 'image/jpeg',34GIF = 'image/gif',35WEBP = 'image/webp',36BMP = 'image/bmp',37}3839export class ExtensionContributedChatEndpoint implements IChatEndpoint {40private readonly _maxTokens: number;41public readonly isDefault: boolean = false;42public readonly isFallback: boolean = false;43public readonly isPremium: boolean = false;44public readonly multiplier: number = 0;45public readonly isExtensionContributed = true;46public readonly supportedEditTools?: readonly EndpointEditToolName[] | undefined;4748constructor(49private readonly languageModel: vscode.LanguageModelChat,50@IInstantiationService private readonly _instantiationService: IInstantiationService,51@IOTelService private readonly _otelService: IOTelService,52) {53// Initialize with the model's max tokens54this._maxTokens = languageModel.maxInputTokens;55this.supportedEditTools = languageModel.capabilities.editToolsHint?.filter(isEndpointEditToolName);56}5758get modelProvider(): string {59return this.languageModel.vendor;60}6162get modelMaxPromptTokens(): number {63return this._maxTokens;64}6566get maxOutputTokens(): number {67// The VS Code API doesn't expose max output tokens, use a reasonable default68return 8192;69}7071get urlOrRequestMetadata(): string {72// Not used for extension contributed endpoints73return '';74}7576get model(): string {77return this.languageModel.id;78}7980get name(): string {81return this.languageModel.name;82}8384get version(): string {85return this.languageModel.version;86}8788get family(): string {89return this.languageModel.family;90}9192get tokenizer(): 
TokenizerType {93// Most language models use the O200K tokenizer, if they don't they should specify in their metadata94return TokenizerType.O200K;95}9697get showInModelPicker(): boolean {98// TODO @lramos15 - Need some API exposed for this, registration seems to have it99return true;100}101102get supportsToolCalls(): boolean {103return this.languageModel.capabilities?.supportsToolCalling ?? false;104}105106get supportsVision(): boolean {107return this.languageModel?.capabilities?.supportsImageToText ?? false;108}109110get supportsPrediction(): boolean {111return false;112}113114get policy(): 'enabled' | { terms: string } {115return 'enabled';116}117118async processResponseFromChatEndpoint(119telemetryService: ITelemetryService,120logService: ILogService,121response: Response,122expectedNumChoices: number,123finishCallback: FinishedCallback,124telemetryData: TelemetryData,125cancellationToken?: CancellationToken126): Promise<AsyncIterableObject<ChatCompletion>> {127throw new Error('processResponseFromChatEndpoint not supported for extension contributed endpoints');128}129130async acceptChatPolicy(): Promise<boolean> {131return true;132}133134public acquireTokenizer(): ITokenizer {135// Use the extension-contributed tokenizer that leverages the VS Code language model API136return new ExtensionContributedChatTokenizer(this.languageModel);137}138139async makeChatRequest(140debugName: string,141messages: Raw.ChatMessage[],142finishedCb: FinishedCallback | undefined,143token: CancellationToken,144location: ChatLocation,145source?: { extensionId?: string | undefined },146requestOptions?: Omit<OptionalChatRequestParams, 'n'>,147userInitiatedRequest?: boolean,148telemetryProperties?: Record<string, string>,149): Promise<ChatResponse> {150return this.makeChatRequest2({151debugName,152messages,153finishedCb,154location,155source,156requestOptions,157userInitiatedRequest,158telemetryProperties,159}, token);160}161162async 
makeChatRequest2({163debugName,164messages,165requestOptions,166finishedCb,167location,168source,169}: IMakeChatRequestOptions, token: CancellationToken): Promise<ChatResponse> {170const vscodeMessages = convertToApiChatMessage(messages);171const ourRequestId = generateUuid();172173// Capture active OTel trace context to propagate through IPC to the BYOK provider.174// Each provider creates its own chat span with full usage data:175// - OpenAI-compatible (Azure, OpenAI, etc.): via CopilotLanguageModelWrapper → chatMLFetcher176// - Anthropic: inside AnthropicLMProvider177// - Gemini: inside GeminiNativeBYOKLMProvider178const activeTraceCtx = this._otelService.getActiveTraceContext();179180const vscodeOptions: vscode.LanguageModelChatRequestOptions = {181tools: ((requestOptions?.tools ?? []) as OpenAiFunctionTool[]).map(tool => ({182name: tool.function.name,183description: tool.function.description,184inputSchema: tool.function.parameters,185})),186// Pass correlation ID and OTel trace context through modelOptions for cross-IPC restoration.187modelOptions: {188_capturingTokenCorrelationId: ourRequestId,189_otelTraceContext: activeTraceCtx ?? null,190}191};192193// Store current CapturingToken for retrieval by BYOK providers after IPC crossing194//195// Note: We intentionally don't create an OTel chat span here for extension-contributed models.196// The BYOK provider (CopilotLanguageModelWrapper) creates the real chat span via chatMLFetcher197// with full token usage, response model, and cache data. 
Creating a span here would duplicate it.198storeCapturingTokenForCorrelation(ourRequestId);199200const streamRecorder = new FetchStreamRecorder(finishedCb);201202try {203const response = await this.languageModel.sendRequest(vscodeMessages, vscodeOptions, token);204let text = '';205let numToolsCalled = 0;206const requestId = ourRequestId;207208// consume stream209for await (const chunk of response.stream) {210if (chunk instanceof vscode.LanguageModelTextPart) {211text += chunk.value;212if (streamRecorder.callback) {213await streamRecorder.callback(text, 0, { text: chunk.value });214}215} else if (chunk instanceof vscode.LanguageModelToolCallPart) {216if (streamRecorder.callback) {217const functionCalls = [chunk].map(tool => ({218name: tool.name ?? '',219arguments: JSON.stringify(tool.input) ?? '',220id: tool.callId221}));222numToolsCalled++;223await streamRecorder.callback(text, 0, { text: '', copilotToolCalls: functionCalls });224}225} else if (chunk instanceof vscode.LanguageModelDataPart) {226if (chunk.mimeType === CustomDataPartMimeTypes.StatefulMarker) {227const decoded = decodeStatefulMarker(chunk.data);228await streamRecorder.callback?.(text, 0, { text: '', statefulMarker: decoded.marker });229} else if (chunk.mimeType === CustomDataPartMimeTypes.ContextManagement) {230const contextManagement = JSON.parse(new TextDecoder().decode(chunk.data)) as ContextManagementResponse;231await streamRecorder.callback?.(text, 0, { text: '', contextManagement });232}233} else if (chunk instanceof vscode.LanguageModelThinkingPart) {234if (streamRecorder.callback) {235await streamRecorder.callback(text, 0, {236text: '',237thinking: {238text: chunk.value,239id: chunk.id || '',240metadata: chunk.metadata241}242});243}244}245}246247if (text || numToolsCalled > 0) {248return {249type: ChatFetchResponseType.Success,250requestId,251serverRequestId: requestId,252usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },253value: 
text,254resolvedModel: this.languageModel.id255};256} else {257return {258type: ChatFetchResponseType.Unknown,259reason: 'No response from language model',260requestId: requestId,261serverRequestId: undefined262};263}264} catch (e) {265return {266type: ChatFetchResponseType.Failed,267reason: toErrorMessage(e, true),268requestId: generateUuid(),269serverRequestId: undefined270};271} finally {272retrieveCapturingTokenByCorrelation(ourRequestId);273}274}275276createRequestBody(options: ICreateEndpointBodyOptions): IEndpointBody {277throw new Error('unreachable'); // this endpoint does not call into fetchers278}279280cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {281return this._instantiationService.createInstance(ExtensionContributedChatEndpoint, {282...this.languageModel,283maxInputTokens: modelMaxPromptTokens284});285}286}287288export function convertToApiChatMessage(messages: Raw.ChatMessage[]): Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2> {289const apiMessages: Array<vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2> = [];290for (const message of messages) {291const apiContent: Array<vscode.LanguageModelTextPart | vscode.LanguageModelToolResultPart2 | vscode.LanguageModelToolCallPart | vscode.LanguageModelDataPart | vscode.LanguageModelThinkingPart> = [];292// Easier to work with arrays everywhere, rather than string in some cases. 
So convert to a single text content part293for (const contentPart of message.content) {294if (contentPart.type === Raw.ChatCompletionContentPartKind.Text) {295apiContent.push(new vscode.LanguageModelTextPart(contentPart.text));296} else if (contentPart.type === Raw.ChatCompletionContentPartKind.Image) {297// Handle base64 encoded images298if (contentPart.imageUrl.url.startsWith('data:')) {299const dataUrlRegex = /^data:([^;]+);base64,(.*)$/;300const match = contentPart.imageUrl.url.match(dataUrlRegex);301302if (match) {303const [, mimeType, base64Data] = match;304apiContent.push(new vscode.LanguageModelDataPart(Buffer.from(base64Data, 'base64'), mimeType as ChatImageMimeType));305}306} else {307// Not a base64 image308continue;309}310} else if (contentPart.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint) {311apiContent.push(new vscode.LanguageModelDataPart(new TextEncoder().encode('ephemeral'), CustomDataPartMimeTypes.CacheControl));312} else if (contentPart.type === Raw.ChatCompletionContentPartKind.Opaque) {313const statefulMarker = rawPartAsStatefulMarker(contentPart);314if (statefulMarker) {315apiContent.push(new vscode.LanguageModelDataPart(encodeStatefulMarker(statefulMarker.modelId, statefulMarker.marker), CustomDataPartMimeTypes.StatefulMarker));316}317const thinkingData = rawPartAsThinkingData(contentPart);318if (thinkingData) {319apiContent.push(new vscode.LanguageModelThinkingPart(thinkingData.text, thinkingData.id, thinkingData.metadata));320}321}322}323324if (message.role === Raw.ChatRole.System || message.role === Raw.ChatRole.User) {325apiMessages.push({326role: message.role === Raw.ChatRole.System ? 
vscode.LanguageModelChatMessageRole.System : vscode.LanguageModelChatMessageRole.User,327name: message.name,328content: apiContent329});330} else if (message.role === Raw.ChatRole.Assistant) {331if (message.toolCalls) {332for (const toolCall of message.toolCalls) {333apiContent.push(new vscode.LanguageModelToolCallPart(toolCall.id, toolCall.function.name, JSON.parse(toolCall.function.arguments)));334}335}336apiMessages.push({337role: vscode.LanguageModelChatMessageRole.Assistant,338name: message.name,339content: apiContent340});341} else if (message.role === Raw.ChatRole.Tool) {342const toolResultPart: vscode.LanguageModelToolResultPart2 = new vscode.LanguageModelToolResultPart2(343message.toolCallId ?? '',344apiContent345);346apiMessages.push({347role: vscode.LanguageModelChatMessageRole.User,348name: '',349content: [toolResultPart]350});351}352}353return apiMessages;354}355356357