Path: blob/main/extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { MessageParam } from '@anthropic-ai/sdk/resources';
import { RequestMetadata, RequestType } from '@vscode/copilot-api';
import { Raw } from '@vscode/prompt-tsx';
import * as http from 'http';
import { IChatMLFetcher, Source } from '../../../../platform/chat/common/chatMLFetcher';
import { ChatLocation, ChatResponse } from '../../../../platform/chat/common/commonTypes';
import { CustomModel, EndpointEditToolName } from '../../../../platform/endpoint/common/endpointProvider';
import { AnthropicMessagesProcessor } from '../../../../platform/endpoint/node/messagesApi';
import { ILogService } from '../../../../platform/log/common/logService';
import { IOTelService } from '../../../../platform/otel/common/otelService';
import { FinishedCallback, getRequestId, OptionalChatRequestParams } from '../../../../platform/networking/common/fetch';
import { Response } from '../../../../platform/networking/common/fetcherService';
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IEndpointFetchOptions, IMakeChatRequestOptions } from '../../../../platform/networking/common/networking';
import { ChatCompletion } from '../../../../platform/networking/common/openai';
import { IRequestLogger } from '../../../../platform/requestLogger/common/requestLogger';
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
import { TelemetryData } from '../../../../platform/telemetry/common/telemetryData';
import { ITokenizer, TokenizerType } from '../../../../util/common/tokenizer';
import { AsyncIterableObject } from '../../../../util/vs/base/common/async';
import { CancellationToken, CancellationTokenSource } from '../../../../util/vs/base/common/cancellation';
import { Disposable, toDisposable } from '../../../../util/vs/base/common/lifecycle';
import { SSEParser } from '../../../../util/vs/base/common/sseParser';
import { generateUuid } from '../../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
import { IClaudeCodeModels } from './claudeCodeModels';
import { IClaudeSessionStateService } from '../common/claudeSessionStateService';

/**
 * A list of known Anthropic betas supported by CAPI. Used to filter incoming `anthropic-beta` header values
 * to prevent unsupported betas from being sent to CAPI.
 */
const SUPPORTED_ANTHROPIC_BETAS = [
	'interleaved-thinking',
	'context-management',
	'advanced-tool-use',
];

/**
 * Request paths that are routed to the Messages handler.
 * `//messages` is retained for absolute-form request targets whose path keeps a doubled slash.
 */
const MESSAGES_PATHS: ReadonlySet<string> = new Set(['/v1/messages', '/messages', '//messages']);

export interface IClaudeLanguageModelServerConfig {
	readonly port: number;
	readonly nonce: string;
}

interface AnthropicMessagesRequest {
	model: string;
	messages: MessageParam[];
	system?: string | Array<{ type: 'text'; text: string }>;
	max_tokens?: number;
	stream?: boolean;
	tools?: unknown[];
	[key: string]: unknown;
}

interface AnthropicErrorResponse {
	type: 'error';
	error: {
		type: 'invalid_request_error' | 'authentication_error' | 'permission_error' | 'not_found_error' | 'rate_limit_error' | 'api_error';
		message: string;
	};
}

const DEFAULT_MAX_TOKENS = 200_000;
const DEFAULT_MAX_OUTPUT_TOKENS = 64_000;

/**
 * Extracts the pathname from a raw HTTP request target.
 *
 * A request target that starts with `//` (e.g. `//v1/messages`, produced when a base URL ends in `/`)
 * would be parsed by `URL` as a scheme-relative reference, turning the first path segment into the
 * host. Leading slashes are therefore collapsed to a single one before parsing.
 *
 * @returns the pathname without query string, or `undefined` when the target cannot be parsed.
 */
function getRequestPathname(rawUrl: string | undefined): string | undefined {
	const target = (rawUrl ?? '/').replace(/^\/{2,}/, '/');
	try {
		return new URL(target, 'http://localhost').pathname;
	} catch {
		return undefined;
	}
}

/**
 * HTTP server that provides an Anthropic Messages API compatible endpoint.
 * Acts as a pure pass-through proxy to the underlying model endpoint.
 */
export class ClaudeLanguageModelServer extends Disposable {
	private server: http.Server;
	private config: IClaudeLanguageModelServerConfig;
	private readonly _userInitiatedMessageCounts = new Map<string, number>();

	constructor(
		@ILogService private readonly logService: ILogService,
		@IClaudeSessionStateService private readonly sessionStateService: IClaudeSessionStateService,
		@IRequestLogger private readonly requestLogger: IRequestLogger,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IClaudeCodeModels private readonly claudeCodeModels: IClaudeCodeModels,
		@IOTelService private readonly _otelService: IOTelService,
	) {
		super();
		this.config = {
			port: 0, // Will be set to random available port
			nonce: 'vscode-lm-' + generateUuid()
		};

		this.server = this.createServer();
		this._register(toDisposable(() => this.stop()));
	}

	/**
	 * Builds the HTTP server and its request router:
	 * `OPTIONS *` -> 200, `POST` on a Messages path -> proxy handler, `GET /` -> greeting, anything else -> 404.
	 */
	private createServer(): http.Server {
		return http.createServer(async (req, res) => {
			this.trace(`Received request: ${req.method} ${req.url}`);

			if (req.method === 'OPTIONS') {
				res.writeHead(200);
				res.end();
				return;
			}

			// Handle /v1/messages endpoint (also a doubled leading slash if the base URL ends in /).
			// The pathname is extracted so that a query string does not affect routing.
			const pathname = getRequestPathname(req.url);
			if (req.method === 'POST' && pathname !== undefined && MESSAGES_PATHS.has(pathname)) {
				await this.handleMessagesRequest(req, res);
				return;
			}

			if (req.method === 'GET' && pathname === '/') {
				res.writeHead(200);
				res.end('Hello from ClaudeLanguageModelServer');
				return;
			}

			this.sendErrorResponse(res, 404, 'not_found_error', 'Not found');
		});
	}

	/**
	 * Authenticates a Messages request and, when valid, reads its body and forwards it.
	 * Authentication is checked before the body is buffered so that unauthenticated
	 * callers cannot make the server hold arbitrary payloads in memory.
	 */
	private async handleMessagesRequest(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
		try {
			const auth = extractSessionId(req.headers, this.config.nonce);
			if (!auth.valid) {
				// Discard whatever body the client is still sending.
				req.resume();
				this.error('Invalid auth key');
				this.sendErrorResponse(res, 401, 'authentication_error', 'Invalid authentication');
				return;
			}

			const body = await this.readRequestBody(req);
			await this.handleAuthedMessagesRequest(body, req.headers, res, auth.sessionId);
		} catch (error) {
			this.exception(error instanceof Error ? error : new Error(String(error)), 'Failed to handle messages request');
			this.sendErrorResponse(res, 500, 'api_error', error instanceof Error ? error.message : String(error));
		}
	}

	/**
	 * Reads the full request body as a UTF-8 string.
	 * Chunks are accumulated as Buffers and decoded once at the end, so a multi-byte
	 * character that straddles two chunks is decoded correctly.
	 */
	private async readRequestBody(req: http.IncomingMessage): Promise<string> {
		return new Promise((resolve, reject) => {
			const chunks: Buffer[] = [];
			req.on('data', (chunk: Buffer | string) => {
				chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
			});
			req.on('end', () => {
				resolve(Buffer.concat(chunks).toString('utf8'));
			});
			req.on('error', reject);
		});
	}

	/**
	 * Resolves the endpoint for an authenticated request and streams the upstream response
	 * back to the client as server-sent events.
	 *
	 * @param bodyString raw JSON request body
	 * @param headers incoming request headers, forwarded to the pass-through endpoint
	 * @param res response the upstream bytes are written to
	 * @param sessionId session parsed from the auth token, if any
	 */
	private async handleAuthedMessagesRequest(bodyString: string, headers: http.IncomingHttpHeaders, res: http.ServerResponse, sessionId: string | undefined): Promise<void> {
		// Create cancellation token for the request
		const tokenSource = new CancellationTokenSource();

		try {
			const requestBody: AnthropicMessagesRequest = JSON.parse(bodyString);

			const fallbackModelId = sessionId ? this.sessionStateService.getModelIdForSession(sessionId) : undefined;
			const selectedEndpoint = await this.claudeCodeModels.resolveEndpoint(requestBody.model, fallbackModelId);
			if (!selectedEndpoint) {
				this.error('No model found matching criteria');
				this.sendErrorResponse(res, 404, 'not_found_error', 'No model found matching criteria');
				return;
			}
			this.trace(`Session ${sessionId}: model=${selectedEndpoint.model}`);
			requestBody.model = selectedEndpoint.model;

			// Determine if this is a user-initiated message using counter-based approach:
			// each pending user message for the model is consumed by exactly one request.
			const count = this._userInitiatedMessageCounts.get(selectedEndpoint.model) ?? 0;
			const isUserInitiatedMessage = count > 0;
			if (isUserInitiatedMessage) {
				this._userInitiatedMessageCounts.set(selectedEndpoint.model, count - 1);
			}

			// Set up streaming response
			res.writeHead(200, {
				'Content-Type': 'text/event-stream',
				'Cache-Control': 'no-cache',
				'Connection': 'keep-alive',
			});

			// Handle client disconnect
			let requestComplete = false;
			res.on('close', () => {
				if (!requestComplete) {
					this.info('Client disconnected before request complete');
				}

				tokenSource.cancel();
			});

			const endpointRequestBody = requestBody as IEndpointBody;
			const streamingEndpoint = this.instantiationService.createInstance(
				ClaudeStreamingPassThroughEndpoint,
				selectedEndpoint,
				res,
				endpointRequestBody,
				headers,
				'vscode_claude_code',
				{
					modelMaxPromptTokens: DEFAULT_MAX_TOKENS - DEFAULT_MAX_OUTPUT_TOKENS,
					maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS
				},
				sessionId
			);

			let messagesForLogging: Raw.ChatMessage[] = [];
			try {
				// Don't fail based on any assumptions about the shape of the request
				messagesForLogging = Array.isArray(requestBody.messages) ?
					messagesApiInputToRawMessagesForLogging(requestBody) :
					[];
			} catch (e) {
				this.exception(e as Error, `Failed to parse messages for logging`);
			}

			const capturingToken = sessionId ? this.sessionStateService.getCapturingTokenForSession(sessionId) : undefined;
			const sessionReasoningEffort = sessionId ? this.sessionStateService.getReasoningEffortForSession(sessionId) : undefined;
			const reasoningEffort = sessionReasoningEffort && selectedEndpoint.supportsReasoningEffort?.includes(sessionReasoningEffort)
				? sessionReasoningEffort
				: undefined;

			const doRequest = () => streamingEndpoint.makeChatRequest2({
				debugName: 'Claude Copilot Proxy',
				messages: messagesForLogging,
				finishedCb: async () => undefined,
				location: ChatLocation.MessagesProxy,
				modelCapabilities: { enableThinking: true, reasoningEffort },
				userInitiatedRequest: isUserInitiatedMessage
			}, tokenSource.token);

			// Wrap in trace context so chat spans are parented to the invoke_agent span
			const traceContext = sessionId ? this.sessionStateService.getTraceContextForSession(sessionId) : undefined;
			const doRequestInContext = traceContext
				? () => this._otelService.runWithTraceContext(traceContext, doRequest)
				: doRequest;

			if (capturingToken) {
				await this.requestLogger.captureInvocation(capturingToken, doRequestInContext);
			} else {
				await doRequestInContext();
			}

			requestComplete = true;

			res.end();
		} catch (error) {
			this.exception(error instanceof Error ? error : new Error(String(error)), 'Messages proxy request failed');
			this.sendErrorResponse(res, 500, 'api_error', error instanceof Error ? error.message : String(error));
		} finally {
			tokenSource.dispose();
		}
	}

	/**
	 * Sends an Anthropic-style JSON error body.
	 *
	 * If the response headers have already been sent (the SSE stream was started), the status
	 * can no longer be changed and calling `writeHead` again would throw; in that case the
	 * response is simply ended so the client is not left waiting.
	 */
	private sendErrorResponse(
		res: http.ServerResponse,
		statusCode: number,
		errorType: AnthropicErrorResponse['error']['type'],
		message: string
	): void {
		if (res.headersSent) {
			if (!res.writableEnded) {
				res.end();
			}
			return;
		}

		const errorResponse: AnthropicErrorResponse = {
			type: 'error',
			error: {
				type: errorType,
				message
			}
		};
		res.writeHead(statusCode, { 'Content-Type': 'application/json' });
		res.end(JSON.stringify(errorResponse));
	}

	/**
	 * Starts listening on a random port bound to 127.0.0.1. No-op when a port is already assigned.
	 * Rejects if the server fails to bind or reports no usable address.
	 */
	public async start(): Promise<void> {
		if (this.config.port !== 0) {
			// Already started
			return;
		}

		return new Promise<void>((resolve, reject) => {
			// Without a listener a failed listen() would surface as an uncaught 'error' event
			// and this promise would never settle.
			const onListenError = (err: Error) => reject(err);
			this.server.once('error', onListenError);

			this.server.listen(0, '127.0.0.1', () => {
				this.server.removeListener('error', onListenError);

				const address = this.server.address();
				if (address && typeof address === 'object') {
					this.config = {
						...this.config,
						port: address.port
					};
					this.info(`Claude Language Model Server started on http://localhost:${this.config.port}`);
					resolve();
					return;
				}

				reject(new Error('Failed to start server'));
			});
		});
	}

	/** Stops accepting new connections. */
	public stop(): void {
		this.server.close();
	}

	/** Returns a copy of the current port/nonce configuration. */
	public getConfig(): IClaudeLanguageModelServerConfig {
		return { ...this.config };
	}

	/**
	 * Increments the user-initiated message count for a given model.
	 * Called when a user sends a new message in a Claude session.
	 */
	public incrementUserInitiatedMessageCount(modelId: string): void {
		const current = this._userInitiatedMessageCounts.get(modelId) ?? 0;
		this._userInitiatedMessageCounts.set(modelId, current + 1);
	}

	private info(message: string): void {
		const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
		this.logService.info(messageWithClassName);
	}

	private error(message: string): void {
		const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
		this.logService.error(messageWithClassName);
	}

	private exception(err: Error, message?: string): void {
		this.logService.error(err, message);
	}

	private trace(message: string): void {
		const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
		this.logService.trace(messageWithClassName);
	}
}

export interface ExtractSessionIdResult {
	/** Whether the auth nonce is valid. */
	readonly valid: boolean;
	/** The session ID, if present in the `nonce.sessionId` format. `undefined` for legacy (nonce-only) format. */
	readonly sessionId: string | undefined;
}

/**
 * Extracts and validates the session ID from HTTP request headers.
 * Reads the `Authorization: Bearer <nonce>.<sessionId>` header set via `ANTHROPIC_AUTH_TOKEN`.
 *
 * The `x-api-key` header is intentionally ignored to prevent the user's personal
 * `ANTHROPIC_API_KEY` environment variable from interfering with authentication.
 *
 * @param headers incoming request headers
 * @param expectedNonce nonce the token must start with (or equal, in the legacy format)
 * @returns validity of the nonce and, only when valid, the session ID that followed the first `.`
 */
export function extractSessionId(headers: http.IncomingHttpHeaders, expectedNonce: string): ExtractSessionIdResult {
	const bearerPrefix = 'Bearer ';

	// Check Authorization header with Bearer prefix (set via ANTHROPIC_AUTH_TOKEN)
	const authHeader = headers['authorization'];
	const token = typeof authHeader === 'string' && authHeader.startsWith(bearerPrefix)
		? authHeader.slice(bearerPrefix.length)
		: undefined;

	if (!token) {
		return { valid: false, sessionId: undefined };
	}

	// Parse `nonce.sessionId` format
	const dotIndex = token.indexOf('.');
	if (dotIndex === -1) {
		// Legacy format without session ID — validate nonce only
		return { valid: token === expectedNonce, sessionId: undefined };
	}

	const valid = token.slice(0, dotIndex) === expectedNonce;
	return { valid, sessionId: valid ? token.slice(dotIndex + 1) : undefined };
}

/**
 * Filters a comma-separated `anthropic-beta` header value to only include
 * betas that match {@link SUPPORTED_ANTHROPIC_BETAS}. Entries are matched by
 * prefix so that e.g. `'context-management'` allows `'context-management-2025-06-27'`.
 *
 * Returns the filtered comma-separated string, or `undefined` if no betas matched.
 */
export function filterSupportedBetas(headerValue: string): string | undefined {
	const filtered = headerValue
		.split(',')
		.map(b => b.trim())
		.filter(b => b && SUPPORTED_ANTHROPIC_BETAS.some(supported => b.startsWith(supported + '-')));

	return filtered.length > 0 ? filtered.join(',') : undefined;
}

/**
 * Converts Anthropic Messages API input to Raw.ChatMessage[] for logging purposes.
 * Non-text blocks are reduced to short textual placeholders; block types not listed here are omitted.
 */
function messagesApiInputToRawMessagesForLogging(request: AnthropicMessagesRequest): Raw.ChatMessage[] {
	const messages: Raw.ChatMessage[] = [];

	// Add system message if present
	if (request.system) {
		const systemText = typeof request.system === 'string'
			? request.system
			: request.system.map(block => block.text).join('\n');
		messages.push({
			role: Raw.ChatRole.System,
			content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: systemText }]
		});
	}

	// Convert each message
	for (const msg of request.messages ?? []) {
		const role = msg.role === 'user' ? Raw.ChatRole.User : Raw.ChatRole.Assistant;
		const content: Raw.ChatCompletionContentPart[] = [];

		if (typeof msg.content === 'string') {
			content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: msg.content });
		} else if (Array.isArray(msg.content)) {
			for (const block of msg.content) {
				if (block.type === 'text') {
					content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: block.text });
				} else if (block.type === 'image') {
					// Handle image blocks if needed for logging
					content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: '[image]' });
				} else if (block.type === 'tool_use') {
					content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: `[tool_use: ${block.name}]` });
				} else if (block.type === 'tool_result') {
					content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: `[tool_result: ${block.tool_use_id}]` });
				}
			}
		}

		messages.push({ role, content });
	}

	return messages;
}

/**
 * Endpoint wrapper that delegates model metadata to a base endpoint while writing the
 * upstream response bytes straight to the client's HTTP response.
 */
class ClaudeStreamingPassThroughEndpoint implements IChatEndpoint {
	constructor(
		private readonly base: IChatEndpoint,
		private readonly responseStream: http.ServerResponse,
		private readonly requestBody: IEndpointBody,
		private readonly requestHeaders: http.IncomingHttpHeaders,
		private readonly userAgentPrefix: string,
		private readonly contextWindowOverride: { modelMaxPromptTokens?: number; maxOutputTokens?: number },
		private readonly sessionId: string | undefined,
		@IChatMLFetcher private readonly chatMLFetcher: IChatMLFetcher,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IClaudeSessionStateService private readonly sessionStateService: IClaudeSessionStateService
	) { }

	public get urlOrRequestMetadata(): string | RequestMetadata {
		// Force Messages API endpoint - we need this regardless of the useMessagesApi setting
		// since we're proxying Messages API format requests from Claude Code
		const baseUrl = this.base.urlOrRequestMetadata;
		if (typeof baseUrl === 'string') {
			return baseUrl;
		}
		return { type: RequestType.ChatMessages };
	}

	/**
	 * Headers for the upstream request: the base endpoint's extras, a rewritten `User-Agent`
	 * when the client sent one, and the supported subset of the client's `anthropic-beta` values.
	 */
	public getExtraHeaders(): Record<string, string> {
		const headers = this.base.getExtraHeaders?.(ChatLocation.MessagesProxy) ?? {};
		if (this.requestHeaders['user-agent']) {
			headers['User-Agent'] = this.getUserAgent(this.requestHeaders['user-agent']);
		}
		if (typeof this.requestHeaders['anthropic-beta'] === 'string') {
			const filtered = filterSupportedBetas(this.requestHeaders['anthropic-beta']);
			if (filtered) {
				headers['anthropic-beta'] = filtered;
			}
		}
		return headers;
	}

	getEndpointFetchOptions(): IEndpointFetchOptions {
		return {
			suppressIntegrationId: true
		};
	}

	/**
	 * Replaces the product token before the first `/` with {@link userAgentPrefix};
	 * when there is no `/`, the prefix is prepended as `prefix/<incoming>`.
	 */
	private getUserAgent(incomingUserAgent: string): string {
		const slashIndex = incomingUserAgent.indexOf('/');
		if (slashIndex === -1) {
			return `${this.userAgentPrefix}/${incomingUserAgent}`;
		}

		return `${this.userAgentPrefix}${incomingUserAgent.substring(slashIndex)}`;
	}

	public interceptBody(body: IEndpointBody | undefined): void {
		this.base.interceptBody?.(body);
	}

	public acquireTokenizer(): ITokenizer {
		return this.base.acquireTokenizer();
	}

	public get modelMaxPromptTokens(): number {
		return this.contextWindowOverride.modelMaxPromptTokens ?? this.base.modelMaxPromptTokens;
	}

	public get maxOutputTokens(): number {
		return this.contextWindowOverride.maxOutputTokens ?? this.base.maxOutputTokens;
	}

	public get model(): string {
		return this.base.model;
	}

	public get modelProvider(): string {
		return this.base.modelProvider;
	}

	public get name(): string {
		return this.base.name;
	}

	public get version(): string {
		return this.base.version;
	}

	public get family(): string {
		return this.base.family;
	}

	public get tokenizer(): TokenizerType {
		return this.base.tokenizer;
	}

	public get showInModelPicker(): boolean {
		return this.base.showInModelPicker;
	}

	public get isPremium(): boolean | undefined {
		return this.base.isPremium;
	}

	public get degradationReason(): string | undefined {
		return this.base.degradationReason;
	}

	public get multiplier(): number | undefined {
		return this.base.multiplier;
	}

	public get tokenPricing() {
		return this.base.tokenPricing;
	}

	public get restrictedToSkus(): string[] | undefined {
		return this.base.restrictedToSkus;
	}

	public get isFallback(): boolean {
		return this.base.isFallback;
	}

	public get customModel(): CustomModel | undefined {
		return this.base.customModel;
	}

	public get isExtensionContributed(): boolean | undefined {
		return this.base.isExtensionContributed;
	}

	public get apiType(): string | undefined {
		return 'messages';
	}

	public get supportsThinkingContentInHistory(): boolean | undefined {
		return this.base.supportsThinkingContentInHistory;
	}

	public get supportsAdaptiveThinking(): boolean | undefined {
		return this.base.supportsAdaptiveThinking;
	}

	public get minThinkingBudget(): number | undefined {
		return this.base.minThinkingBudget;
	}

	public get maxThinkingBudget(): number | undefined {
		return this.base.maxThinkingBudget;
	}

	public get supportsReasoningEffort(): string[] | undefined {
		return this.base.supportsReasoningEffort;
	}

	public get supportsToolCalls(): boolean {
		return this.base.supportsToolCalls;
	}

	public get supportsVision(): boolean {
		return this.base.supportsVision;
	}

	public get supportsPrediction(): boolean {
		return this.base.supportsPrediction;
	}

	public get supportedEditTools(): readonly EndpointEditToolName[] | undefined {
		return this.base.supportedEditTools;
	}

	/**
	 * Forwards every upstream chunk to the client response unchanged while also feeding it to an
	 * SSE parser, so that completions (and token usage) can be emitted for logging and reported
	 * to the session's usage handler. The upstream body is destroyed when the loop exits.
	 */
	public async processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken
	): Promise<AsyncIterableObject<ChatCompletion>> {
		const body = response.body;
		return new AsyncIterableObject<ChatCompletion>(async feed => {
			// We parse the stream just to return a correct ChatCompletion for logging the response and token usage details.
			const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
			const ghRequestId = response.headers.get('x-github-request-id') ?? '';
			const { serverExperiments } = getRequestId(response.headers);
			const processor = this.instantiationService.createInstance(AnthropicMessagesProcessor, telemetryData, requestId, ghRequestId, serverExperiments);
			const parser = new SSEParser((ev) => {
				try {
					const trimmed = ev.data?.trim();
					if (!trimmed || trimmed === '[DONE]') {
						return;
					}

					logService.trace(`[ClaudeStreamingPassThroughEndpoint] SSE: ${ev.data}`);
					const parsed = JSON.parse(trimmed);
					const type = parsed.type ?? ev.type;
					if (!type) {
						return;
					}
					const completion = processor.push({ ...parsed, type }, finishCallback);
					if (completion) {
						feed.emitOne(completion);

						// Report usage to the usage handler if available
						if (completion.usage && this.sessionId) {
							const usageHandler = this.sessionStateService.getUsageHandlerForSession(this.sessionId);
							if (usageHandler) {
								usageHandler({
									// Could we bucketize these token counts somehow for the details?
									promptTokens: completion.usage.prompt_tokens,
									completionTokens: completion.usage.completion_tokens
								});
							}
						}
					}
				} catch (e) {
					feed.reject(e);
				}
			});

			try {
				for await (const chunk of body) {
					if (cancellationToken?.isCancellationRequested) {
						break;
					}

					this.responseStream.write(chunk);
					parser.feed(chunk);
				}
			} finally {
				await body.destroy();
			}
		});
	}

	/** Not supported by this endpoint; use {@link makeChatRequest2}. */
	public makeChatRequest(
		debugName: string,
		messages: Raw.ChatMessage[],
		finishedCb: FinishedCallback | undefined,
		token: CancellationToken,
		location: ChatLocation,
		source?: Source,
		requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
		userInitiatedRequest?: boolean
	): Promise<ChatResponse> {
		throw new Error('not implemented');
	}

	/** Issues the request through the chat fetcher with this wrapper as the endpoint. */
	public makeChatRequest2(
		options: IMakeChatRequestOptions,
		token: CancellationToken
	): Promise<ChatResponse> {
		return this.chatMLFetcher.fetchOne({
			requestOptions: {},
			...options,
			endpoint: this,
		}, token);
	}

	/**
	 * Builds the upstream body: the base endpoint's body overlaid with the client's original
	 * request body, so that properties of the client's request take precedence.
	 */
	public createRequestBody(
		options: ICreateEndpointBodyOptions
	): IEndpointBody {
		const base = this.base.createRequestBody(options);

		// Claude models don't support both temperature and top_p simultaneously.
		// If the SDK request specifies either, clear both from base to avoid conflicts.
		if (this.requestBody.temperature !== undefined || this.requestBody.top_p !== undefined) {
			delete base.temperature;
			delete base.top_p;
		}

		// Merge with original request body to preserve any additional properties
		// i.e. default thinking budget.
		return {
			...base,
			...this.requestBody
		};
	}

	public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
		throw new Error('not implemented');
	}
}