// Path: extensions/copilot/src/extension/byok/vscode-node/anthropicProvider.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import Anthropic from '@anthropic-ai/sdk';6import * as vscode from 'vscode';7import { CancellationToken, LanguageModelChatInformation, LanguageModelChatMessage, LanguageModelChatMessage2, LanguageModelDataPart, LanguageModelResponsePart2, LanguageModelTextPart, LanguageModelThinkingPart, LanguageModelToolCallPart, LanguageModelToolResultPart, Progress, ProvideLanguageModelChatResponseOptions } from 'vscode';8import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';9import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';10import { CustomDataPartMimeTypes } from '../../../platform/endpoint/common/endpointTypes';11import { modelSupportsToolSearch } from '../../../platform/endpoint/common/chatModelCapabilities';12import { buildToolInputSchema } from '../../../platform/endpoint/node/messagesApi';13import { ILogService } from '../../../platform/log/common/logService';14import { ContextManagementResponse, CUSTOM_TOOL_SEARCH_NAME, getContextManagementFromConfig, isAnthropicContextEditingEnabled, isAnthropicMemoryToolEnabled } from '../../../platform/networking/common/anthropic';15import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';16import { IResponseDelta, OpenAiFunctionTool } from '../../../platform/networking/common/fetch';17import { APIUsage } from '../../../platform/networking/common/openai';18import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, type OTelModelOptions, StdAttr, toToolDefinitions, truncateForOTel } from 
'../../../platform/otel/common/index';19import { IOTelService, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';20import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';21import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';22import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';23import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';24import { toErrorMessage } from '../../../util/common/errorMessage';25import { RecordedProgress } from '../../../util/common/progressRecorder';26import { generateUuid } from '../../../util/vs/base/common/uuid';27import { anthropicMessagesToRawMessagesForLogging, apiMessageToAnthropicMessage } from '../common/anthropicMessageConverter';28import { BYOKKnownModels, BYOKModelCapabilities, LMResponsePart } from '../common/byokProvider';29import { AbstractLanguageModelChatProvider, ExtendedLanguageModelChatInformation, LanguageModelChatConfiguration } from './abstractLanguageModelChatProvider';30import { byokKnownModelsToAPIInfoWithEffort } from './byokModelInfo';31import { IBYOKStorageService } from './byokStorageService';3233export class AnthropicLMProvider extends AbstractLanguageModelChatProvider {3435public static readonly providerName = 'Anthropic';3637constructor(38knownModels: BYOKKnownModels | undefined,39byokStorageService: IBYOKStorageService,40@ILogService logService: ILogService,41@IRequestLogger private readonly _requestLogger: IRequestLogger,42@IConfigurationService private readonly _configurationService: IConfigurationService,43@IExperimentationService private readonly _experimentationService: IExperimentationService,44@ITelemetryService private readonly _telemetryService: ITelemetryService,45@IOTelService private readonly _otelService: IOTelService,46@IToolDeferralService private readonly _toolDeferralService: 
IToolDeferralService,47) {48super(AnthropicLMProvider.providerName.toLowerCase(), AnthropicLMProvider.providerName, knownModels, byokStorageService, logService);4950}5152private _getThinkingBudget(modelId: string, maxOutputTokens: number): number | undefined {53const modelCapabilities = this._knownModels?.[modelId];54const modelSupportsThinking = modelCapabilities?.thinking ?? false;55if (!modelSupportsThinking) {56return undefined;57}58return Math.min(32000, maxOutputTokens - 1, 16000);59}6061// Filters the byok known models based on what the anthropic API knows as well62protected async getAllModels(silent: boolean, apiKey: string | undefined): Promise<ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>[]> {63if (!apiKey && silent) {64return [];65}6667try {68const response = await new Anthropic({ apiKey }).models.list();69const modelList: Record<string, BYOKModelCapabilities> = {};70for (const model of response.data) {71if (this._knownModels && this._knownModels[model.id]) {72modelList[model.id] = this._knownModels[model.id];73} else {74// Mix in generic capabilities for models we don't know75modelList[model.id] = {76maxInputTokens: 100000,77maxOutputTokens: 16000,78name: model.display_name,79toolCalling: true,80vision: false,81thinking: false82};83}84}85return byokKnownModelsToAPIInfoWithEffort(this._name, modelList);86} catch (error) {87this._logService.error(error, `Error fetching available ${AnthropicLMProvider.providerName} models`);88throw new Error(error.message ? 
error.message : error);89}90}9192async provideLanguageModelChatResponse(model: ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>, messages: Array<LanguageModelChatMessage | LanguageModelChatMessage2>, options: ProvideLanguageModelChatResponseOptions, progress: Progress<LanguageModelResponsePart2>, token: CancellationToken): Promise<void> {93// Restore CapturingToken context if correlation ID was passed through modelOptions.94// This handles the case where AsyncLocalStorage context was lost crossing VS Code IPC.95const correlationId = (options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;96const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;9798// Restore OTel trace context to link spans back to the agent trace99const parentTraceContext = (options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;100101// OTel span handle — created outside doRequest, enriched inside with usage data102let otelSpan: ReturnType<typeof this._otelService.startSpan> | undefined;103104const doRequest = async () => {105const issuedTime = Date.now();106const apiKey = model.configuration?.apiKey;107if (!apiKey) {108throw new Error('API key not found for the model');109}110111const anthropicClient = new Anthropic({ apiKey });112113// Convert the messages from the API format into messages that we can use against anthropic114const { system, messages: convertedMessages } = apiMessageToAnthropicMessage(messages as LanguageModelChatMessage[]);115116const requestId = generateUuid();117const pendingLoggedChatRequest = this._requestLogger.logChatRequest(118'AnthropicBYOK',119{120model: model.id,121modelMaxPromptTokens: model.maxInputTokens,122urlOrRequestMetadata: anthropicClient.baseURL,123},124{125model: model.id,126messages: anthropicMessagesToRawMessagesForLogging(convertedMessages, system),127ourRequestId: requestId,128location: ChatLocation.Other,129body: 
{130tools: options.tools?.map((tool): OpenAiFunctionTool => ({131type: 'function',132function: {133name: tool.name,134description: tool.description,135parameters: tool.inputSchema136}137}))138},139});140141const memoryToolEnabled = isAnthropicMemoryToolEnabled(model.id, this._configurationService, this._experimentationService);142143// Requires the client-side tool_search tool in the request: without it, defer-loaded tools can't be retrieved.144// If the user disables tool_search in the tool picker, it won't be present here and tool search is skipped.145const toolSearchEnabled = modelSupportsToolSearch(model.id)146&& !!options.tools?.some(t => t.name === CUSTOM_TOOL_SEARCH_NAME);147148// Build tools array, handling both standard tools and native Anthropic tools149const tools: Anthropic.Beta.BetaToolUnion[] = [];150151let hasMemoryTool = false;152for (const tool of (options.tools ?? [])) {153// Handle native Anthropic memory tool (only for models that support it)154if (tool.name === 'memory' && memoryToolEnabled) {155156hasMemoryTool = true;157tools.push({158name: 'memory',159type: 'memory_20250818'160} as Anthropic.Beta.BetaMemoryTool20250818);161continue;162}163164// Mark tools for deferred loading when tool search is enabled, except for frequently used tools165const shouldDefer = toolSearchEnabled ? !this._toolDeferralService.isNonDeferredTool(tool.name) : undefined;166167if (!tool.inputSchema) {168tools.push({169name: tool.name,170description: tool.description,171input_schema: {172type: 'object',173properties: {},174required: []175},176...(shouldDefer ? { defer_loading: shouldDefer } : {})177});178continue;179}180181tools.push({182name: tool.name,183description: tool.description,184input_schema: buildToolInputSchema(tool.inputSchema as Record<string, unknown>),185...(shouldDefer ? 
{ defer_loading: shouldDefer } : {})186});187}188189// Check if web search is enabled and append web_search tool if not already present.190// We need to do this because there is no local web_search tool definition we can replace.191const webSearchEnabled = this._configurationService.getExperimentBasedConfig(ConfigKey.AnthropicWebSearchToolEnabled, this._experimentationService);192if (webSearchEnabled && !tools.some(tool => 'name' in tool && tool.name === 'web_search')) {193const maxUses = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchMaxUses);194const allowedDomains = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchAllowedDomains);195const blockedDomains = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchBlockedDomains);196const userLocation = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchUserLocation);197const shouldDeferWebSearch = toolSearchEnabled ? !this._toolDeferralService.isNonDeferredTool('web_search') : undefined;198199const webSearchTool: Anthropic.Beta.BetaWebSearchTool20250305 = {200name: 'web_search',201type: 'web_search_20250305',202max_uses: maxUses,203...(shouldDeferWebSearch ? 
{ defer_loading: shouldDeferWebSearch } : {})204};205206// Add domain filtering if configured207// Cannot use both allowed and blocked domains simultaneously208if (allowedDomains && allowedDomains.length > 0) {209webSearchTool.allowed_domains = allowedDomains;210} else if (blockedDomains && blockedDomains.length > 0) {211webSearchTool.blocked_domains = blockedDomains;212}213214// Add user location if configured215// Note: All fields are optional according to Anthropic docs216if (userLocation && (userLocation.city || userLocation.region || userLocation.country || userLocation.timezone)) {217webSearchTool.user_location = {218type: 'approximate',219...userLocation220};221}222223tools.push(webSearchTool);224}225226const thinkingBudget = this._getThinkingBudget(model.id, model.maxOutputTokens);227228// Check if model supports adaptive thinking229const modelCapabilities = this._knownModels?.[model.id];230const supportsAdaptiveThinking = modelCapabilities?.adaptiveThinking ?? false;231232// Build context management configuration233const thinkingEnabled = supportsAdaptiveThinking || (thinkingBudget ?? 0) > 0;234const contextManagement = isAnthropicContextEditingEnabled(model.id, this._configurationService, this._experimentationService) ? 
getContextManagementFromConfig(235this._configurationService,236this._experimentationService,237thinkingEnabled238) : undefined;239240// Build betas array for beta API features (adaptive thinking doesn't need interleaved-thinking beta)241const betas: string[] = [];242if (thinkingBudget && !supportsAdaptiveThinking) {243betas.push('interleaved-thinking-2025-05-14');244}245if (hasMemoryTool || contextManagement) {246betas.push('context-management-2025-06-27');247}248if (toolSearchEnabled) {249betas.push('advanced-tool-use-2025-11-20');250}251252const rawEffort = options.modelConfiguration?.reasoningEffort;253const supportsEffort = modelCapabilities?.supportsReasoningEffort;254const effort = supportsEffort && typeof rawEffort === 'string' && supportsEffort.includes(rawEffort)255? rawEffort as 'low' | 'medium' | 'high' | 'max'256: undefined;257258const params: Anthropic.Beta.Messages.MessageCreateParamsStreaming = {259model: model.id,260messages: convertedMessages,261max_tokens: model.maxOutputTokens,262stream: true,263system: [system],264tools: tools.length > 0 ? tools : undefined,265thinking: supportsAdaptiveThinking266? { type: 'adaptive' as const }267: thinkingBudget ? { type: 'enabled' as const, budget_tokens: thinkingBudget } : undefined,268...(effort ? 
{ output_config: { effort } } : {}),269context_management: contextManagement as Anthropic.Beta.Messages.BetaContextManagementConfig | undefined,270};271272const wrappedProgress = new RecordedProgress(progress);273274try {275const result = await this._makeRequest(anthropicClient, wrappedProgress, params, betas, token, issuedTime);276if (result.ttft) {277pendingLoggedChatRequest.markTimeToFirstToken(result.ttft);278}279const responseDeltas: IResponseDelta[] = wrappedProgress.items.map((i): IResponseDelta => {280if (i instanceof LanguageModelTextPart) {281return { text: i.value };282} else if (i instanceof LanguageModelToolCallPart) {283return {284text: '',285copilotToolCalls: [{286name: i.name,287arguments: JSON.stringify(i.input),288id: i.callId289}]290};291} else if (i instanceof LanguageModelToolResultPart) {292// Handle tool results - extract text from content293const resultText = i.content.map(c => c instanceof LanguageModelTextPart ? c.value : '').join('');294return {295text: `[Tool Result ${i.callId}]: ${resultText}`296};297} else {298return { text: '' };299}300});301// TODO: @bhavyaus - Add telemetry tracking for context editing (contextEditingApplied, contextEditingClearedTokens, contextEditingEditCount) like messagesApi.ts does302if (result.contextManagement) {303responseDeltas.push({304text: '',305contextManagement: result.contextManagement306});307}308pendingLoggedChatRequest.resolve({309type: ChatFetchResponseType.Success,310requestId,311serverRequestId: requestId,312usage: result.usage,313value: ['value'],314resolvedModel: model.id315}, responseDeltas);316317// Enrich OTel span with usage data from the Anthropic response318if (otelSpan && result.usage) {319otelSpan.setAttributes({320[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,321[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,322...(result.usage.prompt_tokens_details?.cached_tokens323? 
{ [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }324: {}),325[GenAiAttr.RESPONSE_MODEL]: model.id,326[GenAiAttr.RESPONSE_ID]: requestId,327[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],328[GenAiAttr.CONVERSATION_ID]: requestId,329...(result.ttft ? { [CopilotChatAttr.TIME_TO_FIRST_TOKEN]: result.ttft } : {}),330[GenAiAttr.REQUEST_MAX_TOKENS]: model.maxOutputTokens ?? 0,331});332// Opt-in content capture333if (this._otelService.config.captureContent) {334const responseText = wrappedProgress.items335.filter((p): p is LanguageModelTextPart => p instanceof LanguageModelTextPart)336.map(p => p.value).join('');337const toolCalls = wrappedProgress.items338.filter((p): p is LanguageModelToolCallPart => p instanceof LanguageModelToolCallPart)339.map(tc => ({ type: 'tool_call' as const, id: tc.callId, name: tc.name, arguments: tc.input }));340const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];341if (responseText) { parts.push({ type: 'text', content: responseText }); }342parts.push(...toolCalls);343if (parts.length > 0) {344otelSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));345}346}347}348349// Record OTel metrics for this Anthropic LLM call350if (result.usage) {351const durationSec = (Date.now() - issuedTime) / 1000;352const metricAttrs = { operationName: GenAiOperationName.CHAT, providerName: 'anthropic', requestModel: model.id, responseModel: model.id };353GenAiMetrics.recordOperationDuration(this._otelService, durationSec, metricAttrs);354if (result.usage.prompt_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs); }355if (result.usage.completion_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs); }356if (result.ttft) { GenAiMetrics.recordTimeToFirstToken(this._otelService, model.id, 
result.ttft / 1000); }357}358359// Emit OTel inference details event360emitInferenceDetailsEvent(361this._otelService,362{ model: model.id, maxTokens: model.maxOutputTokens },363result.usage ? {364id: requestId,365model: model.id,366finishReasons: ['stop'],367inputTokens: result.usage.prompt_tokens,368outputTokens: result.usage.completion_tokens,369} : undefined,370);371372// Send success telemetry matching response.success format373/* __GDPR__374"response.success" : {375"owner": "digitarald",376"comment": "Report quality details for a successful service response.",377"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished" },378"filterReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response was filtered" },379"source": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Source of the initial request" },380"initiatorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was initiated by a user or an agent" },381"model": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Model selection for the response" },382"modelInvoked": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Actual model invoked for the response" },383"apiType": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "API type for the response- chat completions or responses" },384"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id of the current turn request" },385"gitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id if available" },386"associatedRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Another request ID that this request 
is associated with (eg, the originating request of a summarization request)." },387"reasoningEffort": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning effort level" },388"reasoningSummary": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning summary level" },389"fetcher": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The fetcher used for the request" },390"transport": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The transport used for the request (http or websocket)" },391"totalTokenMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum total token window", "isMeasurement": true },392"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },393"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },394"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },395"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },396"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },397"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },398"acceptedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the 
completion", "isMeasurement": true },399"rejectedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },400"completionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the output", "isMeasurement": true },401"timeToFirstToken": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token", "isMeasurement": true },402"timeToFirstTokenEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token emitted (visible text)", "isMeasurement": true },403"timeToComplete": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to complete the request", "isMeasurement": true },404"issuedTime": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Timestamp when the request was issued", "isMeasurement": true },405"isVisionRequest": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request was for a vision model", "isMeasurement": true },406"isBYOK": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for a BYOK model", "isMeasurement": true },407"isAuto": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for an Auto model", "isMeasurement": true },408"bytesReceived": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of bytes received in the response", "isMeasurement": true },409"retryAfterError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the original request." 
},410"retryAfterErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the original request if available" },411"connectivityTestError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the connectivity test." },412"connectivityTestErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the connectivity test request if available" },413"retryAfterFilterCategory": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the response was filtered and this is a retry attempt, this contains the original filtered content category." },414"suspendEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system suspend event was seen during the request", "isMeasurement": true },415"resumeEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system resume event was seen during the request", "isMeasurement": true }416}417*/418this._telemetryService.sendTelemetryEvent('response.success', { github: true, microsoft: true }, {419source: 'byok.anthropic',420model: model.id,421requestId,422}, {423totalTokenMax: model.maxInputTokens ?? -1,424tokenCountMax: model.maxOutputTokens ?? 
-1,425promptTokenCount: result.usage?.prompt_tokens,426promptCacheTokenCount: result.usage?.prompt_tokens_details?.cached_tokens,427tokenCount: result.usage?.total_tokens,428completionTokens: result.usage?.completion_tokens,429timeToFirstToken: result.ttft,430timeToFirstTokenEmitted: result.ttfte,431timeToComplete: Date.now() - issuedTime,432issuedTime,433isBYOK: 1,434});435} catch (err) {436this._logService.error(`BYOK Anthropic error: ${toErrorMessage(err, true)}`);437pendingLoggedChatRequest.resolve({438type: ChatFetchResponseType.Unknown,439requestId,440serverRequestId: requestId,441reason: err.message442}, wrappedProgress.items.map((i): IResponseDelta => {443if (i instanceof LanguageModelTextPart) {444return { text: i.value };445} else if (i instanceof LanguageModelToolCallPart) {446return {447text: '',448copilotToolCalls: [{449name: i.name,450arguments: JSON.stringify(i.input),451id: i.callId452}]453};454} else if (i instanceof LanguageModelToolResultPart) {455// Handle tool results - extract text from content456const resultText = i.content.map(c => c instanceof LanguageModelTextPart ? 
c.value : '').join('');457return {458text: `[Tool Result ${i.callId}]: ${resultText}`459};460} else {461return { text: '' };462}463}));464throw err;465}466};467468// Create OTel span and execute with trace context + CapturingToken469const executeRequest = async () => {470otelSpan = this._otelService.startSpan(`chat ${model.id}`, {471kind: SpanKind.CLIENT,472attributes: {473[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,474[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.ANTHROPIC,475[GenAiAttr.REQUEST_MODEL]: model.id,476[GenAiAttr.AGENT_NAME]: 'AnthropicBYOK',477[CopilotChatAttr.MAX_PROMPT_TOKENS]: model.maxInputTokens,478[StdAttr.SERVER_ADDRESS]: 'api.anthropic.com',479},480});481// Opt-in: capture input messages in OTel GenAI format482if (this._otelService.config.captureContent) {483// Tool definitions on the chat span (issue #299934) with `parameters`484// per OTel GenAI semantic conventions (issue #300318).485const toolDefs = toToolDefinitions(options.tools);486if (toolDefs) {487otelSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));488}489try {490const roleNames: Record<number, string> = { 1: 'user', 2: 'assistant', 3: 'system' };491const inputMsgs = messages.map(m => {492const msg = m as LanguageModelChatMessage;493const role = roleNames[msg.role] ?? String(msg.role);494const parts: Array<{ type: string; content?: string | unknown; id?: string; name?: string; arguments?: unknown; response?: unknown }> = [];495if (Array.isArray(msg.content)) {496for (const p of msg.content) {497if (p instanceof LanguageModelTextPart) {498parts.push({ type: 'text', content: p.value });499} else if (p instanceof LanguageModelToolCallPart) {500parts.push({ type: 'tool_call', id: p.callId, name: p.name, arguments: p.input });501} else if (p instanceof LanguageModelToolResultPart) {502const resultText = p.content.map((c: unknown) => c instanceof LanguageModelTextPart ? 
c.value : '').join('');503parts.push({ type: 'tool_call_response', id: p.callId, response: resultText });504}505}506}507if (parts.length === 0) {508parts.push({ type: 'text', content: '[non-text content]' });509}510return { role, parts };511});512otelSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(inputMsgs)));513} catch { /* swallow */ }514}515try {516const result = capturingToken517? await runWithCapturingToken(capturingToken, doRequest)518: await doRequest();519otelSpan.setStatus(SpanStatusCode.OK);520return result;521} catch (err) {522otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));523throw err;524} finally {525otelSpan.end();526}527};528529if (parentTraceContext) {530return this._otelService.runWithTraceContext(parentTraceContext, executeRequest);531}532return executeRequest();533}534535async provideTokenCount(model: LanguageModelChatInformation, text: string | LanguageModelChatMessage | LanguageModelChatMessage2, token: CancellationToken): Promise<number> {536// Simple estimation - actual token count would require Claude's tokenizer537return Math.ceil(text.toString().length / 4);538}539540private async _makeRequest(anthropicClient: Anthropic, progress: RecordedProgress<LMResponsePart>, params: Anthropic.Beta.Messages.MessageCreateParamsStreaming, betas: string[], token: CancellationToken, issuedTime: number): Promise<{ ttft: number | undefined; ttfte: number | undefined; usage: APIUsage | undefined; contextManagement: ContextManagementResponse | undefined }> {541const start = Date.now();542let ttft: number | undefined;543let ttfte: number | undefined;544545const stream = await anthropicClient.beta.messages.create({546...params,547...(betas.length > 0 && { betas })548});549550let pendingToolCall: {551toolId?: string;552name?: string;553jsonInput?: string;554} | undefined;555let pendingThinking: {556thinking?: string;557signature?: string;558} | undefined;559let pendingRedactedThinking: 
{560data: string;561} | undefined;562let pendingServerToolCall: {563toolId?: string;564name?: string;565jsonInput?: string;566type?: string;567} | undefined;568let usage: APIUsage | undefined;569let contextManagementResponse: ContextManagementResponse | undefined;570571let hasText = false;572for await (const chunk of stream) {573if (token.isCancellationRequested) {574break;575}576577if (ttft === undefined) {578ttft = Date.now() - start;579}580this._logService.trace(`chunk: ${JSON.stringify(chunk)}`);581582if (chunk.type === 'content_block_start') {583if ('content_block' in chunk && chunk.content_block.type === 'tool_use') {584pendingToolCall = {585toolId: chunk.content_block.id,586name: chunk.content_block.name,587jsonInput: ''588};589} else if ('content_block' in chunk && chunk.content_block.type === 'server_tool_use') {590// Handle server-side tool use (e.g., web_search)591pendingServerToolCall = {592toolId: chunk.content_block.id,593name: chunk.content_block.name,594jsonInput: '',595type: chunk.content_block.name596};597progress.report(new LanguageModelTextPart('\n'));598599} else if ('content_block' in chunk && chunk.content_block.type === 'thinking') {600pendingThinking = {601thinking: '',602signature: ''603};604} else if ('content_block' in chunk && chunk.content_block.type === 'redacted_thinking') {605const redactedBlock = chunk.content_block as Anthropic.Messages.RedactedThinkingBlock;606pendingRedactedThinking = {607data: redactedBlock.data608};609} else if ('content_block' in chunk && chunk.content_block.type === 'web_search_tool_result') {610if (!pendingServerToolCall || !pendingServerToolCall.toolId) {611continue;612}613614const resultBlock = chunk.content_block as Anthropic.Messages.WebSearchToolResultBlock;615// Handle potential error in web search616if (!Array.isArray(resultBlock.content)) {617this._logService.error(`Web search error: ${(resultBlock.content as Anthropic.Messages.WebSearchToolResultError).error_code}`);618continue;619}620621const 
results = resultBlock.content.map((result: Anthropic.Messages.WebSearchResultBlock) => ({622type: 'web_search_result',623url: result.url,624title: result.title,625page_age: result.page_age,626encrypted_content: result.encrypted_content627}));628629// Format according to Anthropic's web_search_tool_result specification630const toolResult = {631type: 'web_search_tool_result',632tool_use_id: pendingServerToolCall.toolId,633content: results634};635636const searchResults = JSON.stringify(toolResult, null, 2);637638// TODO: @bhavyaus - instead of just pushing text, create a specialized WebSearchResult part639progress.report(new LanguageModelToolResultPart(640pendingServerToolCall.toolId!,641[new LanguageModelTextPart(searchResults)]642));643pendingServerToolCall = undefined;644}645continue;646}647648if (chunk.type === 'content_block_delta') {649if (chunk.delta.type === 'text_delta') {650progress.report(new LanguageModelTextPart(chunk.delta.text || ''));651if (!hasText && chunk.delta.text?.length > 0) {652ttfte = Date.now() - issuedTime;653}654hasText ||= chunk.delta.text?.length > 0;655} else if (chunk.delta.type === 'citations_delta') {656if ('citation' in chunk.delta) {657// TODO: @bhavyaus - instead of just pushing text, create a specialized Citation part658const citation = chunk.delta.citation as Anthropic.Messages.CitationsWebSearchResultLocation;659if (citation.type === 'web_search_result_location') {660// Format citation according to Anthropic specification661const citationData = {662type: 'web_search_result_location',663url: citation.url,664title: citation.title,665encrypted_index: citation.encrypted_index,666cited_text: citation.cited_text667};668669// Format citation as readable blockquote with source link670const referenceText = `\n> "${citation.cited_text}" — [${vscode.l10n.t('Source')}](${citation.url})\n\n`;671672// Report formatted reference text to user673progress.report(new LanguageModelTextPart(referenceText));674675// Store the citation data in the 
correct format for multi-turn conversations676progress.report(new LanguageModelToolResultPart(677'citation',678[new LanguageModelTextPart(JSON.stringify(citationData, null, 2))]679));680}681}682} else if (chunk.delta.type === 'thinking_delta') {683if (pendingThinking) {684pendingThinking.thinking = (pendingThinking.thinking || '') + (chunk.delta.thinking || '');685progress.report(new LanguageModelThinkingPart(chunk.delta.thinking || ''));686}687} else if (chunk.delta.type === 'signature_delta') {688// Accumulate signature689if (pendingThinking) {690pendingThinking.signature = (pendingThinking.signature || '') + (chunk.delta.signature || '');691}692} else if (chunk.delta.type === 'input_json_delta' && pendingToolCall) {693pendingToolCall.jsonInput = (pendingToolCall.jsonInput || '') + (chunk.delta.partial_json || '');694695try {696// Try to parse the accumulated JSON to see if it's complete697const parsedJson = JSON.parse(pendingToolCall.jsonInput);698progress.report(new LanguageModelToolCallPart(699pendingToolCall.toolId!,700pendingToolCall.name!,701parsedJson702));703pendingToolCall = undefined;704} catch {705// JSON is not complete yet, continue accumulating706continue;707}708} else if (chunk.delta.type === 'input_json_delta' && pendingServerToolCall) {709pendingServerToolCall.jsonInput = (pendingServerToolCall.jsonInput || '') + (chunk.delta.partial_json || '');710}711}712713if (chunk.type === 'content_block_stop') {714if (pendingToolCall) {715try {716const parsedJson = JSON.parse(pendingToolCall.jsonInput || '{}');717progress.report(718new LanguageModelToolCallPart(719pendingToolCall.toolId!,720pendingToolCall.name!,721parsedJson722)723);724} catch (e) {725console.error('Failed to parse tool call JSON:', e);726}727pendingToolCall = undefined;728} else if (pendingThinking) {729if (pendingThinking.signature) {730const finalThinkingPart = new LanguageModelThinkingPart('');731finalThinkingPart.metadata = {732signature: 
pendingThinking.signature,733_completeThinking: pendingThinking.thinking734};735progress.report(finalThinkingPart);736}737pendingThinking = undefined;738} else if (pendingRedactedThinking) {739pendingRedactedThinking = undefined;740}741}742743if (chunk.type === 'message_start') {744// TODO final output tokens: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":46}}745usage = {746completion_tokens: -1,747prompt_tokens: chunk.message.usage.input_tokens + (chunk.message.usage.cache_creation_input_tokens ?? 0) + (chunk.message.usage.cache_read_input_tokens ?? 0),748total_tokens: -1,749// Cast needed: Anthropic returns cache_creation_input_tokens which APIUsage.prompt_tokens_details doesn't define750prompt_tokens_details: {751cached_tokens: chunk.message.usage.cache_read_input_tokens ?? 0,752cache_creation_input_tokens: chunk.message.usage.cache_creation_input_tokens753} as any754};755} else if (usage && chunk.type === 'message_delta') {756if (chunk.usage.output_tokens) {757usage.completion_tokens = chunk.usage.output_tokens;758usage.total_tokens = usage.prompt_tokens + chunk.usage.output_tokens;759}760// Handle context management response761if ('context_management' in chunk && chunk.context_management) {762contextManagementResponse = chunk.context_management as ContextManagementResponse;763const totalClearedTokens = contextManagementResponse.applied_edits.reduce(764(sum, edit) => sum + (edit.cleared_input_tokens || 0),7650766);767this._logService.info(`BYOK Anthropic context editing applied: cleared ${totalClearedTokens} tokens across ${contextManagementResponse.applied_edits.length} edits`);768// Emit context management via LanguageModelDataPart so it flows through to toolCallingLoop769progress.report(new LanguageModelDataPart(770new TextEncoder().encode(JSON.stringify(contextManagementResponse)),771CustomDataPartMimeTypes.ContextManagement772));773}774}775}776777return { ttft, ttfte, usage, contextManagement: 
contextManagementResponse };778}779}780781782