// Path: extensions/copilot/src/extension/byok/vscode-node/anthropicProvider.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import Anthropic from '@anthropic-ai/sdk';6import * as vscode from 'vscode';7import { CancellationToken, LanguageModelChatInformation, LanguageModelChatMessage, LanguageModelChatMessage2, LanguageModelDataPart, LanguageModelResponsePart2, LanguageModelTextPart, LanguageModelThinkingPart, LanguageModelToolCallPart, LanguageModelToolResultPart, Progress, ProvideLanguageModelChatResponseOptions } from 'vscode';8import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';9import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';10import { CustomDataPartMimeTypes } from '../../../platform/endpoint/common/endpointTypes';11import { modelSupportsToolSearch } from '../../../platform/endpoint/common/chatModelCapabilities';12import { buildToolInputSchema } from '../../../platform/endpoint/node/messagesApi';13import { ILogService } from '../../../platform/log/common/logService';14import { ContextManagementResponse, CUSTOM_TOOL_SEARCH_NAME, getContextManagementFromConfig, isAnthropicContextEditingEnabled, isAnthropicMemoryToolEnabled } from '../../../platform/networking/common/anthropic';15import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';16import { IResponseDelta, OpenAiFunctionTool } from '../../../platform/networking/common/fetch';17import { APIUsage } from '../../../platform/networking/common/openai';18import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, type OTelModelOptions, StdAttr, toToolDefinitions, truncateForOTel } from 
'../../../platform/otel/common/index';19import { IOTelService, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';20import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';21import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';22import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';23import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';24import { toErrorMessage } from '../../../util/common/errorMessage';25import { RecordedProgress } from '../../../util/common/progressRecorder';26import { generateUuid } from '../../../util/vs/base/common/uuid';27import { anthropicMessagesToRawMessagesForLogging, apiMessageToAnthropicMessage } from '../common/anthropicMessageConverter';28import { BYOKKnownModels, BYOKModelCapabilities, LMResponsePart } from '../common/byokProvider';29import { AbstractLanguageModelChatProvider, ExtendedLanguageModelChatInformation, LanguageModelChatConfiguration } from './abstractLanguageModelChatProvider';30import { byokKnownModelsToAPIInfoWithEffort } from './byokModelInfo';31import { IBYOKStorageService } from './byokStorageService';3233export class AnthropicLMProvider extends AbstractLanguageModelChatProvider {3435public static readonly providerName = 'Anthropic';3637constructor(38knownModels: BYOKKnownModels | undefined,39byokStorageService: IBYOKStorageService,40@ILogService logService: ILogService,41@IRequestLogger private readonly _requestLogger: IRequestLogger,42@IConfigurationService private readonly _configurationService: IConfigurationService,43@IExperimentationService private readonly _experimentationService: IExperimentationService,44@ITelemetryService private readonly _telemetryService: ITelemetryService,45@IOTelService private readonly _otelService: IOTelService,46@IToolDeferralService private readonly _toolDeferralService: 
IToolDeferralService,47) {48super(AnthropicLMProvider.providerName.toLowerCase(), AnthropicLMProvider.providerName, knownModels, byokStorageService, logService);4950}5152private _getThinkingBudget(modelId: string, maxOutputTokens: number): number | undefined {53const modelCapabilities = this._knownModels?.[modelId];54const modelSupportsThinking = modelCapabilities?.thinking ?? false;55if (!modelSupportsThinking) {56return undefined;57}58return Math.min(32000, maxOutputTokens - 1, 16000);59}6061// Filters the byok known models based on what the anthropic API knows as well62protected async getAllModels(silent: boolean, apiKey: string | undefined): Promise<ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>[]> {63if (!apiKey && silent) {64return [];65}6667try {68const response = await new Anthropic({ apiKey }).models.list();69const modelList: Record<string, BYOKModelCapabilities> = {};70for (const model of response.data) {71if (this._knownModels && this._knownModels[model.id]) {72modelList[model.id] = this._knownModels[model.id];73} else {74// Mix in generic capabilities for models we don't know75modelList[model.id] = {76maxInputTokens: 100000,77maxOutputTokens: 16000,78name: model.display_name,79toolCalling: true,80vision: false,81thinking: false82};83}84}85return byokKnownModelsToAPIInfoWithEffort(this._name, modelList);86} catch (error) {87this._logService.error(error, `Error fetching available ${AnthropicLMProvider.providerName} models`);88throw new Error(error.message ? 
error.message : error);89}90}9192async provideLanguageModelChatResponse(model: ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>, messages: Array<LanguageModelChatMessage | LanguageModelChatMessage2>, options: ProvideLanguageModelChatResponseOptions, progress: Progress<LanguageModelResponsePart2>, token: CancellationToken): Promise<void> {93// Restore CapturingToken context if correlation ID was passed through modelOptions.94// This handles the case where AsyncLocalStorage context was lost crossing VS Code IPC.95const correlationId = (options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;96const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;9798// Restore OTel trace context to link spans back to the agent trace99const parentTraceContext = (options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;100101// OTel span handle — created outside doRequest, enriched inside with usage data102let otelSpan: ReturnType<typeof this._otelService.startSpan> | undefined;103104const doRequest = async () => {105const issuedTime = Date.now();106const apiKey = model.configuration?.apiKey;107if (!apiKey) {108throw new Error('API key not found for the model');109}110111const anthropicClient = new Anthropic({ apiKey });112113// Convert the messages from the API format into messages that we can use against anthropic114const { system, messages: convertedMessages } = apiMessageToAnthropicMessage(messages as LanguageModelChatMessage[]);115116const requestId = generateUuid();117const pendingLoggedChatRequest = this._requestLogger.logChatRequest(118'AnthropicBYOK',119{120model: model.id,121modelMaxPromptTokens: model.maxInputTokens,122urlOrRequestMetadata: anthropicClient.baseURL,123},124{125model: model.id,126messages: anthropicMessagesToRawMessagesForLogging(convertedMessages, system),127ourRequestId: requestId,128location: ChatLocation.Other,129body: 
{130tools: options.tools?.map((tool): OpenAiFunctionTool => ({131type: 'function',132function: {133name: tool.name,134description: tool.description,135parameters: tool.inputSchema136}137}))138},139});140141const memoryToolEnabled = isAnthropicMemoryToolEnabled(model.id, this._configurationService, this._experimentationService);142143// Requires the client-side tool_search tool in the request: without it, defer-loaded tools can't be retrieved.144// If the user disables tool_search in the tool picker, it won't be present here and tool search is skipped.145const toolSearchEnabled = modelSupportsToolSearch(model.id)146&& !!options.tools?.some(t => t.name === CUSTOM_TOOL_SEARCH_NAME);147148// Build tools array, handling both standard tools and native Anthropic tools149const tools: Anthropic.Beta.BetaToolUnion[] = [];150151let hasMemoryTool = false;152for (const tool of (options.tools ?? [])) {153// Handle native Anthropic memory tool (only for models that support it)154if (tool.name === 'memory' && memoryToolEnabled) {155156hasMemoryTool = true;157tools.push({158name: 'memory',159type: 'memory_20250818'160} as Anthropic.Beta.BetaMemoryTool20250818);161continue;162}163164// Mark tools for deferred loading when tool search is enabled, except for frequently used tools165const shouldDefer = toolSearchEnabled ? !this._toolDeferralService.isNonDeferredTool(tool.name) : undefined;166167if (!tool.inputSchema) {168tools.push({169name: tool.name,170description: tool.description,171input_schema: {172type: 'object',173properties: {},174required: []175},176...(shouldDefer ? { defer_loading: shouldDefer } : {})177});178continue;179}180181tools.push({182name: tool.name,183description: tool.description,184input_schema: buildToolInputSchema(tool.inputSchema as Record<string, unknown>),185...(shouldDefer ? 
{ defer_loading: shouldDefer } : {})186});187}188189// Check if web search is enabled and append web_search tool if not already present.190// We need to do this because there is no local web_search tool definition we can replace.191const webSearchEnabled = this._configurationService.getExperimentBasedConfig(ConfigKey.AnthropicWebSearchToolEnabled, this._experimentationService);192if (webSearchEnabled && !tools.some(tool => 'name' in tool && tool.name === 'web_search')) {193const maxUses = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchMaxUses);194const allowedDomains = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchAllowedDomains);195const blockedDomains = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchBlockedDomains);196const userLocation = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchUserLocation);197const shouldDeferWebSearch = toolSearchEnabled ? !this._toolDeferralService.isNonDeferredTool('web_search') : undefined;198199const webSearchTool: Anthropic.Beta.BetaWebSearchTool20250305 = {200name: 'web_search',201type: 'web_search_20250305',202max_uses: maxUses,203...(shouldDeferWebSearch ? 
{ defer_loading: shouldDeferWebSearch } : {})204};205206// Add domain filtering if configured207// Cannot use both allowed and blocked domains simultaneously208if (allowedDomains && allowedDomains.length > 0) {209webSearchTool.allowed_domains = allowedDomains;210} else if (blockedDomains && blockedDomains.length > 0) {211webSearchTool.blocked_domains = blockedDomains;212}213214// Add user location if configured215// Note: All fields are optional according to Anthropic docs216if (userLocation && (userLocation.city || userLocation.region || userLocation.country || userLocation.timezone)) {217webSearchTool.user_location = {218type: 'approximate',219...userLocation220};221}222223tools.push(webSearchTool);224}225226const thinkingBudget = this._getThinkingBudget(model.id, model.maxOutputTokens);227228// Check if model supports adaptive thinking229const modelCapabilities = this._knownModels?.[model.id];230const supportsAdaptiveThinking = modelCapabilities?.adaptiveThinking ?? false;231232// Build context management configuration233const thinkingEnabled = supportsAdaptiveThinking || (thinkingBudget ?? 0) > 0;234const contextManagement = isAnthropicContextEditingEnabled(model.id, this._configurationService, this._experimentationService) ? 
getContextManagementFromConfig(235this._configurationService,236this._experimentationService,237thinkingEnabled238) : undefined;239240// Build betas array for beta API features (adaptive thinking doesn't need interleaved-thinking beta)241const betas: string[] = [];242if (thinkingBudget && !supportsAdaptiveThinking) {243betas.push('interleaved-thinking-2025-05-14');244}245if (hasMemoryTool || contextManagement) {246betas.push('context-management-2025-06-27');247}248if (toolSearchEnabled) {249betas.push('advanced-tool-use-2025-11-20');250}251252const rawEffort = options.modelConfiguration?.reasoningEffort;253const supportsEffort = modelCapabilities?.supportsReasoningEffort;254const effort = supportsEffort && typeof rawEffort === 'string' && supportsEffort.includes(rawEffort)255? rawEffort as 'low' | 'medium' | 'high' | 'max'256: undefined;257258const params: Anthropic.Beta.Messages.MessageCreateParamsStreaming = {259model: model.id,260messages: convertedMessages,261max_tokens: model.maxOutputTokens,262stream: true,263system: [system],264tools: tools.length > 0 ? tools : undefined,265thinking: supportsAdaptiveThinking266? { type: 'adaptive' as const }267: thinkingBudget ? { type: 'enabled' as const, budget_tokens: thinkingBudget } : undefined,268...(effort ? 
{ output_config: { effort } } : {}),269context_management: contextManagement as Anthropic.Beta.Messages.BetaContextManagementConfig | undefined,270};271272const wrappedProgress = new RecordedProgress(progress);273274try {275const result = await this._makeRequest(anthropicClient, wrappedProgress, params, betas, token, issuedTime);276if (result.ttft) {277pendingLoggedChatRequest.markTimeToFirstToken(result.ttft);278}279const responseDeltas: IResponseDelta[] = wrappedProgress.items.map((i): IResponseDelta => {280if (i instanceof LanguageModelTextPart) {281return { text: i.value };282} else if (i instanceof LanguageModelToolCallPart) {283return {284text: '',285copilotToolCalls: [{286name: i.name,287arguments: JSON.stringify(i.input),288id: i.callId289}]290};291} else if (i instanceof LanguageModelToolResultPart) {292// Handle tool results - extract text from content293const resultText = i.content.map(c => c instanceof LanguageModelTextPart ? c.value : '').join('');294return {295text: `[Tool Result ${i.callId}]: ${resultText}`296};297} else {298return { text: '' };299}300});301// TODO: @bhavyaus - Add telemetry tracking for context editing (contextEditingApplied, contextEditingClearedTokens, contextEditingEditCount) like messagesApi.ts does302if (result.contextManagement) {303responseDeltas.push({304text: '',305contextManagement: result.contextManagement306});307}308pendingLoggedChatRequest.resolve({309type: ChatFetchResponseType.Success,310requestId,311serverRequestId: requestId,312usage: result.usage,313value: ['value'],314resolvedModel: model.id315}, responseDeltas);316317// Enrich OTel span with usage data from the Anthropic response318if (otelSpan && result.usage) {319otelSpan.setAttributes({320[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,321[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,322...(result.usage.prompt_tokens_details?.cached_tokens323? 
{ [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }324: {}),325[GenAiAttr.RESPONSE_MODEL]: model.id,326[GenAiAttr.RESPONSE_ID]: requestId,327[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],328[GenAiAttr.CONVERSATION_ID]: requestId,329...(result.ttft ? { [CopilotChatAttr.TIME_TO_FIRST_TOKEN]: result.ttft } : {}),330[GenAiAttr.REQUEST_MAX_TOKENS]: model.maxOutputTokens ?? 0,331});332// Opt-in content capture333if (this._otelService.config.captureContent) {334const responseText = wrappedProgress.items335.filter((p): p is LanguageModelTextPart => p instanceof LanguageModelTextPart)336.map(p => p.value).join('');337const toolCalls = wrappedProgress.items338.filter((p): p is LanguageModelToolCallPart => p instanceof LanguageModelToolCallPart)339.map(tc => ({ type: 'tool_call' as const, id: tc.callId, name: tc.name, arguments: tc.input }));340const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];341if (responseText) { parts.push({ type: 'text', content: responseText }); }342parts.push(...toolCalls);343if (parts.length > 0) {344otelSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));345}346}347}348349// Record OTel metrics for this Anthropic LLM call350if (result.usage) {351const durationSec = (Date.now() - issuedTime) / 1000;352const metricAttrs = { operationName: GenAiOperationName.CHAT, providerName: 'anthropic', requestModel: model.id, responseModel: model.id };353GenAiMetrics.recordOperationDuration(this._otelService, durationSec, metricAttrs);354if (result.usage.prompt_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs); }355if (result.usage.completion_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs); }356if (result.ttft) { GenAiMetrics.recordTimeToFirstToken(this._otelService, model.id, 
result.ttft / 1000); }357}358359// Emit OTel inference details event360emitInferenceDetailsEvent(361this._otelService,362{ model: model.id, maxTokens: model.maxOutputTokens },363result.usage ? {364id: requestId,365model: model.id,366finishReasons: ['stop'],367inputTokens: result.usage.prompt_tokens,368outputTokens: result.usage.completion_tokens,369} : undefined,370);371372// Send success telemetry matching response.success format373/* __GDPR__374"response.success" : {375"owner": "digitarald",376"comment": "Report quality details for a successful service response.",377"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished" },378"filterReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response was filtered" },379"source": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Source of the initial request" },380"initiatorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was initiated by a user or an agent" },381"model": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Model selection for the response" },382"modelInvoked": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Actual model invoked for the response" },383"apiType": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "API type for the response- chat completions or responses" },384"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id of the current turn request" },385"gitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id if available" },386"associatedRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Another request ID that this request 
is associated with (eg, the originating request of a summarization request)." },387"reasoningEffort": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning effort level" },388"reasoningSummary": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning summary level" },389"fetcher": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The fetcher used for the request" },390"transport": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The transport used for the request (http or websocket)" },391"totalTokenMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum total token window", "isMeasurement": true },392"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },393"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },394"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },395"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },396"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },397"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },398"acceptedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the 
completion", "isMeasurement": true },399"rejectedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },400"completionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the output", "isMeasurement": true },401"timeToFirstToken": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token", "isMeasurement": true },402"timeToFirstTokenEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token emitted (visible text)", "isMeasurement": true },403"timeToComplete": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to complete the request", "isMeasurement": true },404"issuedTime": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Timestamp when the request was issued", "isMeasurement": true },405"isVisionRequest": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request was for a vision model", "isMeasurement": true },406"isBYOK": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for a BYOK model", "isMeasurement": true },407"isAuto": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for an Auto model", "isMeasurement": true },408"bytesReceived": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of bytes received in the response", "isMeasurement": true },409"retryAfterError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the original request." 
},410"retryAfterErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the original request if available" },411"connectivityTestError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the connectivity test." },412"connectivityTestErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the connectivity test request if available" },413"retryAfterFilterCategory": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the response was filtered and this is a retry attempt, this contains the original filtered content category." },414"suspendEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system suspend event was seen during the request", "isMeasurement": true },415"resumeEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system resume event was seen during the request", "isMeasurement": true }416}417*/418this._telemetryService.sendTelemetryEvent('response.success', { github: true, microsoft: true }, {419source: 'byok.anthropic',420model: model.id,421requestId,422}, {423totalTokenMax: model.maxInputTokens ?? -1,424tokenCountMax: model.maxOutputTokens ?? 
-1,425promptTokenCount: result.usage?.prompt_tokens,426promptCacheTokenCount: result.usage?.prompt_tokens_details?.cached_tokens,427tokenCount: result.usage?.total_tokens,428completionTokens: result.usage?.completion_tokens,429timeToFirstToken: result.ttft,430timeToFirstTokenEmitted: result.ttfte,431timeToComplete: Date.now() - issuedTime,432issuedTime,433isBYOK: 1,434});435} catch (err) {436this._logService.error(`BYOK Anthropic error: ${toErrorMessage(err, true)}`);437pendingLoggedChatRequest.resolve({438type: ChatFetchResponseType.Unknown,439requestId,440serverRequestId: requestId,441reason: err.message442}, wrappedProgress.items.map((i): IResponseDelta => {443if (i instanceof LanguageModelTextPart) {444return { text: i.value };445} else if (i instanceof LanguageModelToolCallPart) {446return {447text: '',448copilotToolCalls: [{449name: i.name,450arguments: JSON.stringify(i.input),451id: i.callId452}]453};454} else if (i instanceof LanguageModelToolResultPart) {455// Handle tool results - extract text from content456const resultText = i.content.map(c => c instanceof LanguageModelTextPart ? 
c.value : '').join('');457return {458text: `[Tool Result ${i.callId}]: ${resultText}`459};460} else {461return { text: '' };462}463}));464throw err;465}466};467468// Create OTel span and execute with trace context + CapturingToken469const executeRequest = async () => {470otelSpan = this._otelService.startSpan(`chat ${model.id}`, {471kind: SpanKind.CLIENT,472attributes: {473[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,474[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.ANTHROPIC,475[GenAiAttr.REQUEST_MODEL]: model.id,476[GenAiAttr.AGENT_NAME]: 'AnthropicBYOK',477[CopilotChatAttr.MAX_PROMPT_TOKENS]: model.maxInputTokens,478[StdAttr.SERVER_ADDRESS]: 'api.anthropic.com',479},480});481// Opt-in: capture input messages in OTel GenAI format482if (this._otelService.config.captureContent) {483// Tool definitions on the chat span (issue #299934) with `parameters`484// per OTel GenAI semantic conventions (issue #300318).485const toolDefs = toToolDefinitions(options.tools);486if (toolDefs) {487otelSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));488}489try {490const roleNames: Record<number, string> = { 1: 'user', 2: 'assistant', 3: 'system' };491const inputMsgs = messages.map(m => {492const msg = m as LanguageModelChatMessage;493const role = roleNames[msg.role] ?? String(msg.role);494const parts: Array<{ type: string; content?: string | unknown; id?: string; name?: string; arguments?: unknown; response?: unknown }> = [];495if (Array.isArray(msg.content)) {496for (const p of msg.content) {497if (p instanceof LanguageModelTextPart) {498parts.push({ type: 'text', content: p.value });499} else if (p instanceof LanguageModelToolCallPart) {500parts.push({ type: 'tool_call', id: p.callId, name: p.name, arguments: p.input });501} else if (p instanceof LanguageModelToolResultPart) {502const resultText = p.content.map((c: unknown) => c instanceof LanguageModelTextPart ? 
c.value : '').join('');503parts.push({ type: 'tool_call_response', id: p.callId, response: resultText });504}505}506}507if (parts.length === 0) {508parts.push({ type: 'text', content: '[non-text content]' });509}510return { role, parts };511});512otelSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(inputMsgs)));513} catch { /* swallow */ }514}515try {516const result = capturingToken517? await runWithCapturingToken(capturingToken, doRequest)518: await doRequest();519otelSpan.setStatus(SpanStatusCode.OK);520return result;521} catch (err) {522otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));523throw err;524} finally {525otelSpan.end();526}527};528529if (parentTraceContext) {530return this._otelService.runWithTraceContext(parentTraceContext, executeRequest);531}532return executeRequest();533}534535async provideTokenCount(model: LanguageModelChatInformation, text: string | LanguageModelChatMessage | LanguageModelChatMessage2, token: CancellationToken): Promise<number> {536// Simple estimation - actual token count would require Claude's tokenizer537return Math.ceil(text.toString().length / 4);538}539540private async _makeRequest(anthropicClient: Anthropic, progress: RecordedProgress<LMResponsePart>, params: Anthropic.Beta.Messages.MessageCreateParamsStreaming, betas: string[], token: CancellationToken, issuedTime: number): Promise<{ ttft: number | undefined; ttfte: number | undefined; usage: APIUsage | undefined; contextManagement: ContextManagementResponse | undefined }> {541const start = Date.now();542let ttft: number | undefined;543let ttfte: number | undefined;544545const stream = await anthropicClient.beta.messages.create({546...params,547...(betas.length > 0 && { betas })548});549550let pendingToolCall: {551toolId?: string;552name?: string;553jsonInput?: string;554} | undefined;555let pendingThinking: {556thinking?: string;557signature?: string;558} | undefined;559let pendingRedactedThinking: 
{560data: string;561} | undefined;562let pendingServerToolCall: {563toolId?: string;564name?: string;565jsonInput?: string;566type?: string;567} | undefined;568let usage: APIUsage | undefined;569let contextManagementResponse: ContextManagementResponse | undefined;570571let hasText = false;572for await (const chunk of stream) {573if (token.isCancellationRequested) {574break;575}576577if (ttft === undefined) {578ttft = Date.now() - start;579}580this._logService.trace(`chunk: ${JSON.stringify(chunk)}`);581582if (chunk.type === 'content_block_start') {583if ('content_block' in chunk && chunk.content_block.type === 'tool_use') {584pendingToolCall = {585toolId: chunk.content_block.id,586name: chunk.content_block.name,587jsonInput: ''588};589} else if ('content_block' in chunk && chunk.content_block.type === 'server_tool_use') {590// Handle server-side tool use (e.g., web_search)591pendingServerToolCall = {592toolId: chunk.content_block.id,593name: chunk.content_block.name,594jsonInput: '',595type: chunk.content_block.name596};597progress.report(new LanguageModelTextPart('\n'));598599} else if ('content_block' in chunk && chunk.content_block.type === 'thinking') {600pendingThinking = {601thinking: '',602signature: ''603};604} else if ('content_block' in chunk && chunk.content_block.type === 'redacted_thinking') {605const redactedBlock = chunk.content_block as Anthropic.Messages.RedactedThinkingBlock;606pendingRedactedThinking = {607data: redactedBlock.data608};609} else if ('content_block' in chunk && chunk.content_block.type === 'web_search_tool_result') {610if (!pendingServerToolCall || !pendingServerToolCall.toolId) {611continue;612}613614const resultBlock = chunk.content_block as Anthropic.Messages.WebSearchToolResultBlock;615// Handle potential error in web search616if (!Array.isArray(resultBlock.content)) {617this._logService.error(`Web search error: ${(resultBlock.content as Anthropic.Messages.WebSearchToolResultError).error_code}`);618continue;619}620621const 
results = resultBlock.content.map((result: Anthropic.Messages.WebSearchResultBlock) => ({622type: 'web_search_result',623url: result.url,624title: result.title,625page_age: result.page_age,626encrypted_content: result.encrypted_content627}));628629// Format according to Anthropic's web_search_tool_result specification630const toolResult = {631type: 'web_search_tool_result',632tool_use_id: pendingServerToolCall.toolId,633content: results634};635636const searchResults = JSON.stringify(toolResult, null, 2);637638// TODO: @bhavyaus - instead of just pushing text, create a specialized WebSearchResult part639progress.report(new LanguageModelToolResultPart(640pendingServerToolCall.toolId!,641[new LanguageModelTextPart(searchResults)]642));643pendingServerToolCall = undefined;644}645continue;646}647648if (chunk.type === 'content_block_delta') {649if (chunk.delta.type === 'text_delta') {650progress.report(new LanguageModelTextPart(chunk.delta.text || ''));651if (!hasText && chunk.delta.text?.length > 0) {652ttfte = Date.now() - issuedTime;653}654hasText ||= chunk.delta.text?.length > 0;655} else if (chunk.delta.type === 'citations_delta') {656if ('citation' in chunk.delta) {657// TODO: @bhavyaus - instead of just pushing text, create a specialized Citation part658const citation = chunk.delta.citation as Anthropic.Messages.CitationsWebSearchResultLocation;659if (citation.type === 'web_search_result_location') {660// Format citation according to Anthropic specification661const citationData = {662type: 'web_search_result_location',663url: citation.url,664title: citation.title,665encrypted_index: citation.encrypted_index,666cited_text: citation.cited_text667};668669// Format citation as readable blockquote with source link670const referenceText = `\n> "${citation.cited_text}" — [${vscode.l10n.t('Source')}](${citation.url})\n\n`;671672// Report formatted reference text to user673progress.report(new LanguageModelTextPart(referenceText));674675// Store the citation data in the 
correct format for multi-turn conversations676progress.report(new LanguageModelToolResultPart(677'citation',678[new LanguageModelTextPart(JSON.stringify(citationData, null, 2))]679));680}681}682} else if (chunk.delta.type === 'thinking_delta') {683if (pendingThinking) {684pendingThinking.thinking = (pendingThinking.thinking || '') + (chunk.delta.thinking || '');685progress.report(new LanguageModelThinkingPart(chunk.delta.thinking || ''));686}687} else if (chunk.delta.type === 'signature_delta') {688// Accumulate signature689if (pendingThinking) {690pendingThinking.signature = (pendingThinking.signature || '') + (chunk.delta.signature || '');691}692} else if (chunk.delta.type === 'input_json_delta' && pendingToolCall) {693pendingToolCall.jsonInput = (pendingToolCall.jsonInput || '') + (chunk.delta.partial_json || '');694695try {696// Try to parse the accumulated JSON to see if it's complete697const parsedJson = JSON.parse(pendingToolCall.jsonInput);698progress.report(new LanguageModelToolCallPart(699pendingToolCall.toolId!,700pendingToolCall.name!,701parsedJson702));703pendingToolCall = undefined;704} catch {705// JSON is not complete yet, continue accumulating706continue;707}708} else if (chunk.delta.type === 'input_json_delta' && pendingServerToolCall) {709pendingServerToolCall.jsonInput = (pendingServerToolCall.jsonInput || '') + (chunk.delta.partial_json || '');710}711}712713if (chunk.type === 'content_block_stop') {714if (pendingToolCall) {715try {716const parsedJson = JSON.parse(pendingToolCall.jsonInput || '{}');717progress.report(718new LanguageModelToolCallPart(719pendingToolCall.toolId!,720pendingToolCall.name!,721parsedJson722)723);724} catch (e) {725console.error('Failed to parse tool call JSON:', e);726}727pendingToolCall = undefined;728} else if (pendingThinking) {729if (pendingThinking.signature) {730const finalThinkingPart = new LanguageModelThinkingPart('');731finalThinkingPart.metadata = {732signature: 
pendingThinking.signature,733_completeThinking: pendingThinking.thinking734};735progress.report(finalThinkingPart);736}737pendingThinking = undefined;738} else if (pendingRedactedThinking) {739pendingRedactedThinking = undefined;740}741}742743if (chunk.type === 'message_start') {744// TODO final output tokens: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":46}}745usage = {746completion_tokens: -1,747prompt_tokens: chunk.message.usage.input_tokens + (chunk.message.usage.cache_creation_input_tokens ?? 0) + (chunk.message.usage.cache_read_input_tokens ?? 0),748total_tokens: -1,749// Cast needed: Anthropic returns cache_creation_input_tokens which APIUsage.prompt_tokens_details doesn't define750prompt_tokens_details: {751cached_tokens: chunk.message.usage.cache_read_input_tokens ?? 0,752cache_creation_input_tokens: chunk.message.usage.cache_creation_input_tokens753} as any754};755} else if (usage && chunk.type === 'message_delta') {756if (chunk.usage.output_tokens) {757usage.completion_tokens = chunk.usage.output_tokens;758usage.total_tokens = usage.prompt_tokens + chunk.usage.output_tokens;759}760// Handle context management response761if ('context_management' in chunk && chunk.context_management) {762contextManagementResponse = chunk.context_management as ContextManagementResponse;763const totalClearedTokens = contextManagementResponse.applied_edits.reduce(764(sum, edit) => sum + (edit.cleared_input_tokens || 0),7650766);767this._logService.info(`BYOK Anthropic context editing applied: cleared ${totalClearedTokens} tokens across ${contextManagementResponse.applied_edits.length} edits`);768// Emit context management via LanguageModelDataPart so it flows through to toolCallingLoop769progress.report(new LanguageModelDataPart(770new TextEncoder().encode(JSON.stringify(contextManagementResponse)),771CustomDataPartMimeTypes.ContextManagement772));773}774}775}776777return { ttft, ttfte, usage, contextManagement: 
contextManagementResponse };778}779}780781782