Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/byok/vscode-node/anthropicProvider.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import Anthropic from '@anthropic-ai/sdk';
7
import * as vscode from 'vscode';
8
import { CancellationToken, LanguageModelChatInformation, LanguageModelChatMessage, LanguageModelChatMessage2, LanguageModelDataPart, LanguageModelResponsePart2, LanguageModelTextPart, LanguageModelThinkingPart, LanguageModelToolCallPart, LanguageModelToolResultPart, Progress, ProvideLanguageModelChatResponseOptions } from 'vscode';
9
import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';
10
import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
11
import { CustomDataPartMimeTypes } from '../../../platform/endpoint/common/endpointTypes';
12
import { modelSupportsToolSearch } from '../../../platform/endpoint/common/chatModelCapabilities';
13
import { buildToolInputSchema } from '../../../platform/endpoint/node/messagesApi';
14
import { ILogService } from '../../../platform/log/common/logService';
15
import { ContextManagementResponse, CUSTOM_TOOL_SEARCH_NAME, getContextManagementFromConfig, isAnthropicContextEditingEnabled, isAnthropicMemoryToolEnabled } from '../../../platform/networking/common/anthropic';
16
import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';
17
import { IResponseDelta, OpenAiFunctionTool } from '../../../platform/networking/common/fetch';
18
import { APIUsage } from '../../../platform/networking/common/openai';
19
import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, type OTelModelOptions, StdAttr, toToolDefinitions, truncateForOTel } from '../../../platform/otel/common/index';
20
import { IOTelService, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';
21
import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';
22
import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';
23
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
24
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
25
import { toErrorMessage } from '../../../util/common/errorMessage';
26
import { RecordedProgress } from '../../../util/common/progressRecorder';
27
import { generateUuid } from '../../../util/vs/base/common/uuid';
28
import { anthropicMessagesToRawMessagesForLogging, apiMessageToAnthropicMessage } from '../common/anthropicMessageConverter';
29
import { BYOKKnownModels, BYOKModelCapabilities, LMResponsePart } from '../common/byokProvider';
30
import { AbstractLanguageModelChatProvider, ExtendedLanguageModelChatInformation, LanguageModelChatConfiguration } from './abstractLanguageModelChatProvider';
31
import { byokKnownModelsToAPIInfoWithEffort } from './byokModelInfo';
32
import { IBYOKStorageService } from './byokStorageService';
33
34
export class AnthropicLMProvider extends AbstractLanguageModelChatProvider {
35
36
/** Display name; also lowercased to form the provider id passed to the base class. */
public static readonly providerName = 'Anthropic';

/**
 * @param knownModels Curated capability table for models we recognize; unknown
 *                    models fall back to generic capabilities in getAllModels.
 * @param byokStorageService Storage for the user-supplied API key.
 */
constructor(
	knownModels: BYOKKnownModels | undefined,
	byokStorageService: IBYOKStorageService,
	@ILogService logService: ILogService,
	@IRequestLogger private readonly _requestLogger: IRequestLogger,
	@IConfigurationService private readonly _configurationService: IConfigurationService,
	@IExperimentationService private readonly _experimentationService: IExperimentationService,
	@ITelemetryService private readonly _telemetryService: ITelemetryService,
	@IOTelService private readonly _otelService: IOTelService,
	@IToolDeferralService private readonly _toolDeferralService: IToolDeferralService,
) {
	// Base class takes (vendor id, display name, known models, storage, log).
	super(AnthropicLMProvider.providerName.toLowerCase(), AnthropicLMProvider.providerName, knownModels, byokStorageService, logService);
}
52
53
/**
 * Computes the extended-thinking token budget for a model.
 *
 * @param modelId Model identifier used to look up known capabilities.
 * @param maxOutputTokens The model's maximum output tokens; the budget must
 *                        stay strictly below this (hence the `- 1`).
 * @returns The budget in tokens, capped at 16000, or `undefined` when the
 *          model is not known to support thinking.
 */
private _getThinkingBudget(modelId: string, maxOutputTokens: number): number | undefined {
	const modelCapabilities = this._knownModels?.[modelId];
	const modelSupportsThinking = modelCapabilities?.thinking ?? false;
	if (!modelSupportsThinking) {
		return undefined;
	}
	// The original also clamped at 32000, but that bound was dead code:
	// the 16000 cap is always tighter, so the result is unchanged.
	return Math.min(maxOutputTokens - 1, 16000);
}
61
62
/**
 * Filters the BYOK known models based on what the Anthropic API knows as well.
 *
 * @param silent When true and no API key is available, returns an empty list
 *               instead of attempting a request.
 * @param apiKey The user's Anthropic API key, if configured.
 * @returns Chat model info for every model the API lists; models not present
 *          in `_knownModels` get conservative generic capabilities.
 * @throws Error with a string message when the API listing fails.
 */
protected async getAllModels(silent: boolean, apiKey: string | undefined): Promise<ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>[]> {
	if (!apiKey && silent) {
		return [];
	}

	try {
		const response = await new Anthropic({ apiKey }).models.list();
		const modelList: Record<string, BYOKModelCapabilities> = {};
		for (const model of response.data) {
			if (this._knownModels && this._knownModels[model.id]) {
				modelList[model.id] = this._knownModels[model.id];
			} else {
				// Mix in generic capabilities for models we don't know
				modelList[model.id] = {
					maxInputTokens: 100000,
					maxOutputTokens: 16000,
					name: model.display_name,
					toolCalling: true,
					vision: false,
					thinking: false
				};
			}
		}
		return byokKnownModelsToAPIInfoWithEffort(this._name, modelList);
	} catch (error) {
		this._logService.error(error, `Error fetching available ${AnthropicLMProvider.providerName} models`);
		// Narrow before re-throwing: the previous `error.message ? error.message : error`
		// could pass a non-string straight to the Error ctor, producing
		// "[object Object]" messages for non-Error throwables.
		throw new Error(error instanceof Error ? error.message : String(error));
	}
}
92
93
/**
 * Streams a chat completion from the Anthropic Messages API into `progress`.
 *
 * Responsibilities, in order: restore cross-IPC logging/trace context, build
 * the Anthropic request (tools, web search, thinking, context management,
 * betas), stream via `_makeRequest`, then record the outcome in the request
 * logger, the OTel span/metrics, and GDPR-annotated telemetry.
 *
 * @param model    The selected BYOK model (carries the API key in `configuration`).
 * @param messages Conversation history in VS Code LM message format.
 * @param options  Tools, model configuration, and pass-through `modelOptions`.
 * @param progress Sink for streamed response parts.
 * @param token    Cancellation token forwarded to the streaming request.
 * @throws Error when no API key is configured, or rethrows any request failure
 *         after logging it.
 */
async provideLanguageModelChatResponse(model: ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>, messages: Array<LanguageModelChatMessage | LanguageModelChatMessage2>, options: ProvideLanguageModelChatResponseOptions, progress: Progress<LanguageModelResponsePart2>, token: CancellationToken): Promise<void> {
	// Restore CapturingToken context if correlation ID was passed through modelOptions.
	// This handles the case where AsyncLocalStorage context was lost crossing VS Code IPC.
	const correlationId = (options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;
	const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;

	// Restore OTel trace context to link spans back to the agent trace
	const parentTraceContext = (options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;

	// OTel span handle — created outside doRequest, enriched inside with usage data
	let otelSpan: ReturnType<typeof this._otelService.startSpan> | undefined;

	const doRequest = async () => {
		const issuedTime = Date.now();
		const apiKey = model.configuration?.apiKey;
		if (!apiKey) {
			throw new Error('API key not found for the model');
		}

		const anthropicClient = new Anthropic({ apiKey });

		// Convert the messages from the API format into messages that we can use against anthropic
		const { system, messages: convertedMessages } = apiMessageToAnthropicMessage(messages as LanguageModelChatMessage[]);

		const requestId = generateUuid();
		// Start the request-log entry up front; resolved below on success or failure.
		const pendingLoggedChatRequest = this._requestLogger.logChatRequest(
			'AnthropicBYOK',
			{
				model: model.id,
				modelMaxPromptTokens: model.maxInputTokens,
				urlOrRequestMetadata: anthropicClient.baseURL,
			},
			{
				model: model.id,
				messages: anthropicMessagesToRawMessagesForLogging(convertedMessages, system),
				ourRequestId: requestId,
				location: ChatLocation.Other,
				body: {
					// Logged in OpenAI function-tool shape for consistency with other providers.
					tools: options.tools?.map((tool): OpenAiFunctionTool => ({
						type: 'function',
						function: {
							name: tool.name,
							description: tool.description,
							parameters: tool.inputSchema
						}
					}))
				},
			});

		const memoryToolEnabled = isAnthropicMemoryToolEnabled(model.id, this._configurationService, this._experimentationService);

		// Requires the client-side tool_search tool in the request: without it, defer-loaded tools can't be retrieved.
		// If the user disables tool_search in the tool picker, it won't be present here and tool search is skipped.
		const toolSearchEnabled = modelSupportsToolSearch(model.id)
			&& !!options.tools?.some(t => t.name === CUSTOM_TOOL_SEARCH_NAME);

		// Build tools array, handling both standard tools and native Anthropic tools
		const tools: Anthropic.Beta.BetaToolUnion[] = [];

		let hasMemoryTool = false;
		for (const tool of (options.tools ?? [])) {
			// Handle native Anthropic memory tool (only for models that support it)
			if (tool.name === 'memory' && memoryToolEnabled) {

				hasMemoryTool = true;
				tools.push({
					name: 'memory',
					type: 'memory_20250818'
				} as Anthropic.Beta.BetaMemoryTool20250818);
				continue;
			}

			// Mark tools for deferred loading when tool search is enabled, except for frequently used tools
			const shouldDefer = toolSearchEnabled ? !this._toolDeferralService.isNonDeferredTool(tool.name) : undefined;

			if (!tool.inputSchema) {
				// Anthropic requires an input_schema; supply an empty object schema.
				tools.push({
					name: tool.name,
					description: tool.description,
					input_schema: {
						type: 'object',
						properties: {},
						required: []
					},
					...(shouldDefer ? { defer_loading: shouldDefer } : {})
				});
				continue;
			}

			tools.push({
				name: tool.name,
				description: tool.description,
				input_schema: buildToolInputSchema(tool.inputSchema as Record<string, unknown>),
				...(shouldDefer ? { defer_loading: shouldDefer } : {})
			});
		}

		// Check if web search is enabled and append web_search tool if not already present.
		// We need to do this because there is no local web_search tool definition we can replace.
		const webSearchEnabled = this._configurationService.getExperimentBasedConfig(ConfigKey.AnthropicWebSearchToolEnabled, this._experimentationService);
		if (webSearchEnabled && !tools.some(tool => 'name' in tool && tool.name === 'web_search')) {
			const maxUses = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchMaxUses);
			const allowedDomains = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchAllowedDomains);
			const blockedDomains = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchBlockedDomains);
			const userLocation = this._configurationService.getConfig(ConfigKey.AnthropicWebSearchUserLocation);
			const shouldDeferWebSearch = toolSearchEnabled ? !this._toolDeferralService.isNonDeferredTool('web_search') : undefined;

			const webSearchTool: Anthropic.Beta.BetaWebSearchTool20250305 = {
				name: 'web_search',
				type: 'web_search_20250305',
				max_uses: maxUses,
				...(shouldDeferWebSearch ? { defer_loading: shouldDeferWebSearch } : {})
			};

			// Add domain filtering if configured
			// Cannot use both allowed and blocked domains simultaneously
			if (allowedDomains && allowedDomains.length > 0) {
				webSearchTool.allowed_domains = allowedDomains;
			} else if (blockedDomains && blockedDomains.length > 0) {
				webSearchTool.blocked_domains = blockedDomains;
			}

			// Add user location if configured
			// Note: All fields are optional according to Anthropic docs
			if (userLocation && (userLocation.city || userLocation.region || userLocation.country || userLocation.timezone)) {
				webSearchTool.user_location = {
					type: 'approximate',
					...userLocation
				};
			}

			tools.push(webSearchTool);
		}

		const thinkingBudget = this._getThinkingBudget(model.id, model.maxOutputTokens);

		// Check if model supports adaptive thinking
		const modelCapabilities = this._knownModels?.[model.id];
		const supportsAdaptiveThinking = modelCapabilities?.adaptiveThinking ?? false;

		// Build context management configuration
		const thinkingEnabled = supportsAdaptiveThinking || (thinkingBudget ?? 0) > 0;
		const contextManagement = isAnthropicContextEditingEnabled(model.id, this._configurationService, this._experimentationService) ? getContextManagementFromConfig(
			this._configurationService,
			this._experimentationService,
			thinkingEnabled
		) : undefined;

		// Build betas array for beta API features (adaptive thinking doesn't need interleaved-thinking beta)
		const betas: string[] = [];
		if (thinkingBudget && !supportsAdaptiveThinking) {
			betas.push('interleaved-thinking-2025-05-14');
		}
		if (hasMemoryTool || contextManagement) {
			betas.push('context-management-2025-06-27');
		}
		if (toolSearchEnabled) {
			betas.push('advanced-tool-use-2025-11-20');
		}

		// Only honor a reasoning-effort value the model's capability table declares it supports.
		const rawEffort = options.modelConfiguration?.reasoningEffort;
		const supportsEffort = modelCapabilities?.supportsReasoningEffort;
		const effort = supportsEffort && typeof rawEffort === 'string' && supportsEffort.includes(rawEffort)
			? rawEffort as 'low' | 'medium' | 'high' | 'max'
			: undefined;

		const params: Anthropic.Beta.Messages.MessageCreateParamsStreaming = {
			model: model.id,
			messages: convertedMessages,
			max_tokens: model.maxOutputTokens,
			stream: true,
			system: [system],
			tools: tools.length > 0 ? tools : undefined,
			thinking: supportsAdaptiveThinking
				? { type: 'adaptive' as const }
				: thinkingBudget ? { type: 'enabled' as const, budget_tokens: thinkingBudget } : undefined,
			...(effort ? { output_config: { effort } } : {}),
			context_management: contextManagement as Anthropic.Beta.Messages.BetaContextManagementConfig | undefined,
		};

		// Records every part reported to `progress` so we can replay it for logging below.
		const wrappedProgress = new RecordedProgress(progress);

		try {
			const result = await this._makeRequest(anthropicClient, wrappedProgress, params, betas, token, issuedTime);
			if (result.ttft) {
				pendingLoggedChatRequest.markTimeToFirstToken(result.ttft);
			}
			// Convert streamed parts to deltas for the request log.
			// NOTE(review): this mapping is duplicated in the catch block below —
			// candidate to extract into a shared private helper.
			const responseDeltas: IResponseDelta[] = wrappedProgress.items.map((i): IResponseDelta => {
				if (i instanceof LanguageModelTextPart) {
					return { text: i.value };
				} else if (i instanceof LanguageModelToolCallPart) {
					return {
						text: '',
						copilotToolCalls: [{
							name: i.name,
							arguments: JSON.stringify(i.input),
							id: i.callId
						}]
					};
				} else if (i instanceof LanguageModelToolResultPart) {
					// Handle tool results - extract text from content
					const resultText = i.content.map(c => c instanceof LanguageModelTextPart ? c.value : '').join('');
					return {
						text: `[Tool Result ${i.callId}]: ${resultText}`
					};
				} else {
					return { text: '' };
				}
			});
			// TODO: @bhavyaus - Add telemetry tracking for context editing (contextEditingApplied, contextEditingClearedTokens, contextEditingEditCount) like messagesApi.ts does
			if (result.contextManagement) {
				responseDeltas.push({
					text: '',
					contextManagement: result.contextManagement
				});
			}
			pendingLoggedChatRequest.resolve({
				type: ChatFetchResponseType.Success,
				requestId,
				serverRequestId: requestId,
				usage: result.usage,
				value: ['value'],
				resolvedModel: model.id
			}, responseDeltas);

			// Enrich OTel span with usage data from the Anthropic response
			if (otelSpan && result.usage) {
				otelSpan.setAttributes({
					[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,
					[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,
					...(result.usage.prompt_tokens_details?.cached_tokens
						? { [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }
						: {}),
					[GenAiAttr.RESPONSE_MODEL]: model.id,
					[GenAiAttr.RESPONSE_ID]: requestId,
					[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],
					[GenAiAttr.CONVERSATION_ID]: requestId,
					...(result.ttft ? { [CopilotChatAttr.TIME_TO_FIRST_TOKEN]: result.ttft } : {}),
					[GenAiAttr.REQUEST_MAX_TOKENS]: model.maxOutputTokens ?? 0,
				});
				// Opt-in content capture
				if (this._otelService.config.captureContent) {
					const responseText = wrappedProgress.items
						.filter((p): p is LanguageModelTextPart => p instanceof LanguageModelTextPart)
						.map(p => p.value).join('');
					const toolCalls = wrappedProgress.items
						.filter((p): p is LanguageModelToolCallPart => p instanceof LanguageModelToolCallPart)
						.map(tc => ({ type: 'tool_call' as const, id: tc.callId, name: tc.name, arguments: tc.input }));
					const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];
					if (responseText) { parts.push({ type: 'text', content: responseText }); }
					parts.push(...toolCalls);
					if (parts.length > 0) {
						otelSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));
					}
				}
			}

			// Record OTel metrics for this Anthropic LLM call
			if (result.usage) {
				const durationSec = (Date.now() - issuedTime) / 1000;
				const metricAttrs = { operationName: GenAiOperationName.CHAT, providerName: 'anthropic', requestModel: model.id, responseModel: model.id };
				GenAiMetrics.recordOperationDuration(this._otelService, durationSec, metricAttrs);
				if (result.usage.prompt_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs); }
				if (result.usage.completion_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs); }
				if (result.ttft) { GenAiMetrics.recordTimeToFirstToken(this._otelService, model.id, result.ttft / 1000); }
			}

			// Emit OTel inference details event
			emitInferenceDetailsEvent(
				this._otelService,
				{ model: model.id, maxTokens: model.maxOutputTokens },
				result.usage ? {
					id: requestId,
					model: model.id,
					finishReasons: ['stop'],
					inputTokens: result.usage.prompt_tokens,
					outputTokens: result.usage.completion_tokens,
				} : undefined,
			);

			// Send success telemetry matching response.success format
			/* __GDPR__
				"response.success" : {
					"owner": "digitarald",
					"comment": "Report quality details for a successful service response.",
					"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished" },
					"filterReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response was filtered" },
					"source": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Source of the initial request" },
					"initiatorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was initiated by a user or an agent" },
					"model": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Model selection for the response" },
					"modelInvoked": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Actual model invoked for the response" },
					"apiType": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "API type for the response- chat completions or responses" },
					"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id of the current turn request" },
					"gitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id if available" },
					"associatedRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Another request ID that this request is associated with (eg, the originating request of a summarization request)." },
					"reasoningEffort": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning effort level" },
					"reasoningSummary": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning summary level" },
					"fetcher": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The fetcher used for the request" },
					"transport": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The transport used for the request (http or websocket)" },
					"totalTokenMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum total token window", "isMeasurement": true },
					"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },
					"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
					"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
					"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },
					"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },
					"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },
					"acceptedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },
					"rejectedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },
					"completionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the output", "isMeasurement": true },
					"timeToFirstToken": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token", "isMeasurement": true },
					"timeToFirstTokenEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token emitted (visible text)", "isMeasurement": true },
					"timeToComplete": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to complete the request", "isMeasurement": true },
					"issuedTime": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Timestamp when the request was issued", "isMeasurement": true },
					"isVisionRequest": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request was for a vision model", "isMeasurement": true },
					"isBYOK": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for a BYOK model", "isMeasurement": true },
					"isAuto": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for an Auto model", "isMeasurement": true },
					"bytesReceived": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of bytes received in the response", "isMeasurement": true },
					"retryAfterError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the original request." },
					"retryAfterErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the original request if available" },
					"connectivityTestError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the connectivity test." },
					"connectivityTestErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the connectivity test request if available" },
					"retryAfterFilterCategory": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the response was filtered and this is a retry attempt, this contains the original filtered content category." },
					"suspendEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system suspend event was seen during the request", "isMeasurement": true },
					"resumeEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system resume event was seen during the request", "isMeasurement": true }
				}
			*/
			this._telemetryService.sendTelemetryEvent('response.success', { github: true, microsoft: true }, {
				source: 'byok.anthropic',
				model: model.id,
				requestId,
			}, {
				totalTokenMax: model.maxInputTokens ?? -1,
				tokenCountMax: model.maxOutputTokens ?? -1,
				promptTokenCount: result.usage?.prompt_tokens,
				promptCacheTokenCount: result.usage?.prompt_tokens_details?.cached_tokens,
				tokenCount: result.usage?.total_tokens,
				completionTokens: result.usage?.completion_tokens,
				timeToFirstToken: result.ttft,
				timeToFirstTokenEmitted: result.ttfte,
				timeToComplete: Date.now() - issuedTime,
				issuedTime,
				isBYOK: 1,
			});
		} catch (err) {
			this._logService.error(`BYOK Anthropic error: ${toErrorMessage(err, true)}`);
			// Resolve the log entry with whatever was streamed before the failure,
			// then rethrow so the caller sees the error.
			pendingLoggedChatRequest.resolve({
				type: ChatFetchResponseType.Unknown,
				requestId,
				serverRequestId: requestId,
				reason: err.message
			}, wrappedProgress.items.map((i): IResponseDelta => {
				if (i instanceof LanguageModelTextPart) {
					return { text: i.value };
				} else if (i instanceof LanguageModelToolCallPart) {
					return {
						text: '',
						copilotToolCalls: [{
							name: i.name,
							arguments: JSON.stringify(i.input),
							id: i.callId
						}]
					};
				} else if (i instanceof LanguageModelToolResultPart) {
					// Handle tool results - extract text from content
					const resultText = i.content.map(c => c instanceof LanguageModelTextPart ? c.value : '').join('');
					return {
						text: `[Tool Result ${i.callId}]: ${resultText}`
					};
				} else {
					return { text: '' };
				}
			}));
			throw err;
		}
	};

	// Create OTel span and execute with trace context + CapturingToken
	const executeRequest = async () => {
		otelSpan = this._otelService.startSpan(`chat ${model.id}`, {
			kind: SpanKind.CLIENT,
			attributes: {
				[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,
				[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.ANTHROPIC,
				[GenAiAttr.REQUEST_MODEL]: model.id,
				[GenAiAttr.AGENT_NAME]: 'AnthropicBYOK',
				[CopilotChatAttr.MAX_PROMPT_TOKENS]: model.maxInputTokens,
				[StdAttr.SERVER_ADDRESS]: 'api.anthropic.com',
			},
		});
		// Opt-in: capture input messages in OTel GenAI format
		if (this._otelService.config.captureContent) {
			// Tool definitions on the chat span (issue #299934) with `parameters`
			// per OTel GenAI semantic conventions (issue #300318).
			const toolDefs = toToolDefinitions(options.tools);
			if (toolDefs) {
				otelSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));
			}
			try {
				// NOTE(review): assumes the vscode LM role enum is 1=user, 2=assistant,
				// 3=system — confirm against the vscode API if the enum ever changes.
				const roleNames: Record<number, string> = { 1: 'user', 2: 'assistant', 3: 'system' };
				const inputMsgs = messages.map(m => {
					const msg = m as LanguageModelChatMessage;
					const role = roleNames[msg.role] ?? String(msg.role);
					const parts: Array<{ type: string; content?: string | unknown; id?: string; name?: string; arguments?: unknown; response?: unknown }> = [];
					if (Array.isArray(msg.content)) {
						for (const p of msg.content) {
							if (p instanceof LanguageModelTextPart) {
								parts.push({ type: 'text', content: p.value });
							} else if (p instanceof LanguageModelToolCallPart) {
								parts.push({ type: 'tool_call', id: p.callId, name: p.name, arguments: p.input });
							} else if (p instanceof LanguageModelToolResultPart) {
								const resultText = p.content.map((c: unknown) => c instanceof LanguageModelTextPart ? c.value : '').join('');
								parts.push({ type: 'tool_call_response', id: p.callId, response: resultText });
							}
						}
					}
					if (parts.length === 0) {
						parts.push({ type: 'text', content: '[non-text content]' });
					}
					return { role, parts };
				});
				otelSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(inputMsgs)));
			} catch { /* swallow */ }
		}
		try {
			const result = capturingToken
				? await runWithCapturingToken(capturingToken, doRequest)
				: await doRequest();
			otelSpan.setStatus(SpanStatusCode.OK);
			return result;
		} catch (err) {
			otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));
			throw err;
		} finally {
			otelSpan.end();
		}
	};

	if (parentTraceContext) {
		return this._otelService.runWithTraceContext(parentTraceContext, executeRequest);
	}
	return executeRequest();
}
535
536
async provideTokenCount(model: LanguageModelChatInformation, text: string | LanguageModelChatMessage | LanguageModelChatMessage2, token: CancellationToken): Promise<number> {
537
// Simple estimation - actual token count would require Claude's tokenizer
538
return Math.ceil(text.toString().length / 4);
539
}
540
541
private async _makeRequest(anthropicClient: Anthropic, progress: RecordedProgress<LMResponsePart>, params: Anthropic.Beta.Messages.MessageCreateParamsStreaming, betas: string[], token: CancellationToken, issuedTime: number): Promise<{ ttft: number | undefined; ttfte: number | undefined; usage: APIUsage | undefined; contextManagement: ContextManagementResponse | undefined }> {
542
const start = Date.now();
543
let ttft: number | undefined;
544
let ttfte: number | undefined;
545
546
const stream = await anthropicClient.beta.messages.create({
547
...params,
548
...(betas.length > 0 && { betas })
549
});
550
551
let pendingToolCall: {
552
toolId?: string;
553
name?: string;
554
jsonInput?: string;
555
} | undefined;
556
let pendingThinking: {
557
thinking?: string;
558
signature?: string;
559
} | undefined;
560
let pendingRedactedThinking: {
561
data: string;
562
} | undefined;
563
let pendingServerToolCall: {
564
toolId?: string;
565
name?: string;
566
jsonInput?: string;
567
type?: string;
568
} | undefined;
569
let usage: APIUsage | undefined;
570
let contextManagementResponse: ContextManagementResponse | undefined;
571
572
let hasText = false;
573
for await (const chunk of stream) {
574
if (token.isCancellationRequested) {
575
break;
576
}
577
578
if (ttft === undefined) {
579
ttft = Date.now() - start;
580
}
581
this._logService.trace(`chunk: ${JSON.stringify(chunk)}`);
582
583
if (chunk.type === 'content_block_start') {
584
if ('content_block' in chunk && chunk.content_block.type === 'tool_use') {
585
pendingToolCall = {
586
toolId: chunk.content_block.id,
587
name: chunk.content_block.name,
588
jsonInput: ''
589
};
590
} else if ('content_block' in chunk && chunk.content_block.type === 'server_tool_use') {
591
// Handle server-side tool use (e.g., web_search)
592
pendingServerToolCall = {
593
toolId: chunk.content_block.id,
594
name: chunk.content_block.name,
595
jsonInput: '',
596
type: chunk.content_block.name
597
};
598
progress.report(new LanguageModelTextPart('\n'));
599
600
} else if ('content_block' in chunk && chunk.content_block.type === 'thinking') {
601
pendingThinking = {
602
thinking: '',
603
signature: ''
604
};
605
} else if ('content_block' in chunk && chunk.content_block.type === 'redacted_thinking') {
606
const redactedBlock = chunk.content_block as Anthropic.Messages.RedactedThinkingBlock;
607
pendingRedactedThinking = {
608
data: redactedBlock.data
609
};
610
} else if ('content_block' in chunk && chunk.content_block.type === 'web_search_tool_result') {
611
if (!pendingServerToolCall || !pendingServerToolCall.toolId) {
612
continue;
613
}
614
615
const resultBlock = chunk.content_block as Anthropic.Messages.WebSearchToolResultBlock;
616
// Handle potential error in web search
617
if (!Array.isArray(resultBlock.content)) {
618
this._logService.error(`Web search error: ${(resultBlock.content as Anthropic.Messages.WebSearchToolResultError).error_code}`);
619
continue;
620
}
621
622
const results = resultBlock.content.map((result: Anthropic.Messages.WebSearchResultBlock) => ({
623
type: 'web_search_result',
624
url: result.url,
625
title: result.title,
626
page_age: result.page_age,
627
encrypted_content: result.encrypted_content
628
}));
629
630
// Format according to Anthropic's web_search_tool_result specification
631
const toolResult = {
632
type: 'web_search_tool_result',
633
tool_use_id: pendingServerToolCall.toolId,
634
content: results
635
};
636
637
const searchResults = JSON.stringify(toolResult, null, 2);
638
639
// TODO: @bhavyaus - instead of just pushing text, create a specialized WebSearchResult part
640
progress.report(new LanguageModelToolResultPart(
641
pendingServerToolCall.toolId!,
642
[new LanguageModelTextPart(searchResults)]
643
));
644
pendingServerToolCall = undefined;
645
}
646
continue;
647
}
648
649
if (chunk.type === 'content_block_delta') {
650
if (chunk.delta.type === 'text_delta') {
651
progress.report(new LanguageModelTextPart(chunk.delta.text || ''));
652
if (!hasText && chunk.delta.text?.length > 0) {
653
ttfte = Date.now() - issuedTime;
654
}
655
hasText ||= chunk.delta.text?.length > 0;
656
} else if (chunk.delta.type === 'citations_delta') {
657
if ('citation' in chunk.delta) {
658
// TODO: @bhavyaus - instead of just pushing text, create a specialized Citation part
659
const citation = chunk.delta.citation as Anthropic.Messages.CitationsWebSearchResultLocation;
660
if (citation.type === 'web_search_result_location') {
661
// Format citation according to Anthropic specification
662
const citationData = {
663
type: 'web_search_result_location',
664
url: citation.url,
665
title: citation.title,
666
encrypted_index: citation.encrypted_index,
667
cited_text: citation.cited_text
668
};
669
670
// Format citation as readable blockquote with source link
671
const referenceText = `\n> "${citation.cited_text}" — [${vscode.l10n.t('Source')}](${citation.url})\n\n`;
672
673
// Report formatted reference text to user
674
progress.report(new LanguageModelTextPart(referenceText));
675
676
// Store the citation data in the correct format for multi-turn conversations
677
progress.report(new LanguageModelToolResultPart(
678
'citation',
679
[new LanguageModelTextPart(JSON.stringify(citationData, null, 2))]
680
));
681
}
682
}
683
} else if (chunk.delta.type === 'thinking_delta') {
684
if (pendingThinking) {
685
pendingThinking.thinking = (pendingThinking.thinking || '') + (chunk.delta.thinking || '');
686
progress.report(new LanguageModelThinkingPart(chunk.delta.thinking || ''));
687
}
688
} else if (chunk.delta.type === 'signature_delta') {
689
// Accumulate signature
690
if (pendingThinking) {
691
pendingThinking.signature = (pendingThinking.signature || '') + (chunk.delta.signature || '');
692
}
693
} else if (chunk.delta.type === 'input_json_delta' && pendingToolCall) {
694
pendingToolCall.jsonInput = (pendingToolCall.jsonInput || '') + (chunk.delta.partial_json || '');
695
696
try {
697
// Try to parse the accumulated JSON to see if it's complete
698
const parsedJson = JSON.parse(pendingToolCall.jsonInput);
699
progress.report(new LanguageModelToolCallPart(
700
pendingToolCall.toolId!,
701
pendingToolCall.name!,
702
parsedJson
703
));
704
pendingToolCall = undefined;
705
} catch {
706
// JSON is not complete yet, continue accumulating
707
continue;
708
}
709
} else if (chunk.delta.type === 'input_json_delta' && pendingServerToolCall) {
710
pendingServerToolCall.jsonInput = (pendingServerToolCall.jsonInput || '') + (chunk.delta.partial_json || '');
711
}
712
}
713
714
if (chunk.type === 'content_block_stop') {
715
if (pendingToolCall) {
716
try {
717
const parsedJson = JSON.parse(pendingToolCall.jsonInput || '{}');
718
progress.report(
719
new LanguageModelToolCallPart(
720
pendingToolCall.toolId!,
721
pendingToolCall.name!,
722
parsedJson
723
)
724
);
725
} catch (e) {
726
console.error('Failed to parse tool call JSON:', e);
727
}
728
pendingToolCall = undefined;
729
} else if (pendingThinking) {
730
if (pendingThinking.signature) {
731
const finalThinkingPart = new LanguageModelThinkingPart('');
732
finalThinkingPart.metadata = {
733
signature: pendingThinking.signature,
734
_completeThinking: pendingThinking.thinking
735
};
736
progress.report(finalThinkingPart);
737
}
738
pendingThinking = undefined;
739
} else if (pendingRedactedThinking) {
740
pendingRedactedThinking = undefined;
741
}
742
}
743
744
if (chunk.type === 'message_start') {
745
// TODO final output tokens: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":46}}
746
usage = {
747
completion_tokens: -1,
748
prompt_tokens: chunk.message.usage.input_tokens + (chunk.message.usage.cache_creation_input_tokens ?? 0) + (chunk.message.usage.cache_read_input_tokens ?? 0),
749
total_tokens: -1,
750
// Cast needed: Anthropic returns cache_creation_input_tokens which APIUsage.prompt_tokens_details doesn't define
751
prompt_tokens_details: {
752
cached_tokens: chunk.message.usage.cache_read_input_tokens ?? 0,
753
cache_creation_input_tokens: chunk.message.usage.cache_creation_input_tokens
754
} as any
755
};
756
} else if (usage && chunk.type === 'message_delta') {
757
if (chunk.usage.output_tokens) {
758
usage.completion_tokens = chunk.usage.output_tokens;
759
usage.total_tokens = usage.prompt_tokens + chunk.usage.output_tokens;
760
}
761
// Handle context management response
762
if ('context_management' in chunk && chunk.context_management) {
763
contextManagementResponse = chunk.context_management as ContextManagementResponse;
764
const totalClearedTokens = contextManagementResponse.applied_edits.reduce(
765
(sum, edit) => sum + (edit.cleared_input_tokens || 0),
766
0
767
);
768
this._logService.info(`BYOK Anthropic context editing applied: cleared ${totalClearedTokens} tokens across ${contextManagementResponse.applied_edits.length} edits`);
769
// Emit context management via LanguageModelDataPart so it flows through to toolCallingLoop
770
progress.report(new LanguageModelDataPart(
771
new TextEncoder().encode(JSON.stringify(contextManagementResponse)),
772
CustomDataPartMimeTypes.ContextManagement
773
));
774
}
775
}
776
}
777
778
return { ttft, ttfte, usage, contextManagement: contextManagementResponse };
779
}
780
}
781
782