Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/chatSessions/claude/node/test/claudeCodeAgentOTel.spec.ts
13406 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type { Options, PermissionMode, Query, SDKAssistantMessage, SDKResultMessage, SDKUserMessage as SDKUserMessageType } from '@anthropic-ai/claude-agent-sdk';
7
import type Anthropic from '@anthropic-ai/sdk';
8
import { randomUUID } from 'crypto';
9
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
10
import type * as vscode from 'vscode';
11
import { resolveOTelConfig } from '../../../../../platform/otel/common/index';
12
import { ICompletedSpanData, IOTelService } from '../../../../../platform/otel/common/otelService';
13
import { InMemoryOTelService } from '../../../../../platform/otel/node/inMemoryOTelService';
14
import { CancellationToken } from '../../../../../util/vs/base/common/cancellation';
15
import { DisposableStore } from '../../../../../util/vs/base/common/lifecycle';
16
import { IInstantiationService } from '../../../../../util/vs/platform/instantiation/common/instantiation';
17
import { createExtensionUnitTestingServices } from '../../../../test/node/services';
18
import { MockChatResponseStream } from '../../../../test/node/testHelpers';
19
import type { ClaudeFolderInfo } from '../../common/claudeFolderInfo';
20
import { ClaudeCodeSession } from '../claudeCodeAgent';
21
import { IClaudeCodeSdkService } from '../claudeCodeSdkService';
22
import { ClaudeLanguageModelServer } from '../claudeLanguageModelServer';
23
import { parseClaudeModelId } from '../claudeModelId';
24
import { IClaudeSessionStateService } from '../../common/claudeSessionStateService';
25
26
// Raw model id string; also used as the `model` field of fabricated assistant messages.
const TEST_MODEL_ID_STRING = 'claude-3-sonnet';
27
// Parsed form of the model id, committed to the session state before each session is created.
const TEST_MODEL_ID = parseClaudeModelId(TEST_MODEL_ID_STRING);
28
// Permission mode committed to the session state for every test session.
const TEST_PERMISSION_MODE: PermissionMode = 'acceptEdits';
29
// Folder info committed to the session state for every test session.
const TEST_FOLDER_INFO: ClaudeFolderInfo = { cwd: '/test/project', additionalDirectories: [] };
30
31
/**
 * Builds a minimal stand-in for the language model server: a spy for
 * `incrementUserInitiatedMessageCount` plus a `getConfig` that reports a fixed
 * port and nonce.
 */
function createMockLangModelServer(): ClaudeLanguageModelServer {
	const serverStub = {
		incrementUserInitiatedMessageCount: vi.fn(),
		getConfig() {
			return { port: 8080, nonce: 'test-nonce' };
		},
	};
	return serverStub as unknown as ClaudeLanguageModelServer;
}
37
38
/**
 * Builds a bare-bones chat request carrying `prompt`; every other field is a
 * fixed placeholder (no references, no tools, constant id and token).
 */
function createMockChatRequest(prompt = ''): vscode.ChatRequest {
	const requestStub = {
		prompt,
		references: [],
		tools: new Map(),
		id: 'test-request-id',
		toolInvocationToken: {},
	};
	return requestStub as unknown as vscode.ChatRequest;
}
41
42
/**
 * Seeds the session state service for `sessionId` with the shared test model id,
 * permission mode and folder info (in that order).
 */
function commitTestState(sessionStateService: IClaudeSessionStateService, sessionId: string): void {
	const state = sessionStateService;
	state.setModelIdForSession(sessionId, TEST_MODEL_ID);
	state.setPermissionModeForSession(sessionId, TEST_PERMISSION_MODE);
	state.setFolderInfoForSession(sessionId, TEST_FOLDER_INFO);
}
50
51
/**
 * Builds a fake SDK service whose `query` replays scripted messages.
 *
 * For every user message read from the query's prompt stream, the returned query
 * yields everything produced by `messageFactory`, which is called with that
 * message's `session_id` (or an empty string when it has none). All other service
 * methods resolve to empty/placeholder values.
 */
function createToolCallSdkService(messageFactory: (sessionId: string) => AsyncGenerator<SDKAssistantMessage | SDKUserMessageType | SDKResultMessage, void, unknown>): IClaudeCodeSdkService {
	// One scripted reply sequence per incoming prompt message.
	async function* replyToEachPrompt(prompts: AsyncIterable<SDKUserMessageType>) {
		for await (const userMessage of prompts) {
			yield* messageFactory(userMessage.session_id ?? '');
		}
	}

	return {
		_serviceBrand: undefined,
		async query(options: { prompt: AsyncIterable<SDKUserMessageType>; options: Options }) {
			const replies = replyToEachPrompt(options.prompt);
			const queryStub = {
				// Always hands back the same generator, so iteration state is shared.
				[Symbol.asyncIterator]: () => replies,
				setModel: async () => { },
				setPermissionMode: async () => { },
				abort: () => { },
			};
			return queryStub as unknown as Query;
		},
		listSessions: async () => [],
		getSessionInfo: async () => undefined,
		getSessionMessages: async () => [],
		renameSession: async () => { },
		forkSession: async () => ({ sessionId: 'forked' }),
		listSubagents: async () => [],
		getSubagentMessages: async () => [],
	};
}
81
82
/**
 * Creates an in-memory OTel service together with an array that receives every
 * span the service reports through `onDidCompleteSpan`.
 */
function createOTelService() {
	const otelService = new InMemoryOTelService(
		resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' })
	);
	const spans: ICompletedSpanData[] = [];
	otelService.onDidCompleteSpan(completed => spans.push(completed));
	return { otelService, spans };
}
89
90
/**
 * Creates a typed assistant message for `sessionId` wrapping the given content
 * blocks (tool_use or text). Both the envelope uuid and the inner message id are
 * freshly generated; `stop_reason` is always 'tool_use' and token usage is zero.
 */
function makeAssistantMessage(sessionId: string, content: Anthropic.Beta.Messages.BetaContentBlock[]): SDKAssistantMessage {
	const innerMessage = {
		id: `msg-${randomUUID()}`,
		type: 'message',
		role: 'assistant',
		model: TEST_MODEL_ID_STRING,
		content,
		stop_reason: 'tool_use',
		stop_sequence: null,
		usage: { input_tokens: 0, output_tokens: 0 },
	};
	const envelope = {
		type: 'assistant',
		session_id: sessionId,
		uuid: randomUUID(),
		parent_tool_use_id: null,
		message: innerMessage,
	};
	return envelope as SDKAssistantMessage;
}
109
110
/**
 * Creates a typed user message for `sessionId` whose content is the given
 * tool_result blocks, with no parent tool use.
 */
function makeUserMessage(sessionId: string, content: Anthropic.Messages.ToolResultBlockParam[]): SDKUserMessageType {
	const toolResultTurn = { role: 'user', content };
	const envelope = {
		type: 'user',
		session_id: sessionId,
		parent_tool_use_id: null,
		message: toolResultTurn,
	};
	return envelope as SDKUserMessageType;
}
122
123
/**
 * Creates the result message that ends a turn for `sessionId`: subtype
 * 'error_max_turns', not flagged as an error, with zeroed timings, cost and usage
 * and a freshly generated uuid.
 */
function makeResultMessage(sessionId: string): SDKResultMessage {
	const zeroUsage = { input_tokens: 0, output_tokens: 0 };
	// The real SDKResultMessage demands deep NonNullableUsage fields that the OTel
	// tests never read, so this follows the repo-standard pattern of building a
	// partial object and casting it through `unknown`.
	const resultStub = {
		type: 'result',
		subtype: 'error_max_turns',
		uuid: randomUUID(),
		session_id: sessionId,
		duration_ms: 0,
		duration_api_ms: 0,
		is_error: false,
		num_turns: 0,
		stop_reason: null,
		total_cost_usd: 0,
		usage: zeroUsage,
		modelUsage: {},
		permission_denials: [],
		errors: [],
	};
	return resultStub as unknown as SDKResultMessage;
}
144
145
describe('Claude Session OTel Tool Spans', () => {
	// Numeric status codes asserted on completed spans (SpanStatusCode.OK / SpanStatusCode.ERROR).
	const SPAN_STATUS_OK = 1;
	const SPAN_STATUS_ERROR = 2;

	const store = new DisposableStore();
	let spans: ICompletedSpanData[];

	beforeEach(() => {
		spans = [];
	});

	afterEach(() => {
		store.clear();
		vi.resetAllMocks();
	});

	/**
	 * Runs one chat turn through a fresh `ClaudeCodeSession` wired to an in-memory
	 * OTel service and a scripted SDK service, and points `spans` at the spans the
	 * OTel service completed.
	 *
	 * @param sessionId Session id committed to the state service and given to the session.
	 * @param prompt Prompt text of the chat request passed to `invoke`.
	 * @param messageFactory Scripted SDK messages to replay for the turn.
	 */
	async function runSession(
		sessionId: string,
		prompt: string,
		messageFactory: Parameters<typeof createToolCallSdkService>[0],
	): Promise<void> {
		const services = store.add(createExtensionUnitTestingServices());
		const { otelService, spans: collectedSpans } = createOTelService();
		spans = collectedSpans;
		services.define(IOTelService, otelService);
		services.define(IClaudeCodeSdkService, createToolCallSdkService(messageFactory));
		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		const sessionStateService = accessor.get(IClaudeSessionStateService);

		// Session state must be committed before the session is created.
		commitTestState(sessionStateService, sessionId);
		const session = store.add(instantiationService.createInstance(
			ClaudeCodeSession, createMockLangModelServer(), sessionId, true
		));
		const stream = new MockChatResponseStream();

		await session.invoke(createMockChatRequest(prompt), stream, undefined, CancellationToken.None);
	}

	/**
	 * Returns the first completed span called `name`, or fails the test with a
	 * message listing the span names that were actually completed.
	 */
	function requireSpan(name: string): ICompletedSpanData {
		const span = spans.find(s => s.name === name);
		if (!span) {
			const completed = spans.map(s => s.name).join(', ') || '<none>';
			throw new Error(`Expected a completed span named "${name}"; completed spans: ${completed}`);
		}
		return span;
	}

	it('emits an execute_tool span for a successful tool call', async () => {
		const sessionId = 'otel-test-1';
		await runSession(sessionId, 'read file', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-1', name: 'Read', input: { file_path: '/foo.ts' } },
			]);

			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-1', content: 'file contents here' },
			]);

			yield makeResultMessage(sid);
		});

		// Should have a user_message span + an execute_tool span
		const toolSpan = requireSpan('execute_tool Read');
		expect(toolSpan.attributes['gen_ai.operation.name']).toBe('execute_tool');
		expect(toolSpan.attributes['gen_ai.tool.name']).toBe('Read');
		expect(toolSpan.attributes['gen_ai.tool.call.id']).toBe('tu-1');
		expect(toolSpan.attributes['copilot_chat.chat_session_id']).toBe(sessionId);
		expect(toolSpan.status.code).toBe(SPAN_STATUS_OK);
		expect(toolSpan.attributes['gen_ai.tool.call.arguments']).toContain('file_path');
		expect(toolSpan.attributes['gen_ai.tool.call.result']).toContain('file contents here');
	});

	it('emits an execute_tool span with ERROR status for a failed tool call', async () => {
		const sessionId = 'otel-test-2';
		await runSession(sessionId, 'write file', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-err', name: 'Write', input: { file_path: '/readonly.ts', content: 'x' } },
			]);

			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-err', content: 'Permission denied', is_error: true },
			]);

			yield makeResultMessage(sid);
		});

		const toolSpan = requireSpan('execute_tool Write');
		expect(toolSpan.status.code).toBe(SPAN_STATUS_ERROR);
		expect(toolSpan.status.message).toContain('Permission denied');
		expect(toolSpan.attributes['gen_ai.tool.call.result']).toContain('ERROR');
	});

	it('correctly correlates multiple concurrent tool calls', async () => {
		const sessionId = 'otel-test-3';
		await runSession(sessionId, 'read and glob', async function* (sid) {
			// Assistant emits two tool_use blocks in one message
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-a', name: 'Read', input: { file_path: '/a.ts' } },
				{ type: 'tool_use', id: 'tu-b', name: 'Glob', input: { pattern: '*.ts' } },
			]);

			// Results come in reverse order
			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-b', content: 'glob result' },
				{ type: 'tool_result', tool_use_id: 'tu-a', content: 'read result' },
			]);

			yield makeResultMessage(sid);
		});

		const readSpan = requireSpan('execute_tool Read');
		const globSpan = requireSpan('execute_tool Glob');
		expect(readSpan.attributes['gen_ai.tool.call.result']).toContain('read result');
		expect(globSpan.attributes['gen_ai.tool.call.result']).toContain('glob result');
		expect(readSpan.status.code).toBe(SPAN_STATUS_OK);
		expect(globSpan.status.code).toBe(SPAN_STATUS_OK);
	});

	it('emits user_message span for user prompts', async () => {
		const sessionId = 'otel-test-4';
		await runSession(sessionId, 'hello', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'text', text: 'Hello!', citations: [] },
			]);
			yield makeResultMessage(sid);
		});

		const userMsgSpan = requireSpan('user_message');
		expect(userMsgSpan.attributes['copilot_chat.chat_session_id']).toBe(sessionId);
	});

	it('records tool_input as TOOL_CALL_ARGUMENTS', async () => {
		const sessionId = 'otel-test-5';
		await runSession(sessionId, 'run command', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-args', name: 'Bash', input: { command: 'ls -la' } },
			]);

			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-args', content: 'output' },
			]);

			yield makeResultMessage(sid);
		});

		const toolSpan = requireSpan('execute_tool Bash');
		expect(toolSpan.attributes['gen_ai.tool.call.arguments']).toContain('ls -la');
	});
});
351
352