Path: blob/main/extensions/copilot/src/extension/chatSessions/claude/node/test/claudeCodeAgentOTel.spec.ts
13406 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { Options, PermissionMode, Query, SDKAssistantMessage, SDKResultMessage, SDKUserMessage as SDKUserMessageType } from '@anthropic-ai/claude-agent-sdk';
import type Anthropic from '@anthropic-ai/sdk';
import { randomUUID } from 'crypto';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type * as vscode from 'vscode';
import { resolveOTelConfig } from '../../../../../platform/otel/common/index';
import { ICompletedSpanData, IOTelService } from '../../../../../platform/otel/common/otelService';
import { InMemoryOTelService } from '../../../../../platform/otel/node/inMemoryOTelService';
import { CancellationToken } from '../../../../../util/vs/base/common/cancellation';
import { DisposableStore } from '../../../../../util/vs/base/common/lifecycle';
import { IInstantiationService } from '../../../../../util/vs/platform/instantiation/common/instantiation';
import { createExtensionUnitTestingServices } from '../../../../test/node/services';
import { MockChatResponseStream } from '../../../../test/node/testHelpers';
import type { ClaudeFolderInfo } from '../../common/claudeFolderInfo';
import { ClaudeCodeSession } from '../claudeCodeAgent';
import { IClaudeCodeSdkService } from '../claudeCodeSdkService';
import { ClaudeLanguageModelServer } from '../claudeLanguageModelServer';
import { parseClaudeModelId } from '../claudeModelId';
import { IClaudeSessionStateService } from '../../common/claudeSessionStateService';

const TEST_MODEL_ID_STRING = 'claude-3-sonnet';
const TEST_MODEL_ID = parseClaudeModelId(TEST_MODEL_ID_STRING);
const TEST_PERMISSION_MODE: PermissionMode = 'acceptEdits';
const TEST_FOLDER_INFO: ClaudeFolderInfo = { cwd: '/test/project', additionalDirectories: [] };

/** Numeric value the tests expect in `status.code` for a successful span (SpanStatusCode.OK). */
const SPAN_STATUS_OK = 1;
/** Numeric value the tests expect in `status.code` for a failed span (SpanStatusCode.ERROR). */
const SPAN_STATUS_ERROR = 2;

/** Any message a scripted SDK turn may emit. */
type ScriptedSdkMessage = SDKAssistantMessage | SDKUserMessageType | SDKResultMessage;

/** Produces the scripted SDK messages for one turn, given the session id of the incoming prompt. */
type MessageFactory = (sessionId: string) => AsyncGenerator<ScriptedSdkMessage, void, unknown>;

/**
 * Builds a minimal language model server stub exposing only
 * `incrementUserInitiatedMessageCount` and `getConfig`.
 */
function createMockLangModelServer(): ClaudeLanguageModelServer {
	return {
		incrementUserInitiatedMessageCount: vi.fn(),
		getConfig: () => ({ port: 8080, nonce: 'test-nonce' }),
	} as unknown as ClaudeLanguageModelServer;
}

/**
 * Builds a chat request stub carrying `prompt`, with no references and no tools.
 */
function createMockChatRequest(prompt = ''): vscode.ChatRequest {
	return {
		prompt,
		references: [],
		tools: new Map(),
		id: 'test-request-id',
		toolInvocationToken: {},
	} as unknown as vscode.ChatRequest;
}

/**
 * Records the test model id, permission mode and folder info for `sessionId`
 * in the session state service.
 */
function commitTestState(
	sessionStateService: IClaudeSessionStateService,
	sessionId: string,
): void {
	sessionStateService.setModelIdForSession(sessionId, TEST_MODEL_ID);
	sessionStateService.setPermissionModeForSession(sessionId, TEST_PERMISSION_MODE);
	sessionStateService.setFolderInfoForSession(sessionId, TEST_FOLDER_INFO);
}

/**
 * Creates a mock SDK service that emits a configurable sequence of messages.
 *
 * For every message read from the prompt iterable, `messageFactory` is called
 * with that message's `session_id` (or `''` when it is absent) and everything
 * it yields is forwarded to the consumer of the returned query.
 */
function createToolCallSdkService(messageFactory: MessageFactory): IClaudeCodeSdkService {
	return {
		_serviceBrand: undefined,
		async query({ prompt }: { prompt: AsyncIterable<SDKUserMessageType>; options: Options }) {
			const generator = (async function* () {
				for await (const msg of prompt) {
					yield* messageFactory(msg.session_id ?? '');
				}
			})();
			return {
				[Symbol.asyncIterator]: () => generator,
				setModel: async () => { },
				setPermissionMode: async () => { },
				abort: () => { },
			} as unknown as Query;
		},
		async listSessions() { return []; },
		async getSessionInfo() { return undefined; },
		async getSessionMessages() { return []; },
		async renameSession() { },
		async forkSession() { return { sessionId: 'forked' }; },
		async listSubagents() { return []; },
		async getSubagentMessages() { return []; },
	};
}

/**
 * Creates an in-memory OTel service together with an array that receives
 * every span reported through `onDidCompleteSpan`.
 */
function createOTelService() {
	const config = resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' });
	const otelService = new InMemoryOTelService(config);
	const spans: ICompletedSpanData[] = [];
	otelService.onDidCompleteSpan(span => spans.push(span));
	return { otelService, spans };
}

/** Creates a typed assistant message wrapping the given content blocks (tool_use or text). */
function makeAssistantMessage(sessionId: string, content: Anthropic.Beta.Messages.BetaContentBlock[]): SDKAssistantMessage {
	return {
		type: 'assistant',
		session_id: sessionId,
		uuid: randomUUID(),
		parent_tool_use_id: null,
		message: {
			id: `msg-${randomUUID()}`,
			type: 'message',
			role: 'assistant',
			model: TEST_MODEL_ID_STRING,
			content,
			stop_reason: 'tool_use',
			stop_sequence: null,
			usage: { input_tokens: 0, output_tokens: 0 },
		},
	} as SDKAssistantMessage;
}

/** Creates a typed user message with tool_result content blocks */
function makeUserMessage(sessionId: string, content: Anthropic.Messages.ToolResultBlockParam[]): SDKUserMessageType {
	return {
		type: 'user',
		session_id: sessionId,
		parent_tool_use_id: null,
		message: {
			role: 'user',
			content,
		},
	} as SDKUserMessageType;
}

/** Creates a standard result message to end a turn */
function makeResultMessage(sessionId: string): SDKResultMessage {
	// SDKResultMessage requires deep NonNullableUsage fields that are irrelevant
	// to OTel tests. Use the repo-standard pattern of as unknown as SDKResultMessage.
	return {
		type: 'result',
		subtype: 'error_max_turns',
		uuid: randomUUID(),
		session_id: sessionId,
		duration_ms: 0,
		duration_api_ms: 0,
		is_error: false,
		num_turns: 0,
		stop_reason: null,
		total_cost_usd: 0,
		usage: { input_tokens: 0, output_tokens: 0 },
		modelUsage: {},
		permission_denials: [],
		errors: [],
	} as unknown as SDKResultMessage;
}

describe('Claude Session OTel Tool Spans', () => {
	const store = new DisposableStore();
	let spans: ICompletedSpanData[];

	beforeEach(() => {
		spans = [];
	});

	afterEach(() => {
		store.clear();
		vi.resetAllMocks();
	});

	/**
	 * Wires up the testing services with an in-memory OTel service and a scripted
	 * SDK service, then invokes a `ClaudeCodeSession` once with `prompt`.
	 * Completed spans are collected into the suite-level `spans` array.
	 */
	async function runSession(sessionId: string, prompt: string, messageFactory: MessageFactory): Promise<void> {
		const services = store.add(createExtensionUnitTestingServices());
		const { otelService, spans: recordedSpans } = createOTelService();
		spans = recordedSpans;
		services.define(IOTelService, otelService);
		services.define(IClaudeCodeSdkService, createToolCallSdkService(messageFactory));

		const accessor = services.createTestingAccessor();
		const instantiationService = accessor.get(IInstantiationService);
		commitTestState(accessor.get(IClaudeSessionStateService), sessionId);

		const session = store.add(instantiationService.createInstance(
			ClaudeCodeSession, createMockLangModelServer(), sessionId, true
		));
		const stream = new MockChatResponseStream();

		await session.invoke(createMockChatRequest(prompt), stream, undefined, CancellationToken.None);
	}

	/**
	 * Returns the first completed span called `name`, failing the test with the
	 * list of recorded span names when there is none.
	 */
	function getSpan(name: string): ICompletedSpanData {
		const span = spans.find(s => s.name === name);
		if (!span) {
			const recorded = spans.map(s => s.name).join(', ');
			throw new Error(`Expected a completed span named '${name}' but recorded: [${recorded}]`);
		}
		return span;
	}

	it('emits an execute_tool span for a successful tool call', async () => {
		const sessionId = 'otel-test-1';
		await runSession(sessionId, 'read file', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-1', name: 'Read', input: { file_path: '/foo.ts' } },
			]);

			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-1', content: 'file contents here' },
			]);

			yield makeResultMessage(sid);
		});

		// Only the execute_tool span is inspected here; the user_message span
		// has its own test below.
		const toolSpan = getSpan('execute_tool Read');
		expect(toolSpan.attributes['gen_ai.operation.name']).toBe('execute_tool');
		expect(toolSpan.attributes['gen_ai.tool.name']).toBe('Read');
		expect(toolSpan.attributes['gen_ai.tool.call.id']).toBe('tu-1');
		expect(toolSpan.attributes['copilot_chat.chat_session_id']).toBe(sessionId);
		expect(toolSpan.status.code).toBe(SPAN_STATUS_OK);
		expect(toolSpan.attributes['gen_ai.tool.call.arguments']).toContain('file_path');
		expect(toolSpan.attributes['gen_ai.tool.call.result']).toContain('file contents here');
	});

	it('emits an execute_tool span with ERROR status for a failed tool call', async () => {
		const sessionId = 'otel-test-2';
		await runSession(sessionId, 'write file', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-err', name: 'Write', input: { file_path: '/readonly.ts', content: 'x' } },
			]);

			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-err', content: 'Permission denied', is_error: true },
			]);

			yield makeResultMessage(sid);
		});

		const toolSpan = getSpan('execute_tool Write');
		expect(toolSpan.status.code).toBe(SPAN_STATUS_ERROR);
		expect(toolSpan.status.message).toContain('Permission denied');
		expect(toolSpan.attributes['gen_ai.tool.call.result']).toContain('ERROR');
	});

	it('correctly correlates multiple concurrent tool calls', async () => {
		const sessionId = 'otel-test-3';
		await runSession(sessionId, 'read and glob', async function* (sid) {
			// Assistant emits two tool_use blocks in one message
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-a', name: 'Read', input: { file_path: '/a.ts' } },
				{ type: 'tool_use', id: 'tu-b', name: 'Glob', input: { pattern: '*.ts' } },
			]);

			// Results come in reverse order
			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-b', content: 'glob result' },
				{ type: 'tool_result', tool_use_id: 'tu-a', content: 'read result' },
			]);

			yield makeResultMessage(sid);
		});

		const readSpan = getSpan('execute_tool Read');
		const globSpan = getSpan('execute_tool Glob');
		expect(readSpan.attributes['gen_ai.tool.call.result']).toContain('read result');
		expect(globSpan.attributes['gen_ai.tool.call.result']).toContain('glob result');
		expect(readSpan.status.code).toBe(SPAN_STATUS_OK);
		expect(globSpan.status.code).toBe(SPAN_STATUS_OK);
	});

	it('emits user_message span for user prompts', async () => {
		const sessionId = 'otel-test-4';
		await runSession(sessionId, 'hello', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'text', text: 'Hello!', citations: [] },
			]);
			yield makeResultMessage(sid);
		});

		const userMsgSpan = getSpan('user_message');
		expect(userMsgSpan.attributes['copilot_chat.chat_session_id']).toBe(sessionId);
	});

	it('records tool_input as TOOL_CALL_ARGUMENTS', async () => {
		const sessionId = 'otel-test-5';
		await runSession(sessionId, 'run command', async function* (sid) {
			yield makeAssistantMessage(sid, [
				{ type: 'tool_use', id: 'tu-args', name: 'Bash', input: { command: 'ls -la' } },
			]);

			yield makeUserMessage(sid, [
				{ type: 'tool_result', tool_use_id: 'tu-args', content: 'output' },
			]);

			yield makeResultMessage(sid);
		});

		const toolSpan = getSpan('execute_tool Bash');
		expect(toolSpan.attributes['gen_ai.tool.call.arguments']).toContain('ls -la');
	});
});