Path: blob/main/extensions/copilot/src/platform/otel/common/test/agentTraceHierarchy.spec.ts
13406 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { describe, expect, it } from 'vitest';6import { CopilotChatAttr, GenAiAttr, GenAiOperationName, GenAiProviderName } from '../genAiAttributes';7import { emitAgentTurnEvent, emitSessionStartEvent } from '../genAiEvents';8import { GenAiMetrics } from '../genAiMetrics';9import { SpanKind, SpanStatusCode } from '../otelService';10import { CapturingOTelService } from './capturingOTelService';1112/**13* Verifies that the OTel instrumentation produces the correct span hierarchy,14* metric recordings, and event emissions for a complete agent interaction.15*16* Span hierarchy (expected):17* invoke_agent copilot [INTERNAL]18* ├── chat gpt-4o [CLIENT]19* ├── execute_tool readFile [INTERNAL]20* └── chat gpt-4o [CLIENT]21*22* Subagent trace propagation (via storeTraceContext/getStoredTraceContext):23* invoke_agent copilot24* ├── execute_tool runSubagent25* │ └── invoke_agent Explore (same traceId via parentTraceContext)26*/27describe('Agent Trace Hierarchy', () => {28it('produces invoke_agent, chat, and execute_tool spans with correct attributes', async () => {29const otel = new CapturingOTelService();3031// Simulate invoke_agent span32await otel.startActiveSpan('invoke_agent copilot', {33kind: SpanKind.INTERNAL,34attributes: {35[GenAiAttr.OPERATION_NAME]: GenAiOperationName.INVOKE_AGENT,36[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GITHUB,37[GenAiAttr.AGENT_NAME]: 'copilot',38[GenAiAttr.CONVERSATION_ID]: 'conv-123',39},40}, async (agentSpan) => {41// Simulate chat span (LLM call)42const chatSpan = otel.startSpan('chat gpt-4o', {43kind: SpanKind.CLIENT,44attributes: {45[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,46[GenAiAttr.REQUEST_MODEL]: 'gpt-4o',47},48});49chatSpan.setAttributes({50[GenAiAttr.USAGE_INPUT_TOKENS]: 1500,51[GenAiAttr.USAGE_OUTPUT_TOKENS]: 250,52[GenAiAttr.RESPONSE_MODEL]: 'gpt-4o-2024-08-06',53});54chatSpan.setStatus(SpanStatusCode.OK);55chatSpan.end();5657// Simulate tool call span58const toolSpan = otel.startSpan('execute_tool readFile', {59kind: SpanKind.INTERNAL,60attributes: {61[GenAiAttr.OPERATION_NAME]: GenAiOperationName.EXECUTE_TOOL,62[GenAiAttr.TOOL_NAME]: 'readFile',63},64});65toolSpan.setStatus(SpanStatusCode.OK);66toolSpan.end();6768// Simulate second chat span69const chat2 = otel.startSpan('chat gpt-4o', {70kind: SpanKind.CLIENT,71attributes: {72[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,73[GenAiAttr.REQUEST_MODEL]: 'gpt-4o',74},75});76chat2.setStatus(SpanStatusCode.OK);77chat2.end();7879agentSpan.setStatus(SpanStatusCode.OK);80});8182// Verify all 4 spans created83expect(otel.spans).toHaveLength(4);8485// invoke_agent span86const agentSpan = otel.spans[0];87expect(agentSpan.name).toBe('invoke_agent copilot');88expect(agentSpan.kind).toBe(SpanKind.INTERNAL);89expect(agentSpan.attributes[GenAiAttr.OPERATION_NAME]).toBe('invoke_agent');90expect(agentSpan.attributes[GenAiAttr.AGENT_NAME]).toBe('copilot');91expect(agentSpan.statusCode).toBe(SpanStatusCode.OK);92expect(agentSpan.ended).toBe(true);9394// First chat span95const chatSpan = otel.spans[1];96expect(chatSpan.name).toBe('chat gpt-4o');97expect(chatSpan.kind).toBe(SpanKind.CLIENT);98expect(chatSpan.attributes[GenAiAttr.USAGE_INPUT_TOKENS]).toBe(1500);99expect(chatSpan.attributes[GenAiAttr.RESPONSE_MODEL]).toBe('gpt-4o-2024-08-06');100101// Tool span102const toolSpan = otel.spans[2];103expect(toolSpan.name).toBe('execute_tool readFile');104expect(toolSpan.kind).toBe(SpanKind.INTERNAL);105expect(toolSpan.attributes[GenAiAttr.TOOL_NAME]).toBe('readFile');106107// Second chat span108expect(otel.spans[3].name).toBe('chat gpt-4o');109});110111it('emits session start event and agent metrics', async () => {112const otel = new CapturingOTelService();113114emitSessionStartEvent(otel, 'sess-abc', 'gpt-4o', 'copilot');115GenAiMetrics.incrementSessionCount(otel);116GenAiMetrics.recordAgentDuration(otel, 'copilot', 15.2);117GenAiMetrics.recordAgentTurnCount(otel, 'copilot', 4);118emitAgentTurnEvent(otel, 0, 1500, 250, 2);119120// Session event121expect(otel.logRecords).toHaveLength(2); // session.start + agent.turn122expect(otel.logRecords[0].attributes?.['event.name']).toBe('copilot_chat.session.start');123124// Agent turn event125expect(otel.logRecords[1].attributes?.['event.name']).toBe('copilot_chat.agent.turn');126expect(otel.logRecords[1].attributes?.['turn.index']).toBe(0);127128// Metrics129expect(otel.counters).toHaveLength(1);130expect(otel.counters[0].name).toBe('copilot_chat.session.count');131expect(otel.metrics).toHaveLength(2);132expect(otel.metrics[0].name).toBe('copilot_chat.agent.invocation.duration');133expect(otel.metrics[1].name).toBe('copilot_chat.agent.turn.count');134});135136it('propagates trace context for subagent via store/retrieve', () => {137const otel = new CapturingOTelService();138const parentCtx = { traceId: 'aaaa0000bbbb1111cccc2222dddd3333', spanId: 'eeee4444ffff5555' };139140// Parent agent stores context when launching subagent141otel.storeTraceContext('subagent:req-123', parentCtx);142143// Subagent retrieves it144const restored = otel.getStoredTraceContext('subagent:req-123');145expect(restored).toEqual(parentCtx);146147// Create subagent span with parentTraceContext148otel.startSpan('invoke_agent Explore', {149kind: SpanKind.INTERNAL,150attributes: { [GenAiAttr.OPERATION_NAME]: GenAiOperationName.INVOKE_AGENT },151parentTraceContext: restored,152});153154const subagentSpan = otel.spans[0];155expect(subagentSpan.name).toBe('invoke_agent Explore');156expect(subagentSpan.parentTraceContext).toEqual(parentCtx);157158// Context is consumed (single-use)159expect(otel.getStoredTraceContext('subagent:req-123')).toBeUndefined();160});161162it('records error status on failed spans', async () => {163const otel = new CapturingOTelService();164165await otel.startActiveSpan('chat gpt-4o', { kind: SpanKind.CLIENT, attributes: {} }, async (span) => {166span.setStatus(SpanStatusCode.ERROR, 'timeout');167span.setAttribute('error.type', 'TimeoutError');168span.recordException(new Error('Request timed out'));169});170171const span = otel.spans[0];172expect(span.statusCode).toBe(SpanStatusCode.ERROR);173expect(span.statusMessage).toBe('timeout');174expect(span.attributes['error.type']).toBe('TimeoutError');175expect(span.exceptions).toHaveLength(1);176expect(span.ended).toBe(true);177});178179it('records tool call metrics and events correctly', () => {180const otel = new CapturingOTelService();181182// Simulate a successful and failed tool call183GenAiMetrics.recordToolCallCount(otel, 'readFile', true);184GenAiMetrics.recordToolCallDuration(otel, 'readFile', 50);185GenAiMetrics.recordToolCallCount(otel, 'runCommand', false);186GenAiMetrics.recordToolCallDuration(otel, 'runCommand', 5000);187188expect(otel.counters).toHaveLength(2);189expect(otel.counters[0].attributes?.[GenAiAttr.TOOL_NAME]).toBe('readFile');190expect(otel.counters[0].attributes?.success).toBe(true);191expect(otel.counters[1].attributes?.success).toBe(false);192193expect(otel.metrics).toHaveLength(2);194expect(otel.metrics[0].value).toBe(50);195expect(otel.metrics[1].value).toBe(5000);196});197198it('records chat operation duration and token usage metrics', () => {199const otel = new CapturingOTelService();200201GenAiMetrics.recordOperationDuration(otel, 3.5, {202operationName: GenAiOperationName.CHAT,203providerName: GenAiProviderName.GITHUB,204requestModel: 'gpt-4o',205});206GenAiMetrics.recordTokenUsage(otel, 1500, 'input', {207operationName: GenAiOperationName.CHAT,208providerName: GenAiProviderName.GITHUB,209requestModel: 'gpt-4o',210});211GenAiMetrics.recordTokenUsage(otel, 250, 'output', {212operationName: GenAiOperationName.CHAT,213providerName: GenAiProviderName.GITHUB,214requestModel: 'gpt-4o',215});216217expect(otel.metrics).toHaveLength(3);218expect(otel.metrics[0].name).toBe('gen_ai.client.operation.duration');219expect(otel.metrics[0].value).toBe(3.5);220expect(otel.metrics[1].name).toBe('gen_ai.client.token.usage');221expect(otel.metrics[1].value).toBe(1500);222expect(otel.metrics[2].name).toBe('gen_ai.client.token.usage');223expect(otel.metrics[2].value).toBe(250);224});225226it('records edit acceptance and survival metrics', () => {227const otel = new CapturingOTelService();228229GenAiMetrics.recordEditAcceptance(otel, 'inline_chat', 'accepted', 'typescript');230GenAiMetrics.recordEditAcceptance(otel, 'chat_editing_hunk', 'rejected', 'python');231GenAiMetrics.recordEditSurvivalFourGram(otel, 'inline_chat', 0.85, 30000);232GenAiMetrics.recordEditSurvivalNoRevert(otel, 'inline_chat', 0.92, 30000);233GenAiMetrics.recordChatEditOutcome(otel, 'chat_editing', 'accepted', 'typescript', false);234235// Acceptance counters236expect(otel.counters).toHaveLength(3);237expect(otel.counters[0].name).toBe('copilot_chat.edit.acceptance.count');238expect(otel.counters[0].attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');239expect(otel.counters[0].attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');240expect(otel.counters[0].attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBe('typescript');241242expect(otel.counters[1].name).toBe('copilot_chat.edit.acceptance.count');243expect(otel.counters[1].attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('rejected');244245// Chat edit outcome counter246expect(otel.counters[2].name).toBe('copilot_chat.chat_edit.outcome.count');247expect(otel.counters[2].attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('chat_editing');248expect(otel.counters[2].attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');249expect(otel.counters[2].attributes?.[CopilotChatAttr.HAS_REMAINING_EDITS]).toBe(false);250251// Survival histograms252expect(otel.metrics).toHaveLength(2);253expect(otel.metrics[0].name).toBe('copilot_chat.edit.survival.four_gram');254expect(otel.metrics[0].value).toBe(0.85);255expect(otel.metrics[0].attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');256expect(otel.metrics[0].attributes?.[CopilotChatAttr.TIME_DELAY_MS]).toBe(30000);257258expect(otel.metrics[1].name).toBe('copilot_chat.edit.survival.no_revert');259expect(otel.metrics[1].value).toBe(0.92);260});261262it('omits optional attributes when undefined', () => {263const otel = new CapturingOTelService();264265GenAiMetrics.recordEditAcceptance(otel, 'inline_chat', 'accepted', undefined);266GenAiMetrics.recordChatEditOutcome(otel, 'chat_editing', 'rejected', undefined, undefined);267268expect(otel.counters[0].attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBeUndefined();269expect(otel.counters[1].attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBeUndefined();270expect(otel.counters[1].attributes?.[CopilotChatAttr.HAS_REMAINING_EDITS]).toBeUndefined();271});272});273274275