Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/otel/common/test/agentTraceHierarchy.spec.ts
13406 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { describe, expect, it } from 'vitest';
7
import { CopilotChatAttr, GenAiAttr, GenAiOperationName, GenAiProviderName } from '../genAiAttributes';
8
import { emitAgentTurnEvent, emitSessionStartEvent } from '../genAiEvents';
9
import { GenAiMetrics } from '../genAiMetrics';
10
import { SpanKind, SpanStatusCode } from '../otelService';
11
import { CapturingOTelService } from './capturingOTelService';
12
13
/**
 * Verifies that the OTel instrumentation produces the correct span hierarchy,
 * metric recordings, and event emissions for a complete agent interaction.
 *
 * Span hierarchy (expected):
 *   invoke_agent copilot          [INTERNAL]
 *     ├── chat gpt-4o             [CLIENT]
 *     ├── execute_tool readFile   [INTERNAL]
 *     └── chat gpt-4o             [CLIENT]
 *
 * Subagent trace propagation (via storeTraceContext/getStoredTraceContext):
 *   invoke_agent copilot
 *     ├── execute_tool runSubagent
 *     │     └── invoke_agent Explore  (same traceId via parentTraceContext)
 */
28
describe('Agent Trace Hierarchy', () => {
29
// Simulates one agent turn (agent span wrapping chat -> tool -> chat) and
// checks the captured spans, in start order, for name, kind, and attributes.
it('produces invoke_agent, chat, and execute_tool spans with correct attributes', async () => {
	const otel = new CapturingOTelService();

	// Both simulated LLM calls start from the same options; build a fresh
	// object per call so the two spans are never handed the same attribute object.
	const chatOptions = () => ({
		kind: SpanKind.CLIENT,
		attributes: {
			[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,
			[GenAiAttr.REQUEST_MODEL]: 'gpt-4o',
		},
	});

	// Root of the simulated turn: everything else is started inside this callback.
	await otel.startActiveSpan('invoke_agent copilot', {
		kind: SpanKind.INTERNAL,
		attributes: {
			[GenAiAttr.OPERATION_NAME]: GenAiOperationName.INVOKE_AGENT,
			[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GITHUB,
			[GenAiAttr.AGENT_NAME]: 'copilot',
			[GenAiAttr.CONVERSATION_ID]: 'conv-123',
		},
	}, async (rootSpan) => {
		// First LLM call, enriched with usage and response details after start.
		const firstChat = otel.startSpan('chat gpt-4o', chatOptions());
		firstChat.setAttributes({
			[GenAiAttr.USAGE_INPUT_TOKENS]: 1500,
			[GenAiAttr.USAGE_OUTPUT_TOKENS]: 250,
			[GenAiAttr.RESPONSE_MODEL]: 'gpt-4o-2024-08-06',
		});
		firstChat.setStatus(SpanStatusCode.OK);
		firstChat.end();

		// Tool invocation between the two LLM calls.
		const readFileCall = otel.startSpan('execute_tool readFile', {
			kind: SpanKind.INTERNAL,
			attributes: {
				[GenAiAttr.OPERATION_NAME]: GenAiOperationName.EXECUTE_TOOL,
				[GenAiAttr.TOOL_NAME]: 'readFile',
			},
		});
		readFileCall.setStatus(SpanStatusCode.OK);
		readFileCall.end();

		// Follow-up LLM call.
		const secondChat = otel.startSpan('chat gpt-4o', chatOptions());
		secondChat.setStatus(SpanStatusCode.OK);
		secondChat.end();

		// The callback only sets the status; it never calls end() on the root span.
		rootSpan.setStatus(SpanStatusCode.OK);
	});

	// One captured record per span started above.
	expect(otel.spans).toHaveLength(4);
	const [agent, llmCall, toolCall, followUp] = otel.spans;

	// invoke_agent span (expected to be ended even though the callback did not end it)
	expect(agent.name).toBe('invoke_agent copilot');
	expect(agent.kind).toBe(SpanKind.INTERNAL);
	expect(agent.attributes[GenAiAttr.OPERATION_NAME]).toBe('invoke_agent');
	expect(agent.attributes[GenAiAttr.AGENT_NAME]).toBe('copilot');
	expect(agent.statusCode).toBe(SpanStatusCode.OK);
	expect(agent.ended).toBe(true);

	// First chat span, including attributes added via setAttributes
	expect(llmCall.name).toBe('chat gpt-4o');
	expect(llmCall.kind).toBe(SpanKind.CLIENT);
	expect(llmCall.attributes[GenAiAttr.USAGE_INPUT_TOKENS]).toBe(1500);
	expect(llmCall.attributes[GenAiAttr.RESPONSE_MODEL]).toBe('gpt-4o-2024-08-06');

	// Tool span
	expect(toolCall.name).toBe('execute_tool readFile');
	expect(toolCall.kind).toBe(SpanKind.INTERNAL);
	expect(toolCall.attributes[GenAiAttr.TOOL_NAME]).toBe('readFile');

	// Second chat span
	expect(followUp.name).toBe('chat gpt-4o');
});
111
112
// Emits two events and records three measurements, then checks what the
// capturing service collected in logRecords, counters, and metrics.
it('emits session start event and agent metrics', async () => {
	const otel = new CapturingOTelService();

	// The index-based assertions below depend on this call order.
	emitSessionStartEvent(otel, 'sess-abc', 'gpt-4o', 'copilot');
	GenAiMetrics.incrementSessionCount(otel);
	GenAiMetrics.recordAgentDuration(otel, 'copilot', 15.2);
	GenAiMetrics.recordAgentTurnCount(otel, 'copilot', 4);
	emitAgentTurnEvent(otel, 0, 1500, 250, 2);

	// Log records: session.start + agent.turn
	expect(otel.logRecords).toHaveLength(2);
	const [sessionStart, agentTurn] = otel.logRecords;

	// Session event
	expect(sessionStart.attributes?.['event.name']).toBe('copilot_chat.session.start');

	// Agent turn event
	expect(agentTurn.attributes?.['event.name']).toBe('copilot_chat.agent.turn');
	expect(agentTurn.attributes?.['turn.index']).toBe(0);

	// Counters: only the session count
	expect(otel.counters).toHaveLength(1);
	const [sessionCount] = otel.counters;
	expect(sessionCount.name).toBe('copilot_chat.session.count');

	// Metrics: agent duration first, then turn count
	expect(otel.metrics).toHaveLength(2);
	const [invocationDuration, turnCount] = otel.metrics;
	expect(invocationDuration.name).toBe('copilot_chat.agent.invocation.duration');
	expect(turnCount.name).toBe('copilot_chat.agent.turn.count');
});
136
137
// Round-trips a parent trace context through the service's store, uses it as
// the parentTraceContext of a new span, and checks a second lookup is empty.
it('propagates trace context for subagent via store/retrieve', () => {
	const otel = new CapturingOTelService();
	const storageKey = 'subagent:req-123';
	const parentContext = { traceId: 'aaaa0000bbbb1111cccc2222dddd3333', spanId: 'eeee4444ffff5555' };

	// Parent side: stash the context under the subagent request key.
	otel.storeTraceContext(storageKey, parentContext);

	// Subagent side: the first lookup hands the same context back.
	const retrieved = otel.getStoredTraceContext(storageKey);
	expect(retrieved).toEqual(parentContext);

	// Start the subagent span with the retrieved context as its parent.
	otel.startSpan('invoke_agent Explore', {
		kind: SpanKind.INTERNAL,
		attributes: { [GenAiAttr.OPERATION_NAME]: GenAiOperationName.INVOKE_AGENT },
		parentTraceContext: retrieved,
	});

	const [exploreSpan] = otel.spans;
	expect(exploreSpan.name).toBe('invoke_agent Explore');
	expect(exploreSpan.parentTraceContext).toEqual(parentContext);

	// Single-use: a second lookup for the same key is expected to return undefined.
	expect(otel.getStoredTraceContext(storageKey)).toBeUndefined();
});
162
163
// Marks an active span as failed from inside its callback and checks that the
// status, message, attribute, and exception all show up on the captured span.
it('records error status on failed spans', async () => {
	const otel = new CapturingOTelService();

	await otel.startActiveSpan('chat gpt-4o', { kind: SpanKind.CLIENT, attributes: {} }, async (activeSpan) => {
		activeSpan.setStatus(SpanStatusCode.ERROR, 'timeout');
		activeSpan.setAttribute('error.type', 'TimeoutError');
		activeSpan.recordException(new Error('Request timed out'));
	});

	const [failedSpan] = otel.spans;
	expect(failedSpan.statusCode).toBe(SpanStatusCode.ERROR);
	expect(failedSpan.statusMessage).toBe('timeout');
	expect(failedSpan.attributes['error.type']).toBe('TimeoutError');
	expect(failedSpan.exceptions).toHaveLength(1);
	// The callback never calls end(); the span is still expected to be ended.
	expect(failedSpan.ended).toBe(true);
});
179
180
// Records a count and a duration for one successful and one failed tool call,
// then checks the captured counters and metrics in recording order.
it('records tool call metrics and events correctly', () => {
	const otel = new CapturingOTelService();

	// Successful call: readFile
	GenAiMetrics.recordToolCallCount(otel, 'readFile', true);
	GenAiMetrics.recordToolCallDuration(otel, 'readFile', 50);
	// Failed call: runCommand
	GenAiMetrics.recordToolCallCount(otel, 'runCommand', false);
	GenAiMetrics.recordToolCallDuration(otel, 'runCommand', 5000);

	// One counter entry per tool call, carrying the tool name and success flag.
	expect(otel.counters).toHaveLength(2);
	const [succeeded, failed] = otel.counters;
	expect(succeeded.attributes?.[GenAiAttr.TOOL_NAME]).toBe('readFile');
	expect(succeeded.attributes?.success).toBe(true);
	expect(failed.attributes?.success).toBe(false);

	// One duration entry per tool call, holding the recorded value.
	expect(otel.metrics).toHaveLength(2);
	const [readFileDuration, runCommandDuration] = otel.metrics;
	expect(readFileDuration.value).toBe(50);
	expect(runCommandDuration.value).toBe(5000);
});
198
199
// Records one operation duration and two token-usage measurements for a chat
// call and checks the captured metric names and values in recording order.
it('records chat operation duration and token usage metrics', () => {
	const otel = new CapturingOTelService();

	// All three recordings describe the same chat request; return a fresh
	// object each time so no recording is handed a shared instance.
	const chatRequest = () => ({
		operationName: GenAiOperationName.CHAT,
		providerName: GenAiProviderName.GITHUB,
		requestModel: 'gpt-4o',
	});

	GenAiMetrics.recordOperationDuration(otel, 3.5, chatRequest());
	GenAiMetrics.recordTokenUsage(otel, 1500, 'input', chatRequest());
	GenAiMetrics.recordTokenUsage(otel, 250, 'output', chatRequest());

	expect(otel.metrics).toHaveLength(3);
	const [duration, inputTokens, outputTokens] = otel.metrics;

	expect(duration.name).toBe('gen_ai.client.operation.duration');
	expect(duration.value).toBe(3.5);

	// Input and output usage share one metric name and differ by value here.
	expect(inputTokens.name).toBe('gen_ai.client.token.usage');
	expect(inputTokens.value).toBe(1500);
	expect(outputTokens.name).toBe('gen_ai.client.token.usage');
	expect(outputTokens.value).toBe(250);
});
226
227
// Records edit acceptance, edit survival, and chat edit outcome measurements,
// then checks the captured counters and metrics entry by entry.
it('records edit acceptance and survival metrics', () => {
	const otel = new CapturingOTelService();

	GenAiMetrics.recordEditAcceptance(otel, 'inline_chat', 'accepted', 'typescript');
	GenAiMetrics.recordEditAcceptance(otel, 'chat_editing_hunk', 'rejected', 'python');
	GenAiMetrics.recordEditSurvivalFourGram(otel, 'inline_chat', 0.85, 30000);
	GenAiMetrics.recordEditSurvivalNoRevert(otel, 'inline_chat', 0.92, 30000);
	GenAiMetrics.recordChatEditOutcome(otel, 'chat_editing', 'accepted', 'typescript', false);

	// Counters: two acceptance entries followed by the chat edit outcome.
	expect(otel.counters).toHaveLength(3);
	const [inlineAccepted, hunkRejected, chatOutcome] = otel.counters;

	// Acceptance counters
	expect(inlineAccepted.name).toBe('copilot_chat.edit.acceptance.count');
	expect(inlineAccepted.attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');
	expect(inlineAccepted.attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');
	expect(inlineAccepted.attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBe('typescript');

	expect(hunkRejected.name).toBe('copilot_chat.edit.acceptance.count');
	expect(hunkRejected.attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('rejected');

	// Chat edit outcome counter
	expect(chatOutcome.name).toBe('copilot_chat.chat_edit.outcome.count');
	expect(chatOutcome.attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('chat_editing');
	expect(chatOutcome.attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');
	expect(chatOutcome.attributes?.[CopilotChatAttr.HAS_REMAINING_EDITS]).toBe(false);

	// Survival metrics: four-gram first, then no-revert.
	expect(otel.metrics).toHaveLength(2);
	const [fourGram, noRevert] = otel.metrics;

	expect(fourGram.name).toBe('copilot_chat.edit.survival.four_gram');
	expect(fourGram.value).toBe(0.85);
	expect(fourGram.attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');
	expect(fourGram.attributes?.[CopilotChatAttr.TIME_DELAY_MS]).toBe(30000);

	expect(noRevert.name).toBe('copilot_chat.edit.survival.no_revert');
	expect(noRevert.value).toBe(0.92);
});
262
263
// Passes undefined for the optional arguments and checks that the matching
// attributes read back as undefined on the captured counters.
it('omits optional attributes when undefined', () => {
	const otel = new CapturingOTelService();

	GenAiMetrics.recordEditAcceptance(otel, 'inline_chat', 'accepted', undefined);
	GenAiMetrics.recordChatEditOutcome(otel, 'chat_editing', 'rejected', undefined, undefined);

	const [acceptance, chatOutcome] = otel.counters;

	// Acceptance recorded without a language id.
	expect(acceptance.attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBeUndefined();

	// Chat edit outcome recorded without a language id or remaining-edits flag.
	expect(chatOutcome.attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBeUndefined();
	expect(chatOutcome.attributes?.[CopilotChatAttr.HAS_REMAINING_EDITS]).toBeUndefined();
});
273
});
274
275