CoCalc -- agentTraceHierarchy.spec.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/otel/common/test/agentTraceHierarchy.spec.ts
13406 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { describe, expect, it } from 'vitest';
7
import { CopilotChatAttr, GenAiAttr, GenAiOperationName, GenAiProviderName } from '../genAiAttributes';
8
import { emitAgentTurnEvent, emitSessionStartEvent } from '../genAiEvents';
9
import { GenAiMetrics } from '../genAiMetrics';
10
import { SpanKind, SpanStatusCode } from '../otelService';
11
import { CapturingOTelService } from './capturingOTelService';
12

13
/**
 * Verifies that the OTel instrumentation produces the correct span hierarchy,
 * metric recordings, and event emissions for a complete agent interaction.
 *
 * Span hierarchy (expected):
 *   invoke_agent copilot        [INTERNAL]
 *     ├── chat gpt-4o           [CLIENT]
 *     ├── execute_tool readFile  [INTERNAL]
 *     └── chat gpt-4o           [CLIENT]
 *
 * Subagent trace propagation (via storeTraceContext/getStoredTraceContext):
 *   invoke_agent copilot
 *     ├── execute_tool runSubagent
 *     │   └── invoke_agent Explore  (same traceId via parentTraceContext)
 */
28
describe('Agent Trace Hierarchy', () => {
29
	// Simulates one agent turn (agent span wrapping chat -> tool -> chat) and
	// verifies what the capturing service recorded for each of the four spans.
	it('produces invoke_agent, chat, and execute_tool spans with correct attributes', async () => {
		const otel = new CapturingOTelService();

		// Simulate invoke_agent span. The callback never calls end() on it, yet the
		// assertions below expect `ended` to be true, so startActiveSpan is relied
		// on to end the span once the callback settles.
		await otel.startActiveSpan('invoke_agent copilot', {
			kind: SpanKind.INTERNAL,
			attributes: {
				[GenAiAttr.OPERATION_NAME]: GenAiOperationName.INVOKE_AGENT,
				[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GITHUB,
				[GenAiAttr.AGENT_NAME]: 'copilot',
				[GenAiAttr.CONVERSATION_ID]: 'conv-123',
			},
		}, async (agentSpan) => {
			// Simulate chat span (LLM call)
			const chatSpan = otel.startSpan('chat gpt-4o', {
				kind: SpanKind.CLIENT,
				attributes: {
					[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,
					[GenAiAttr.REQUEST_MODEL]: 'gpt-4o',
				},
			});
			chatSpan.setAttributes({
				[GenAiAttr.USAGE_INPUT_TOKENS]: 1500,
				[GenAiAttr.USAGE_OUTPUT_TOKENS]: 250,
				[GenAiAttr.RESPONSE_MODEL]: 'gpt-4o-2024-08-06',
			});
			chatSpan.setStatus(SpanStatusCode.OK);
			chatSpan.end();

			// Simulate tool call span
			const toolSpan = otel.startSpan('execute_tool readFile', {
				kind: SpanKind.INTERNAL,
				attributes: {
					[GenAiAttr.OPERATION_NAME]: GenAiOperationName.EXECUTE_TOOL,
					[GenAiAttr.TOOL_NAME]: 'readFile',
				},
			});
			toolSpan.setStatus(SpanStatusCode.OK);
			toolSpan.end();

			// Simulate second chat span
			const chat2 = otel.startSpan('chat gpt-4o', {
				kind: SpanKind.CLIENT,
				attributes: {
					[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,
					[GenAiAttr.REQUEST_MODEL]: 'gpt-4o',
				},
			});
			chat2.setStatus(SpanStatusCode.OK);
			chat2.end();

			agentSpan.setStatus(SpanStatusCode.OK);
		});

		// Verify all 4 spans created (captured in start order: agent, chat, tool, chat)
		expect(otel.spans).toHaveLength(4);

		// invoke_agent span
		const capturedAgent = otel.spans[0];
		expect(capturedAgent.name).toBe('invoke_agent copilot');
		expect(capturedAgent.kind).toBe(SpanKind.INTERNAL);
		expect(capturedAgent.attributes[GenAiAttr.OPERATION_NAME]).toBe('invoke_agent');
		expect(capturedAgent.attributes[GenAiAttr.AGENT_NAME]).toBe('copilot');
		expect(capturedAgent.statusCode).toBe(SpanStatusCode.OK);
		expect(capturedAgent.ended).toBe(true);

		// First chat span: creation-time attributes plus the usage attributes set afterwards
		const firstChat = otel.spans[1];
		expect(firstChat.name).toBe('chat gpt-4o');
		expect(firstChat.kind).toBe(SpanKind.CLIENT);
		expect(firstChat.attributes[GenAiAttr.OPERATION_NAME]).toBe(GenAiOperationName.CHAT);
		expect(firstChat.attributes[GenAiAttr.REQUEST_MODEL]).toBe('gpt-4o');
		expect(firstChat.attributes[GenAiAttr.USAGE_INPUT_TOKENS]).toBe(1500);
		expect(firstChat.attributes[GenAiAttr.USAGE_OUTPUT_TOKENS]).toBe(250);
		expect(firstChat.attributes[GenAiAttr.RESPONSE_MODEL]).toBe('gpt-4o-2024-08-06');
		expect(firstChat.statusCode).toBe(SpanStatusCode.OK);
		expect(firstChat.ended).toBe(true);

		// Tool span
		const capturedTool = otel.spans[2];
		expect(capturedTool.name).toBe('execute_tool readFile');
		expect(capturedTool.kind).toBe(SpanKind.INTERNAL);
		expect(capturedTool.attributes[GenAiAttr.OPERATION_NAME]).toBe(GenAiOperationName.EXECUTE_TOOL);
		expect(capturedTool.attributes[GenAiAttr.TOOL_NAME]).toBe('readFile');
		expect(capturedTool.statusCode).toBe(SpanStatusCode.OK);
		expect(capturedTool.ended).toBe(true);

		// Second chat span: checked as thoroughly as the first, not by name alone
		const secondChat = otel.spans[3];
		expect(secondChat.name).toBe('chat gpt-4o');
		expect(secondChat.kind).toBe(SpanKind.CLIENT);
		expect(secondChat.attributes[GenAiAttr.OPERATION_NAME]).toBe(GenAiOperationName.CHAT);
		expect(secondChat.attributes[GenAiAttr.REQUEST_MODEL]).toBe('gpt-4o');
		expect(secondChat.statusCode).toBe(SpanStatusCode.OK);
		expect(secondChat.ended).toBe(true);
	});
111

112
	// Session-level telemetry: two log records (session start, agent turn),
	// one counter, and two metric recordings are expected from the calls below.
	it('emits session start event and agent metrics', async () => {
		const service = new CapturingOTelService();

		// Emission order matters: the assertions index into the captured arrays.
		emitSessionStartEvent(service, 'sess-abc', 'gpt-4o', 'copilot');
		GenAiMetrics.incrementSessionCount(service);
		GenAiMetrics.recordAgentDuration(service, 'copilot', 15.2);
		GenAiMetrics.recordAgentTurnCount(service, 'copilot', 4);
		emitAgentTurnEvent(service, 0, 1500, 250, 2);

		// Log records: session.start followed by agent.turn
		expect(service.logRecords).toHaveLength(2);
		const sessionStartRecord = service.logRecords[0];
		expect(sessionStartRecord.attributes?.['event.name']).toBe('copilot_chat.session.start');

		const agentTurnRecord = service.logRecords[1];
		expect(agentTurnRecord.attributes?.['event.name']).toBe('copilot_chat.agent.turn');
		expect(agentTurnRecord.attributes?.['turn.index']).toBe(0);

		// Counter: the single session count increment
		expect(service.counters).toHaveLength(1);
		const sessionCounter = service.counters[0];
		expect(sessionCounter.name).toBe('copilot_chat.session.count');

		// Metric recordings: agent duration, then agent turn count
		expect(service.metrics).toHaveLength(2);
		const durationMetric = service.metrics[0];
		expect(durationMetric.name).toBe('copilot_chat.agent.invocation.duration');
		const turnCountMetric = service.metrics[1];
		expect(turnCountMetric.name).toBe('copilot_chat.agent.turn.count');
	});
136

137
	// Round-trips a parent trace context through the service's keyed store and
	// checks that a subagent span started with it records that context.
	it('propagates trace context for subagent via store/retrieve', () => {
		const otel = new CapturingOTelService();
		const parentCtx = { traceId: 'aaaa0000bbbb1111cccc2222dddd3333', spanId: 'eeee4444ffff5555' };

		// Parent agent stores context when launching subagent
		otel.storeTraceContext('subagent:req-123', parentCtx);

		// Subagent retrieves it
		const restored = otel.getStoredTraceContext('subagent:req-123');
		expect(restored).toEqual(parentCtx);

		// Create subagent span with parentTraceContext
		const subagentHandle = otel.startSpan('invoke_agent Explore', {
			kind: SpanKind.INTERNAL,
			attributes: { [GenAiAttr.OPERATION_NAME]: GenAiOperationName.INVOKE_AGENT },
			parentTraceContext: restored,
		});

		// Exactly one span should have been captured; check before indexing so a
		// missing span fails with a clear message instead of a TypeError.
		expect(otel.spans).toHaveLength(1);

		const subagentSpan = otel.spans[0];
		expect(subagentSpan.name).toBe('invoke_agent Explore');
		expect(subagentSpan.parentTraceContext).toEqual(parentCtx);

		// End the span, as every other manually started span in this file does.
		subagentHandle.end();

		// Context is consumed (single-use)
		expect(otel.getStoredTraceContext('subagent:req-123')).toBeUndefined();
	});
162

163
	// A span marked as failed inside startActiveSpan must keep its error status,
	// status message, error attribute and recorded exception, and still be ended.
	it('records error status on failed spans', async () => {
		const service = new CapturingOTelService();

		await service.startActiveSpan(
			'chat gpt-4o',
			{ kind: SpanKind.CLIENT, attributes: {} },
			async (activeSpan) => {
				activeSpan.setStatus(SpanStatusCode.ERROR, 'timeout');
				activeSpan.setAttribute('error.type', 'TimeoutError');
				activeSpan.recordException(new Error('Request timed out'));
			},
		);

		// The only span started in this test is the first captured one.
		const failedSpan = service.spans[0];

		expect(failedSpan.statusCode).toBe(SpanStatusCode.ERROR);
		expect(failedSpan.statusMessage).toBe('timeout');
		expect(failedSpan.attributes['error.type']).toBe('TimeoutError');
		expect(failedSpan.exceptions).toHaveLength(1);
		expect(failedSpan.ended).toBe(true);
	});
179

180
	// Records one successful and one failed tool call and checks the captured
	// counters (tool name + success flag) and duration values.
	// NOTE(review): the title mentions events, but this test emits and asserts
	// none; only counters and metrics are covered here.
	it('records tool call metrics and events correctly', () => {
		const otel = new CapturingOTelService();

		// Simulate a successful and failed tool call
		GenAiMetrics.recordToolCallCount(otel, 'readFile', true);
		GenAiMetrics.recordToolCallDuration(otel, 'readFile', 50);
		GenAiMetrics.recordToolCallCount(otel, 'runCommand', false);
		GenAiMetrics.recordToolCallDuration(otel, 'runCommand', 5000);

		// One counter per recordToolCallCount call, in call order
		expect(otel.counters).toHaveLength(2);
		expect(otel.counters[0].attributes?.[GenAiAttr.TOOL_NAME]).toBe('readFile');
		expect(otel.counters[0].attributes?.success).toBe(true);
		// Check the tool name on the failed call too, so the success flag is tied
		// to the right tool rather than to its position alone.
		expect(otel.counters[1].attributes?.[GenAiAttr.TOOL_NAME]).toBe('runCommand');
		expect(otel.counters[1].attributes?.success).toBe(false);

		// One metric recording per recordToolCallDuration call, in call order
		expect(otel.metrics).toHaveLength(2);
		expect(otel.metrics[0].value).toBe(50);
		expect(otel.metrics[1].value).toBe(5000);
	});
198

199
	// One operation-duration recording followed by input and output token-usage
	// recordings, all described by the same chat/GitHub/gpt-4o attributes.
	it('records chat operation duration and token usage metrics', () => {
		const service = new CapturingOTelService();

		// Shared attribute values; each call receives its own fresh copy.
		const chatCall = {
			operationName: GenAiOperationName.CHAT,
			providerName: GenAiProviderName.GITHUB,
			requestModel: 'gpt-4o',
		};

		GenAiMetrics.recordOperationDuration(service, 3.5, { ...chatCall });
		GenAiMetrics.recordTokenUsage(service, 1500, 'input', { ...chatCall });
		GenAiMetrics.recordTokenUsage(service, 250, 'output', { ...chatCall });

		expect(service.metrics).toHaveLength(3);

		const durationMetric = service.metrics[0];
		expect(durationMetric.name).toBe('gen_ai.client.operation.duration');
		expect(durationMetric.value).toBe(3.5);

		const inputTokens = service.metrics[1];
		expect(inputTokens.name).toBe('gen_ai.client.token.usage');
		expect(inputTokens.value).toBe(1500);

		const outputTokens = service.metrics[2];
		expect(outputTokens.name).toBe('gen_ai.client.token.usage');
		expect(outputTokens.value).toBe(250);
	});
226

227
	// Edit telemetry: the acceptance and chat-edit-outcome calls are expected in
	// `counters`, the two survival calls in `metrics`, each in call order.
	it('records edit acceptance and survival metrics', () => {
		const service = new CapturingOTelService();

		GenAiMetrics.recordEditAcceptance(service, 'inline_chat', 'accepted', 'typescript');
		GenAiMetrics.recordEditAcceptance(service, 'chat_editing_hunk', 'rejected', 'python');
		GenAiMetrics.recordEditSurvivalFourGram(service, 'inline_chat', 0.85, 30000);
		GenAiMetrics.recordEditSurvivalNoRevert(service, 'inline_chat', 0.92, 30000);
		GenAiMetrics.recordChatEditOutcome(service, 'chat_editing', 'accepted', 'typescript', false);

		// Counters: two acceptance entries, then the chat edit outcome
		expect(service.counters).toHaveLength(3);

		const acceptedInline = service.counters[0];
		expect(acceptedInline.name).toBe('copilot_chat.edit.acceptance.count');
		expect(acceptedInline.attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');
		expect(acceptedInline.attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');
		expect(acceptedInline.attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBe('typescript');

		const rejectedHunk = service.counters[1];
		expect(rejectedHunk.name).toBe('copilot_chat.edit.acceptance.count');
		expect(rejectedHunk.attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('rejected');

		const chatEditOutcome = service.counters[2];
		expect(chatEditOutcome.name).toBe('copilot_chat.chat_edit.outcome.count');
		expect(chatEditOutcome.attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('chat_editing');
		expect(chatEditOutcome.attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');
		expect(chatEditOutcome.attributes?.[CopilotChatAttr.HAS_REMAINING_EDITS]).toBe(false);

		// Survival recordings: four-gram first, then no-revert
		expect(service.metrics).toHaveLength(2);

		const fourGram = service.metrics[0];
		expect(fourGram.name).toBe('copilot_chat.edit.survival.four_gram');
		expect(fourGram.value).toBe(0.85);
		expect(fourGram.attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');
		expect(fourGram.attributes?.[CopilotChatAttr.TIME_DELAY_MS]).toBe(30000);

		const noRevert = service.metrics[1];
		expect(noRevert.name).toBe('copilot_chat.edit.survival.no_revert');
		expect(noRevert.value).toBe(0.92);
	});
262

263
	// Optional arguments passed as `undefined` must not appear as attributes,
	// while the counters and their required attributes are still recorded.
	it('omits optional attributes when undefined', () => {
		const otel = new CapturingOTelService();

		GenAiMetrics.recordEditAcceptance(otel, 'inline_chat', 'accepted', undefined);
		GenAiMetrics.recordChatEditOutcome(otel, 'chat_editing', 'rejected', undefined, undefined);

		// Check the count before indexing so a missing counter fails with a clear
		// message instead of a TypeError on the property access below.
		expect(otel.counters).toHaveLength(2);

		// Required attributes are still present, so the undefined checks below
		// cannot pass merely because no attributes were recorded at all.
		expect(otel.counters[0].attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('inline_chat');
		expect(otel.counters[0].attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('accepted');
		expect(otel.counters[1].attributes?.[CopilotChatAttr.EDIT_SOURCE]).toBe('chat_editing');
		expect(otel.counters[1].attributes?.[CopilotChatAttr.EDIT_OUTCOME]).toBe('rejected');

		// Optional attributes are omitted
		expect(otel.counters[0].attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBeUndefined();
		expect(otel.counters[1].attributes?.[CopilotChatAttr.LANGUAGE_ID]).toBeUndefined();
		expect(otel.counters[1].attributes?.[CopilotChatAttr.HAS_REMAINING_EDITS]).toBeUndefined();
	});
273
});
274

275
Product

Resources

Company