CoCalc -- promptCategorizer.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompt/node/promptCategorizer.ts
¹³³⁹⁹ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import type * as vscode from 'vscode';
7
import { ICopilotTokenStore } from '../../../platform/authentication/common/copilotTokenStore';
8
import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';
9
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
10
import { ILogService } from '../../../platform/log/common/logService';
11
import { ICopilotToolCall } from '../../../platform/networking/common/fetch';
12
import { CapturingToken } from '../../../platform/requestLogger/common/capturingToken';
13
import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';
14
import { ITabsAndEditorsService } from '../../../platform/tabs/common/tabsAndEditorsService';
15
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
16
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
17
import { createServiceIdentifier } from '../../../util/common/services';
18
import { CancellationTokenSource } from '../../../util/vs/base/common/cancellation';
19
import { isCancellationError } from '../../../util/vs/base/common/errors';
20
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
21
import { renderPromptElement } from '../../prompts/node/base/promptRenderer';
22
import { PromptCategorizationPrompt } from '../../prompts/node/panel/promptCategorization';
23
import { CATEGORIZE_PROMPT_TOOL_NAME, CATEGORIZE_PROMPT_TOOL_SCHEMA, isValidDomain, isValidIntent, isValidScope, PromptClassification } from '../common/promptCategorizationTaxonomy';
24

25
/**
 * Name of the experimentation treatment variable that gates prompt
 * categorization for users who are not flagged as internal.
 */
const EXP_FLAG_PROMPT_CATEGORIZATION = 'copilotchat.promptCategorization';
27

28
export const IPromptCategorizerService = createServiceIdentifier<IPromptCategorizerService>('IPromptCategorizerService');

/**
 * Service that classifies the opening prompt of a chat session and reports the
 * classification through telemetry.
 */
export interface IPromptCategorizerService {
	readonly _serviceBrand: undefined;

	/**
	 * Categorizes the first user prompt in a chat session.
	 *
	 * This is a fire-and-forget operation: the call returns immediately and the
	 * result is only reported through telemetry, never to the caller.
	 *
	 * The implementation in this file only proceeds when all of the following hold:
	 * - the user is internal, or the prompt categorization experiment flag is set;
	 * - the request comes from the panel (`request.location2` is undefined);
	 * - the request is not a subagent request;
	 * - it is the first attempt (`request.attempt === 0`);
	 * - the session has no prior history.
	 *
	 * @param request The chat request whose prompt should be classified.
	 * @param context The chat context; its history is used to detect the first message.
	 * @param telemetryMessageId The extension-generated request ID (shared with panel.request telemetry)
	 */
	categorizePrompt(request: vscode.ChatRequest, context: vscode.ChatContext, telemetryMessageId: string): void;
}
43

44
/**
 * Categorization outcome values reported in telemetry.
 *
 * How to read them:
 * - Success: outcome == '' — full classification with valid timeEstimates
 * - Partial success: outcome == 'partialClassification' — core fields valid, timeEstimate malformed
 * - Pipeline failures: any other non-empty outcome
 * - Low confidence: outcome == '' AND confidence < 0.5
 */
const CATEGORIZATION_OUTCOMES = {
	/** Core fields and both time estimates were valid. */
	SUCCESS: '',
	/** The request was cancelled by the categorization timeout. */
	TIMEOUT: 'timeout',
	/** The chat request completed with a non-success response type. */
	REQUEST_FAILED: 'requestFailed',
	/** The response contained no call to the categorization tool. */
	NO_TOOL_CALL: 'noToolCall',
	/** Handling the tool call arguments threw (e.g. they were not valid JSON). */
	PARSE_ERROR: 'parseError',
	/** The arguments parsed, but the core classification fields were missing or invalid. */
	INVALID_CLASSIFICATION: 'invalidClassification',
	/** Core fields were valid but at least one time estimate was missing or malformed. */
	PARTIAL_CLASSIFICATION: 'partialClassification',
	/** Any other unexpected failure; also the initial value before a result is known. */
	ERROR: 'error',
} as const;
59

60
/**
 * Matches the time-only subset of ISO 8601 durations: `PT` followed by at least
 * one of hours (`H`), minutes (`M`), seconds (`S`), in that order, each with an
 * integer amount. The `(?!$)` lookahead rejects a bare `PT`.
 */
const ISO_8601_DURATION_REGEX = /^PT(?!$)(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$/;

/**
 * Tells whether `duration` is a time-only ISO 8601 duration such as `PT1H30M`.
 *
 * @param duration Candidate duration string.
 * @returns `true` when the whole string matches {@link ISO_8601_DURATION_REGEX}.
 */
function isValidIsoDuration(duration: string): boolean {
	const match = ISO_8601_DURATION_REGEX.exec(duration);
	return match !== null;
}
66

67
/**
 * Reports whether both time estimates of a classification are present.
 *
 * This only checks for the empty-string sentinel: it relies on
 * `extractPartialClassification` having already replaced any missing or
 * malformed ISO 8601 duration with `''`.
 *
 * @param partial Classification produced by `extractPartialClassification`.
 * @returns `true` when neither `bestCase` nor `realistic` is the empty string.
 */
function hasValidTimeEstimates(partial: PromptClassification): boolean {
	const { bestCase, realistic } = partial.timeEstimate;
	return !(bestCase === '' || realistic === '');
}
73

74
/**
 * Extracts a classification from the (already JSON-parsed) LLM tool-call payload.
 *
 * Only the core fields are mandatory: `intent`, `domain` and `scope` must be
 * strings accepted by the taxonomy validators, `confidence` must be a number in
 * the closed range [0, 1] (NaN is rejected), and `reasoning` must be a string.
 *
 * Time estimates are extracted on a best-effort basis: a missing or malformed
 * duration is replaced with an empty string rather than failing the whole
 * classification.
 *
 * @param obj Untrusted value parsed from the tool call arguments.
 * @returns The classification, or `undefined` if `obj` is not an object or any
 *          core field is missing or invalid.
 */
function extractPartialClassification(obj: unknown): PromptClassification | undefined {
	if (typeof obj !== 'object' || obj === null) {
		return undefined;
	}

	const c = obj as Record<string, unknown>;

	// Core fields must all be valid.
	// NaN has to be rejected explicitly: both `NaN < 0` and `NaN > 1` are false,
	// so the range comparison alone would let it through.
	if (
		typeof c.intent !== 'string' || !isValidIntent(c.intent) ||
		typeof c.domain !== 'string' || !isValidDomain(c.domain) ||
		typeof c.scope !== 'string' || !isValidScope(c.scope) ||
		typeof c.confidence !== 'number' || Number.isNaN(c.confidence) || c.confidence < 0 || c.confidence > 1 ||
		typeof c.reasoning !== 'string'
	) {
		return undefined;
	}

	// Time estimates are optional — keep valid durations, fall back to ''.
	const durationOrEmpty = (value: unknown): string =>
		typeof value === 'string' && isValidIsoDuration(value) ? value : '';

	const rawEstimate: Record<string, unknown> =
		typeof c.timeEstimate === 'object' && c.timeEstimate !== null
			? c.timeEstimate as Record<string, unknown>
			: {};

	return {
		intent: c.intent,
		domain: c.domain,
		scope: c.scope,
		confidence: c.confidence,
		reasoning: c.reasoning,
		timeEstimate: {
			bestCase: durationOrEmpty(rawEstimate.bestCase),
			realistic: durationOrEmpty(rawEstimate.realistic),
		},
	};
}
121

122
/**
 * Classifies the first prompt of a panel chat session by asking the
 * `copilot-fast` endpoint to call the categorization tool, then reports the
 * outcome through telemetry. Nothing is returned to the caller.
 */
export class PromptCategorizerService implements IPromptCategorizerService {
	declare readonly _serviceBrand: undefined;

	constructor(
		@ILogService private readonly logService: ILogService,
		@IEndpointProvider private readonly endpointProvider: IEndpointProvider,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@ITelemetryService private readonly telemetryService: ITelemetryService,
		@IExperimentationService private readonly experimentationService: IExperimentationService,
		@ITabsAndEditorsService private readonly tabsAndEditorsService: ITabsAndEditorsService,
		@ICopilotTokenStore private readonly copilotTokenStore: ICopilotTokenStore,
		@IRequestLogger private readonly requestLogger: IRequestLogger,
	) { }

	/**
	 * Starts categorization of `request.prompt` in the background when the request
	 * is eligible; otherwise does nothing.
	 *
	 * @param request The incoming chat request.
	 * @param context The chat context; a non-empty history disqualifies the request.
	 * @param telemetryMessageId The extension-generated request ID reported as `requestId`.
	 */
	categorizePrompt(request: vscode.ChatRequest, context: vscode.ChatContext, telemetryMessageId: string): void {
		// Always enable for internal users; external users require experiment flag
		const isInternal = this.copilotTokenStore.copilotToken?.isInternal === true;
		if (!isInternal && !this.experimentationService.getTreatmentVariable<boolean>(EXP_FLAG_PROMPT_CATEGORIZATION)) {
			return;
		}

		// Guard conditions - only run for panel location, non-subagent, first attempt,
		// and truly first messages in a session.
		// location2 === undefined means Panel (ChatRequestEditorData = editor, ChatRequestNotebookData = notebook)
		if (
			request.location2 !== undefined ||
			request.subAgentName !== undefined ||
			request.attempt !== 0 ||
			context.history.length > 0
		) {
			return;
		}

		// Fire and forget - don't await. The catch is a safety net so that an
		// unexpected rejection is logged rather than left unhandled.
		const parentChatSessionId = (request as { sessionId?: string }).sessionId;
		this._categorizePromptAsync(request, context, telemetryMessageId, parentChatSessionId).catch(err => {
			this.logService.error(`[PromptCategorizer] Error categorizing prompt: ${err instanceof Error ? err.message : String(err)}`);
		});
	}

	/**
	 * Runs the categorization request, maps the response to an outcome and reports it.
	 *
	 * Failures of the request pipeline are captured as an outcome and still
	 * reported, so timeouts and errors remain visible in telemetry.
	 *
	 * @param request The chat request being classified.
	 * @param _context Unused.
	 * @param telemetryMessageId The extension-generated request ID reported as `requestId`.
	 * @param parentChatSessionId Session id passed to the capturing token of the request logger.
	 */
	private async _categorizePromptAsync(request: vscode.ChatRequest, _context: vscode.ChatContext, telemetryMessageId: string, parentChatSessionId: string | undefined): Promise<void> {
		const startTime = Date.now();
		let outcome: typeof CATEGORIZATION_OUTCOMES[keyof typeof CATEGORIZATION_OUTCOMES] = CATEGORIZATION_OUTCOMES.ERROR;
		let errorDetail = '';
		let classification: PromptClassification | undefined;

		// Gather context signals (outside try block for telemetry access)
		const currentLanguage = this.tabsAndEditorsService.activeTextEditor?.document.languageId;

		// Use 10 second timeout - classification should be fast with copilot-fast model
		const CATEGORIZATION_TIMEOUT_MS = 10_000;
		const cts = new CancellationTokenSource();
		const timeoutHandle = setTimeout(() => cts.cancel(), CATEGORIZATION_TIMEOUT_MS);

		try {
			const endpoint = await this.endpointProvider.getChatEndpoint('copilot-fast');

			const { messages } = await renderPromptElement(
				this.instantiationService,
				endpoint,
				PromptCategorizationPrompt,
				{
					userRequest: request.prompt,
				}
			);

			// Collect tool calls from the response stream
			const toolCalls: ICopilotToolCall[] = [];

			const capturingToken = new CapturingToken(
				'categorization',
				undefined,
				undefined,
				undefined,
				undefined,
				parentChatSessionId,
				'categorization',
			);

			const response = await this.requestLogger.captureInvocation(capturingToken, () => endpoint.makeChatRequest2({
				debugName: 'promptCategorization',
				messages,
				finishedCb: async (_text, _index, delta) => {
					if (delta.copilotToolCalls) {
						toolCalls.push(...delta.copilotToolCalls);
					}
					return undefined;
				},
				location: ChatLocation.Panel,
				userInitiatedRequest: false,
				isConversationRequest: false,
				requestOptions: {
					tools: [{
						type: 'function',
						function: {
							name: CATEGORIZE_PROMPT_TOOL_NAME,
							description: 'Classify a user prompt across intent, domain, scope, and time estimate dimensions',
							parameters: CATEGORIZE_PROMPT_TOOL_SCHEMA
						}
					}],
					// Force the model to answer through the categorization tool.
					tool_choice: { type: 'function', function: { name: CATEGORIZE_PROMPT_TOOL_NAME } }
				}
			}, cts.token));

			if (cts.token.isCancellationRequested) {
				outcome = CATEGORIZATION_OUTCOMES.TIMEOUT;
				errorDetail = `Timed out after ${CATEGORIZATION_TIMEOUT_MS}ms`;
				this.logService.debug('[PromptCategorizer] Request cancelled due to timeout');
				// Don't return early - still send telemetry below to track timeouts
			} else if (response.type !== ChatFetchResponseType.Success) {
				outcome = CATEGORIZATION_OUTCOMES.REQUEST_FAILED;
				errorDetail = `Response type: ${response.type}`;
				this.logService.warn(`[PromptCategorizer] Request failed with type: ${response.type}`);
			} else {
				// Find the categorize_prompt tool call
				const categorizationCall = toolCalls.find(tc => tc.name === CATEGORIZE_PROMPT_TOOL_NAME);

				if (!categorizationCall) {
					outcome = CATEGORIZATION_OUTCOMES.NO_TOOL_CALL;
					errorDetail = `${toolCalls.length} tool calls returned, none matched ${CATEGORIZE_PROMPT_TOOL_NAME}`;
					this.logService.warn('[PromptCategorizer] No categorization tool call found in response');
				} else {
					try {
						const partial = extractPartialClassification(JSON.parse(categorizationCall.arguments));
						if (!partial) {
							outcome = CATEGORIZATION_OUTCOMES.INVALID_CLASSIFICATION;
							errorDetail = `Invalid classification structure (arguments length: ${categorizationCall.arguments.length})`;
							this.logService.warn(`[PromptCategorizer] Invalid classification structure; ${errorDetail}`);
						} else if (hasValidTimeEstimates(partial)) {
							classification = partial;
							outcome = CATEGORIZATION_OUTCOMES.SUCCESS;
						} else {
							// Core fields valid but timeEstimate malformed — recover partial
							classification = partial;
							outcome = CATEGORIZATION_OUTCOMES.PARTIAL_CLASSIFICATION;
							errorDetail = `Recovered core fields; invalid timeEstimate (arguments length: ${categorizationCall.arguments.length})`;
							this.logService.debug(`[PromptCategorizer] Partial classification recovered; ${errorDetail}`);
						}
					} catch (parseError) {
						outcome = CATEGORIZATION_OUTCOMES.PARSE_ERROR;
						const parseMsg = parseError instanceof Error ? parseError.message : String(parseError);
						errorDetail = `${parseMsg} (arguments length: ${categorizationCall.arguments.length}, timedOut: ${cts.token.isCancellationRequested})`;
						this.logService.warn(`[PromptCategorizer] Failed to parse tool arguments: ${errorDetail}`);
					}
				}
			}

			// Release accumulated tool call data that may be retained via finishedCb closure
			toolCalls.length = 0;
		} catch (err) {
			if (isCancellationError(err)) {
				outcome = CATEGORIZATION_OUTCOMES.TIMEOUT;
				errorDetail = `Request cancelled after ${Date.now() - startTime}ms`;
			} else {
				// `outcome` keeps its initial ERROR value here.
				errorDetail = err instanceof Error ? err.message : String(err);
			}
			this.logService.error(`[PromptCategorizer] Error during categorization: ${errorDetail}`);
		} finally {
			clearTimeout(timeoutHandle);
			cts.dispose();
		}

		const latencyMs = Date.now() - startTime;
		this._reportResult(request, telemetryMessageId, { outcome, errorDetail, classification, currentLanguage, latencyMs });
	}

	/**
	 * Sends the `promptCategorization` telemetry event and its internal-only
	 * counterpart, then writes a debug log line summarizing the result.
	 *
	 * The internal event carries everything the first event does plus
	 * `errorDetail`, `reasoning` and the (truncated) prompt text.
	 *
	 * @param request The chat request that was classified.
	 * @param telemetryMessageId The extension-generated request ID reported as `requestId`.
	 * @param result Outcome of the categorization run.
	 */
	private _reportResult(
		request: vscode.ChatRequest,
		telemetryMessageId: string,
		result: {
			readonly outcome: typeof CATEGORIZATION_OUTCOMES[keyof typeof CATEGORIZATION_OUTCOMES];
			readonly errorDetail: string;
			readonly classification: PromptClassification | undefined;
			readonly currentLanguage: string | undefined;
			readonly latencyMs: number;
		},
	): void {
		const { outcome, errorDetail, classification, currentLanguage, latencyMs } = result;

		// Truncate errorDetail to prevent telemetry backend limits
		const MAX_ERROR_DETAIL_LENGTH = 500;
		const truncatedErrorDetail = errorDetail.slice(0, MAX_ERROR_DETAIL_LENGTH);

		// Truncate prompt to 8192 chars to avoid telemetry backend limits; promptLength measurement preserves original size
		const MAX_TELEMETRY_PROMPT_LENGTH = 8192;
		const truncatedPrompt = request.prompt.slice(0, MAX_TELEMETRY_PROMPT_LENGTH);

		// Anything that is not a built-in mode (including no mode instructions at all) is reported as 'custom'.
		const modeInstructions = request.modeInstructions2;
		const modeName = modeInstructions?.isBuiltin ? modeInstructions.name.toLowerCase() : 'custom';

		// Pieces shared by both events. They are spread into a fresh object per
		// event so the two payloads never alias each other.
		const requestProperties = {
			taxonomyVersion: 'v2',
			sessionId: request.sessionId ?? '',
			requestId: telemetryMessageId,
			vscodeRequestId: request.id ?? '',
			modeName,
			currentLanguage: currentLanguage ?? '',
			outcome,
		};
		const classificationProperties = {
			intent: classification?.intent ?? '',
			domain: classification?.domain ?? '',
			timeEstimateBestCase: classification?.timeEstimate?.bestCase ?? '',
			timeEstimateRealistic: classification?.timeEstimate?.realistic ?? '',
			scope: classification?.scope ?? '',
		};
		const measurements = {
			promptLength: request.prompt.length,
			numReferences: request.references?.length ?? 0,
			numToolReferences: request.toolReferences?.length ?? 0,
			confidence: classification?.confidence ?? 0,
			latencyMs,
		};

		// Send telemetry
		/* __GDPR__
			"promptCategorization" : {
				"owner": "digitarald",
				"comment": "Classifies agent requests for understanding user intent and response quality",
				"taxonomyVersion": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The taxonomy version used for classification (e.g. v2). Used to segment data when taxonomy keys change." },
				"sessionId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat session identifier" },
				"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The extension-generated request identifier, matches panel.request requestId" },
				"vscodeRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The VS Code chat request id, for joining with VS Code telemetry events" },
				"modeName": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat mode name being used" },
				"currentLanguage": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The language ID of the active editor" },
				"outcome": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Classification outcome: empty string for success, partialClassification for recovered core fields, or error kind (timeout, requestFailed, noToolCall, parseError, invalidClassification, error)" },
				"intent": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The classified intent (populated on success or partialClassification, empty string on failure)" },
				"domain": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The classified domain (populated on success or partialClassification, empty string on failure)" },
				"timeEstimateBestCase": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "ISO 8601 duration for best case time estimate" },
				"timeEstimateRealistic": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "ISO 8601 duration for realistic time estimate" },
				"scope": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The classified scope (populated on success or partialClassification, empty string on failure)" },
				"promptLength": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Length of the user prompt in characters" },
				"numReferences": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of context references attached to the request" },
				"numToolReferences": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of tool references in the request" },
				"confidence": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Confidence score of the classification (0.0 to 1.0)" },
				"latencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Time in milliseconds to complete the classification" }
			}
		*/
		this.telemetryService.sendMSFTTelemetryEvent(
			'promptCategorization',
			{
				...requestProperties,
				...classificationProperties,
			},
			{ ...measurements }
		);

		// Send internal telemetry with full metrics including PAI data (reasoning + prompt)
		this.telemetryService.sendInternalMSFTTelemetryEvent(
			'promptCategorization',
			{
				...requestProperties,
				errorDetail: truncatedErrorDetail,
				...classificationProperties,
				reasoning: classification?.reasoning ?? '',
				prompt: truncatedPrompt,
			},
			{ ...measurements }
		);

		this.logService.debug(`[PromptCategorizer] Classification complete: outcome=${outcome || 'success'}, latencyMs=${latencyMs}, intent=${classification?.intent}, domain=${classification?.domain}, scope=${classification?.scope}`);
	}
}
384

385
Product

Resources

Company