Path: blob/main/extensions/copilot/src/extension/prompt/node/promptCategorizer.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type * as vscode from 'vscode';
import { ICopilotTokenStore } from '../../../platform/authentication/common/copilotTokenStore';
import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { ILogService } from '../../../platform/log/common/logService';
import { ICopilotToolCall } from '../../../platform/networking/common/fetch';
import { CapturingToken } from '../../../platform/requestLogger/common/capturingToken';
import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';
import { ITabsAndEditorsService } from '../../../platform/tabs/common/tabsAndEditorsService';
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { createServiceIdentifier } from '../../../util/common/services';
import { CancellationTokenSource } from '../../../util/vs/base/common/cancellation';
import { isCancellationError } from '../../../util/vs/base/common/errors';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { renderPromptElement } from '../../prompts/node/base/promptRenderer';
import { PromptCategorizationPrompt } from '../../prompts/node/panel/promptCategorization';
import { CATEGORIZE_PROMPT_TOOL_NAME, CATEGORIZE_PROMPT_TOOL_SCHEMA, isValidDomain, isValidIntent, isValidScope, PromptClassification } from '../common/promptCategorizationTaxonomy';

/** Experiment flag to enable prompt categorization */
const EXP_FLAG_PROMPT_CATEGORIZATION = 'copilotchat.promptCategorization';

export const IPromptCategorizerService = createServiceIdentifier<IPromptCategorizerService>('IPromptCategorizerService');

export interface IPromptCategorizerService {
	readonly _serviceBrand: undefined;

	/**
	 * Categorizes the first user prompt in a chat session.
	 * This runs as a fire-and-forget operation and sends results to telemetry.
	 * Only runs for panel location, first attempt, non-subagent requests.
	 * Runs for internal users unconditionally; all other users require the experiment flag to be set.
	 *
	 * @param telemetryMessageId The extension-generated request ID (shared with panel.request telemetry)
	 */
	categorizePrompt(request: vscode.ChatRequest, context: vscode.ChatContext, telemetryMessageId: string): void;
}

// Categorization outcome values for telemetry
// Success: outcome == '' — full classification with valid timeEstimates
// Partial success: outcome == 'partialClassification' — core fields valid, timeEstimate malformed
// Pipeline failures: other non-empty outcomes (timeout, requestFailed, noToolCall, parseError, invalidClassification, error)
// Low confidence: outcome == '' AND confidence < 0.5
const CATEGORIZATION_OUTCOMES = {
	SUCCESS: '',
	TIMEOUT: 'timeout',
	REQUEST_FAILED: 'requestFailed',
	NO_TOOL_CALL: 'noToolCall',
	PARSE_ERROR: 'parseError',
	INVALID_CLASSIFICATION: 'invalidClassification',
	PARTIAL_CLASSIFICATION: 'partialClassification',
	ERROR: 'error',
} as const;

/** Union of every value in {@link CATEGORIZATION_OUTCOMES}. */
type CategorizationOutcome = typeof CATEGORIZATION_OUTCOMES[keyof typeof CATEGORIZATION_OUTCOMES];

// ISO 8601 duration regex: PT followed by at least one of hours (H), minutes (M), seconds (S)
const ISO_8601_DURATION_REGEX = /^PT(?!$)(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$/;

/**
 * Returns true when `duration` matches {@link ISO_8601_DURATION_REGEX}, i.e. `PT` followed by
 * at least one integer hour/minute/second component, in that order.
 */
function isValidIsoDuration(duration: string): boolean {
	return ISO_8601_DURATION_REGEX.test(duration);
}

/**
 * Returns true when the partial classification has fully valid ISO 8601 time estimates.
 *
 * Relies on {@link extractPartialClassification} having replaced every malformed or missing
 * duration with an empty string, so a non-empty value is a validated one.
 */
function hasValidTimeEstimates(partial: PromptClassification): boolean {
	return partial.timeEstimate.bestCase !== '' && partial.timeEstimate.realistic !== '';
}

/**
 * Extracts a partial classification from the LLM response, validating only the core
 * fields (intent, domain, scope, confidence, reasoning). Time estimates are extracted
 * on a best-effort basis — malformed durations are replaced with empty strings.
 *
 * @param obj The parsed tool-call arguments; may be any JSON value.
 * @returns The classification, or undefined if the core fields are missing or invalid.
 */
function extractPartialClassification(obj: unknown): PromptClassification | undefined {
	if (typeof obj !== 'object' || obj === null) {
		return undefined;
	}

	const c = obj as Record<string, unknown>;

	// Core fields must all be valid.
	// Number.isFinite rejects NaN, which would otherwise slip through the range
	// comparisons below because both `NaN < 0` and `NaN > 1` are false.
	if (
		typeof c.intent !== 'string' || !isValidIntent(c.intent) ||
		typeof c.domain !== 'string' || !isValidDomain(c.domain) ||
		typeof c.scope !== 'string' || !isValidScope(c.scope) ||
		typeof c.confidence !== 'number' || !Number.isFinite(c.confidence) || c.confidence < 0 || c.confidence > 1 ||
		typeof c.reasoning !== 'string'
	) {
		return undefined;
	}

	// Time estimates are optional — extract valid durations, fall back to ''
	let bestCase = '';
	let realistic = '';
	if (typeof c.timeEstimate === 'object' && c.timeEstimate !== null) {
		const te = c.timeEstimate as Record<string, unknown>;
		if (typeof te.bestCase === 'string' && isValidIsoDuration(te.bestCase)) {
			bestCase = te.bestCase;
		}
		if (typeof te.realistic === 'string' && isValidIsoDuration(te.realistic)) {
			realistic = te.realistic;
		}
	}

	return {
		intent: c.intent,
		domain: c.domain,
		scope: c.scope,
		confidence: c.confidence,
		reasoning: c.reasoning,
		timeEstimate: { bestCase, realistic },
	};
}

/**
 * Default {@link IPromptCategorizerService}: asks the `copilot-fast` chat endpoint to classify
 * the prompt through a forced tool call and reports the result through telemetry.
 */
export class PromptCategorizerService implements IPromptCategorizerService {
	declare readonly _serviceBrand: undefined;

	constructor(
		@ILogService private readonly logService: ILogService,
		@IEndpointProvider private readonly endpointProvider: IEndpointProvider,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@ITelemetryService private readonly telemetryService: ITelemetryService,
		@IExperimentationService private readonly experimentationService: IExperimentationService,
		@ITabsAndEditorsService private readonly tabsAndEditorsService: ITabsAndEditorsService,
		@ICopilotTokenStore private readonly copilotTokenStore: ICopilotTokenStore,
		@IRequestLogger private readonly requestLogger: IRequestLogger,
	) { }

	/**
	 * Applies the eligibility guards and, when they all pass, starts the categorization
	 * without awaiting it. Any rejection of the background work is logged, never rethrown.
	 */
	categorizePrompt(request: vscode.ChatRequest, context: vscode.ChatContext, telemetryMessageId: string): void {
		// Always enable for internal users; external users require experiment flag
		const isInternal = this.copilotTokenStore.copilotToken?.isInternal === true;
		if (!isInternal && !this.experimentationService.getTreatmentVariable<boolean>(EXP_FLAG_PROMPT_CATEGORIZATION)) {
			return;
		}

		// Guard conditions - only run for first attempt, panel location, non-subagent
		// location2 === undefined means Panel (ChatRequestEditorData = editor, ChatRequestNotebookData = notebook)
		if (request.location2 !== undefined) {
			return;
		}
		if (request.subAgentName !== undefined) {
			return;
		}
		if (request.attempt !== 0) {
			return;
		}
		// Only categorize truly first messages in a session
		if (context.history.length > 0) {
			return;
		}

		// Fire and forget - don't await
		const parentChatSessionId: string | undefined = request.sessionId;
		this._categorizePromptAsync(request, context, telemetryMessageId, parentChatSessionId).catch(err => {
			this.logService.error(`[PromptCategorizer] Error categorizing prompt: ${err instanceof Error ? err.message : String(err)}`);
		});
	}

	/**
	 * Runs one classification request and always emits both `promptCategorization`
	 * telemetry events afterwards, whatever the outcome.
	 *
	 * @param request The chat request whose prompt is classified.
	 * @param _context Unused.
	 * @param telemetryMessageId Reported as `requestId` in telemetry.
	 * @param parentChatSessionId Passed to the {@link CapturingToken} used for request logging.
	 */
	private async _categorizePromptAsync(request: vscode.ChatRequest, _context: vscode.ChatContext, telemetryMessageId: string, parentChatSessionId: string | undefined): Promise<void> {
		const startTime = Date.now();
		// Stays ERROR unless a branch below assigns something more specific.
		let outcome: CategorizationOutcome = CATEGORIZATION_OUTCOMES.ERROR;
		let errorDetail = '';
		let classification: PromptClassification | undefined;

		// Gather context signals (outside try block for telemetry access)
		const currentLanguage = this.tabsAndEditorsService.activeTextEditor?.document.languageId;

		// Use 10 second timeout - classification should be fast with copilot-fast model
		const CATEGORIZATION_TIMEOUT_MS = 10_000;
		const cts = new CancellationTokenSource();
		const timeoutHandle = setTimeout(() => cts.cancel(), CATEGORIZATION_TIMEOUT_MS);

		// Collect tool calls from the response stream.
		// Declared outside the try block so the finally block can release them on every path.
		const toolCalls: ICopilotToolCall[] = [];

		try {
			const endpoint = await this.endpointProvider.getChatEndpoint('copilot-fast');

			const { messages } = await renderPromptElement(
				this.instantiationService,
				endpoint,
				PromptCategorizationPrompt,
				{
					userRequest: request.prompt,
				}
			);

			const capturingToken = new CapturingToken(
				'categorization',
				undefined,
				undefined,
				undefined,
				undefined,
				parentChatSessionId,
				'categorization',
			);

			const response = await this.requestLogger.captureInvocation(capturingToken, () => endpoint.makeChatRequest2({
				debugName: 'promptCategorization',
				messages,
				finishedCb: async (_text, _index, delta) => {
					if (delta.copilotToolCalls) {
						toolCalls.push(...delta.copilotToolCalls);
					}
					return undefined;
				},
				location: ChatLocation.Panel,
				userInitiatedRequest: false,
				isConversationRequest: false,
				requestOptions: {
					// Offer exactly one tool and force the model to call it
					tools: [{
						type: 'function',
						function: {
							name: CATEGORIZE_PROMPT_TOOL_NAME,
							description: 'Classify a user prompt across intent, domain, scope, and time estimate dimensions',
							parameters: CATEGORIZE_PROMPT_TOOL_SCHEMA
						}
					}],
					tool_choice: { type: 'function', function: { name: CATEGORIZE_PROMPT_TOOL_NAME } }
				}
			}, cts.token));

			if (cts.token.isCancellationRequested) {
				outcome = CATEGORIZATION_OUTCOMES.TIMEOUT;
				errorDetail = `Timed out after ${CATEGORIZATION_TIMEOUT_MS}ms`;
				this.logService.debug('[PromptCategorizer] Request cancelled due to timeout');
				// Don't return early - still send telemetry below to track timeouts
			} else if (response.type === ChatFetchResponseType.Success) {
				// Find the categorize_prompt tool call
				const categorizationCall = toolCalls.find(tc => tc.name === CATEGORIZE_PROMPT_TOOL_NAME);

				if (categorizationCall) {
					try {
						const parsed: unknown = JSON.parse(categorizationCall.arguments);
						const partial = extractPartialClassification(parsed);
						if (partial && hasValidTimeEstimates(partial)) {
							classification = partial;
							outcome = CATEGORIZATION_OUTCOMES.SUCCESS;
						} else if (partial) {
							// Core fields valid but timeEstimate malformed — recover partial
							classification = partial;
							outcome = CATEGORIZATION_OUTCOMES.PARTIAL_CLASSIFICATION;
							errorDetail = `Recovered core fields; invalid timeEstimate (arguments length: ${categorizationCall.arguments.length})`;
							this.logService.debug(`[PromptCategorizer] Partial classification recovered; ${errorDetail}`);
						} else {
							outcome = CATEGORIZATION_OUTCOMES.INVALID_CLASSIFICATION;
							errorDetail = `Invalid classification structure (arguments length: ${categorizationCall.arguments.length})`;
							this.logService.warn(`[PromptCategorizer] Invalid classification structure; ${errorDetail}`);
						}
					} catch (parseError) {
						outcome = CATEGORIZATION_OUTCOMES.PARSE_ERROR;
						const parseMsg = parseError instanceof Error ? parseError.message : String(parseError);
						errorDetail = `${parseMsg} (arguments length: ${categorizationCall.arguments.length}, timedOut: ${cts.token.isCancellationRequested})`;
						this.logService.warn(`[PromptCategorizer] Failed to parse tool arguments: ${errorDetail}`);
					}
				} else {
					outcome = CATEGORIZATION_OUTCOMES.NO_TOOL_CALL;
					errorDetail = `${toolCalls.length} tool calls returned, none matched ${CATEGORIZE_PROMPT_TOOL_NAME}`;
					this.logService.warn('[PromptCategorizer] No categorization tool call found in response');
				}
			} else {
				outcome = CATEGORIZATION_OUTCOMES.REQUEST_FAILED;
				errorDetail = `Response type: ${response.type}`;
				this.logService.warn(`[PromptCategorizer] Request failed with type: ${response.type}`);
			}
		} catch (err) {
			if (isCancellationError(err)) {
				outcome = CATEGORIZATION_OUTCOMES.TIMEOUT;
				errorDetail = `Request cancelled after ${Date.now() - startTime}ms`;
			} else {
				errorDetail = err instanceof Error ? err.message : String(err);
			}
			this.logService.error(`[PromptCategorizer] Error during categorization: ${errorDetail}`);
		} finally {
			clearTimeout(timeoutHandle);
			cts.dispose();
			// Release accumulated tool call data that may be retained via finishedCb closure
			toolCalls.length = 0;
		}

		const latencyMs = Date.now() - startTime;

		// Truncate errorDetail to stay within telemetry backend limits
		const MAX_ERROR_DETAIL_LENGTH = 500;
		const truncatedErrorDetail = errorDetail.length > MAX_ERROR_DETAIL_LENGTH
			? errorDetail.slice(0, MAX_ERROR_DETAIL_LENGTH)
			: errorDetail;

		// Properties and measurements common to both telemetry events below
		const sharedProperties = {
			taxonomyVersion: 'v2',
			sessionId: request.sessionId ?? '',
			requestId: telemetryMessageId,
			vscodeRequestId: request.id ?? '',
			modeName: request.modeInstructions2?.isBuiltin ? request.modeInstructions2?.name.toLowerCase() : 'custom',
			currentLanguage: currentLanguage ?? '',
			outcome,
			intent: classification?.intent ?? '',
			domain: classification?.domain ?? '',
			timeEstimateBestCase: classification?.timeEstimate?.bestCase ?? '',
			timeEstimateRealistic: classification?.timeEstimate?.realistic ?? '',
			scope: classification?.scope ?? '',
		};
		const sharedMeasurements = {
			promptLength: request.prompt.length,
			numReferences: request.references?.length ?? 0,
			numToolReferences: request.toolReferences?.length ?? 0,
			confidence: classification?.confidence ?? 0,
			latencyMs,
		};

		// Send telemetry
		/* __GDPR__
			"promptCategorization" : {
				"owner": "digitarald",
				"comment": "Classifies agent requests for understanding user intent and response quality",
				"taxonomyVersion": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The taxonomy version used for classification (e.g. v2). Used to segment data when taxonomy keys change." },
				"sessionId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat session identifier" },
				"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The extension-generated request identifier, matches panel.request requestId" },
				"vscodeRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The VS Code chat request id, for joining with VS Code telemetry events" },
				"modeName": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat mode name being used" },
				"currentLanguage": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The language ID of the active editor" },
				"outcome": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Classification outcome: empty string for success, partialClassification for recovered core fields, or error kind (timeout, requestFailed, noToolCall, parseError, invalidClassification, error)" },
				"intent": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The classified intent (populated on success or partialClassification, empty string on failure)" },
				"domain": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The classified domain (populated on success or partialClassification, empty string on failure)" },
				"timeEstimateBestCase": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "ISO 8601 duration for best case time estimate" },
				"timeEstimateRealistic": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "ISO 8601 duration for realistic time estimate" },
				"scope": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The classified scope (populated on success or partialClassification, empty string on failure)" },
				"promptLength": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Length of the user prompt in characters" },
				"numReferences": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of context references attached to the request" },
				"numToolReferences": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of tool references in the request" },
				"confidence": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Confidence score of the classification (0.0 to 1.0)" },
				"latencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Time in milliseconds to complete the classification" }
			}
		*/
		// Fresh copies are passed to each call so neither event can observe changes made to the other's payload
		this.telemetryService.sendMSFTTelemetryEvent(
			'promptCategorization',
			{ ...sharedProperties },
			{ ...sharedMeasurements }
		);

		// Send internal telemetry with full metrics including PAI data (reasoning + prompt)
		// Truncate prompt to 8192 chars to avoid telemetry backend limits; promptLength measurement preserves original size
		const MAX_TELEMETRY_PROMPT_LENGTH = 8192;
		const truncatedPrompt = request.prompt.length > MAX_TELEMETRY_PROMPT_LENGTH
			? request.prompt.slice(0, MAX_TELEMETRY_PROMPT_LENGTH)
			: request.prompt;

		this.telemetryService.sendInternalMSFTTelemetryEvent(
			'promptCategorization',
			{
				...sharedProperties,
				errorDetail: truncatedErrorDetail,
				reasoning: classification?.reasoning ?? '',
				prompt: truncatedPrompt,
			},
			{ ...sharedMeasurements }
		);

		this.logService.debug(`[PromptCategorizer] Classification complete: outcome=${outcome || 'success'}, latencyMs=${latencyMs}, intent=${classification?.intent}, domain=${classification?.domain}, scope=${classification?.scope}`);
	}
}