Path: blob/main/extensions/copilot/src/platform/endpoint/node/routerDecisionFetcher.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestType } from '@vscode/copilot-api';
import { Codicon } from '../../../util/vs/base/common/codicons';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { ILogService } from '../../log/common/logService';
import { Response } from '../../networking/common/fetcherService';
import { IRequestLogger, LoggedRequestKind } from '../../requestLogger/common/requestLogger';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { ICAPIClientService } from '../common/capiClient';

/** How long a router request may stay in flight before it is aborted, in milliseconds. */
const ROUTER_REQUEST_TIMEOUT_MS = 1000;

/**
 * Shape that {@link RouterDecisionFetcher.getRouterDecision} assumes for the JSON body of a
 * successful router response. The body is parsed but not validated against this interface.
 */
export interface RouterDecisionResponse {
	predicted_label: 'needs_reasoning' | 'no_reasoning' | 'fallback';
	confidence: number;
	latency_ms: number;
	candidate_models: string[];
	scores: {
		needs_reasoning: number;
		no_reasoning: number;
	};
	sticky_override?: boolean;
	routing_method?: string;
	fallback?: boolean;
	fallback_reason?: string;
	hydra_scores?: Record<string, number>;
	chosen_model?: string;
	chosen_shortfall?: number;
}

/**
 * Optional signals that are spread into the router request body next to `prompt` and
 * `available_models`.
 */
export interface RoutingContextSignals {
	turn_number?: number;
	session_id?: string;
	previous_model?: string;
	reference_count?: number;
	prompt_char_count?: number;
}

/**
 * Thrown when the router API returns a non-OK HTTP response.
 * Carries the parsed `errorCode` from the response body (e.g. `no_vision_models`)
 * so callers can classify the failure without string-matching the message.
 */
export class RouterDecisionError extends Error {
	override readonly name = 'RouterDecisionError';
	constructor(message: string, public readonly errorCode?: string) {
		super(message);
	}
}

/** Renders `value * 100` with one decimal place followed by `%` (e.g. `0.5` -> `50.0%`). */
function formatPercent(value: number): string {
	return `${(value * 100).toFixed(1)}%`;
}

/**
 * Fetches routing decisions from a classification API to determine which model should handle a query.
 *
 * This class sends queries along with available models to a router API endpoint, which uses reasoning
 * classification to select the most appropriate model based on the query's requirements.
 */
export class RouterDecisionFetcher {
	constructor(
		private readonly _capiClientService: ICAPIClientService,
		private readonly _authService: IAuthenticationService,
		private readonly _logService: ILogService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _requestLogger: IRequestLogger,
	) {
	}

	/**
	 * Requests a routing decision, then records it in the trace log, the request logger and telemetry.
	 *
	 * @param query Sent as `prompt`; also included verbatim in the request-logger entry.
	 * @param autoModeToken Sent as the `Copilot-Session-Token` header.
	 * @param availableModels Sent as `available_models`; also reported in the restricted telemetry event.
	 * @param stickyThreshold Sent as `sticky_threshold` when not `undefined`.
	 * @param contextSignals Spread into the request body.
	 * @param conversationId Reported in telemetry only (empty string when omitted).
	 * @param vscodeRequestId Reported in telemetry only (empty string when omitted).
	 * @param routingMethod Sent as `routing_method` when truthy.
	 * @param hasImage When truthy, `has_image: true` is added to the request body.
	 * @returns The parsed response body, unmodified.
	 * @throws RouterDecisionError when the response is not OK. Failures of the request itself
	 * (including the abort after {@link ROUTER_REQUEST_TIMEOUT_MS}) and of `JSON.parse` propagate
	 * unchanged.
	 */
	async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number, contextSignals?: RoutingContextSignals, conversationId?: string, vscodeRequestId?: string, routingMethod?: string, hasImage?: boolean): Promise<RouterDecisionResponse> {
		const startTime = Date.now();
		const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels, ...contextSignals };
		if (stickyThreshold !== undefined) {
			requestBody.sticky_threshold = stickyThreshold;
		}
		if (routingMethod) {
			requestBody.routing_method = routingMethod;
		}
		if (hasImage) {
			requestBody.has_image = true;
		}
		const copilotToken = (await this._authService.getCopilotToken()).token;
		const abortController = new AbortController();
		const timeout = setTimeout(() => abortController.abort(), ROUTER_REQUEST_TIMEOUT_MS);
		let response: Response;
		try {
			response = await this._capiClientService.makeRequest<Response>({
				method: 'POST',
				headers: {
					'Authorization': `Bearer ${copilotToken}`,
					'Copilot-Session-Token': autoModeToken,
				},
				body: JSON.stringify(requestBody),
				signal: abortController.signal,
			}, { type: RequestType.ModelRouter });
		} finally {
			// Always release the timer, whether the request resolved or rejected.
			clearTimeout(timeout);
		}

		if (!response.ok) {
			const errorText = await response.text().catch(() => '');
			let errorCode: string | undefined;
			try {
				const parsed = JSON.parse(errorText);
				if (typeof parsed === 'object' && parsed !== null && 'error' in parsed && typeof parsed.error === 'string') {
					errorCode = parsed.error;
				}
			} catch { /* not JSON */ }
			throw new RouterDecisionError(`Router decision request failed with status ${response.status}: ${response.statusText}`, errorCode);
		}

		const text = await response.text();
		const result: RouterDecisionResponse = JSON.parse(text);
		const e2eLatencyMs = Date.now() - startTime;

		// The body is not validated, so `candidate_models` may be absent at runtime. Normalise it
		// once so that logging and telemetry agree and a diagnostics line cannot fail the call.
		const candidateModels: string[] = result.candidate_models ?? [];
		const stickyOverride = result.sticky_override ?? false;
		const confidencePct = formatPercent(result.confidence);
		const needsReasoningPct = formatPercent(result.scores.needs_reasoning);
		const noReasoningPct = formatPercent(result.scores.no_reasoning);

		this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, (confidence: ${confidencePct}, scores: needs_reasoning=${needsReasoningPct}, no_reasoning=${noReasoningPct}) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${candidateModels.join(', ')}, sticky_override: ${stickyOverride}, routing_method: ${result.routing_method ?? 'n/a'}, fallback: ${result.fallback ?? false})`);

		this._requestLogger.addEntry({
			type: LoggedRequestKind.MarkdownContentRequest,
			debugName: `Auto Mode Router`,
			startTimeMs: startTime,
			icon: Codicon.lightbulbSparkle,
			markdownContent: [
				`# Auto Mode Router Decision`,
				`## Result`,
				`- **Predicted Label**: ${result.predicted_label}`,
				`- **Confidence**: ${confidencePct}`,
				`- **Sticky Override**: ${stickyOverride}`,
				`## Scores`,
				`- **Needs Reasoning**: ${needsReasoningPct}`,
				`- **No Reasoning**: ${noReasoningPct}`,
				`## Latency`,
				`- **Router Latency**: ${result.latency_ms}ms`,
				`- **E2E Latency**: ${e2eLatencyMs}ms`,
				`## Candidate Models`,
				...candidateModels.map(m => `- ${m}`),
				`## Query`,
				query,
			].join('\n'),
		});

		// Fields reported identically by both telemetry events below.
		const sharedProperties = {
			conversationId: conversationId ?? '',
			vscodeRequestId: vscodeRequestId ?? '',
			predictedLabel: result.predicted_label,
			routingMethod: result.routing_method ?? '',
			fallback: String(result.fallback ?? false),
			fallbackReason: result.fallback_reason ?? '',
			candidateModel: candidateModels[0] ?? '',
		};
		const sharedMeasurements = {
			confidence: result.confidence,
			latencyMs: result.latency_ms,
			e2eLatencyMs: e2eLatencyMs,
			stickyOverride: result.sticky_override ? 1 : 0,
		};

		/* __GDPR__
			"automode.routerDecision" : {
				"owner": "lramos15",
				"comment": "Reports the routing decision made by the auto mode router API",
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The conversation ID in which the routing decision was made." },
				"vscodeRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The VS Code chat request id in which the routing decision was made." },
				"predictedLabel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The predicted classification label (needs_reasoning, no_reasoning, or fallback)" },
				"routingMethod": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The routing method used for this request (empty=server default, binary, hydra). Identifies the A/B/C experiment path." },
				"fallback": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the router signaled a fallback to default automod selection." },
				"fallbackReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The reason provided by the server when fallback is true." },
				"candidateModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The top candidate model recommended by the router before any sticky-provider or vision overrides are applied." },
				"confidence": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The confidence score of the routing decision" },
				"latencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The latency of the router API call in milliseconds" },
				"e2eLatencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The end-to-end latency of the router request in milliseconds, including network overhead" },
				"stickyOverride": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether the router applied a sticky override (1) or not (0)" }
			}
		*/
		// Each event receives its own object so neither sender can observe changes made by the other.
		this._telemetryService.sendMSFTTelemetryEvent('automode.routerDecision',
			{ ...sharedProperties },
			{ ...sharedMeasurements }
		);

		this._telemetryService.sendEnhancedGHTelemetryEvent('automode.routerDecisionRestricted',
			{
				...sharedProperties,
				chosenModel: result.chosen_model ?? '',
				candidateModels: JSON.stringify(candidateModels),
				availableModels: JSON.stringify(availableModels),
				stickyOverrideStr: String(stickyOverride),
				hydraScores: result.hydra_scores ? JSON.stringify(result.hydra_scores) : 'null',
				binaryScores: JSON.stringify(result.scores),
			},
			{
				...sharedMeasurements,
				chosenShortfall: result.chosen_shortfall,
				scoreNeedsReasoning: result.scores.needs_reasoning,
				scoreNoReasoning: result.scores.no_reasoning,
			}
		);

		return result;
	}
}