Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/routerDecisionFetcher.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { RequestType } from '@vscode/copilot-api';
7
import { Codicon } from '../../../util/vs/base/common/codicons';
8
import { IAuthenticationService } from '../../authentication/common/authentication';
9
import { ILogService } from '../../log/common/logService';
10
import { Response } from '../../networking/common/fetcherService';
11
import { IRequestLogger, LoggedRequestKind } from '../../requestLogger/common/requestLogger';
12
import { ITelemetryService } from '../../telemetry/common/telemetry';
13
import { ICAPIClientService } from '../common/capiClient';
14
15
/**
 * Shape of the JSON body returned by the model router on a successful (OK) response.
 *
 * The body is parsed with `JSON.parse` and is not validated at runtime by this file,
 * so the types below describe the expected payload rather than a checked guarantee.
 */
export interface RouterDecisionResponse {
	/** Classification label predicted by the router. */
	predicted_label: 'needs_reasoning' | 'no_reasoning' | 'fallback';
	/** Confidence of the decision. Rendered as a percentage (value × 100) in logs — presumably in the 0–1 range. */
	confidence: number;
	/** Latency reported by the router itself, in milliseconds (distinct from the client-measured end-to-end latency). */
	latency_ms: number;
	/** Models proposed by the router. The first entry is the one reported to telemetry as the top candidate. */
	candidate_models: string[];
	/** Per-label scores. Rendered as percentages (value × 100) in logs. */
	scores: Record<'needs_reasoning' | 'no_reasoning', number>;

	/** Sticky-override flag; treated as `false` when absent. */
	sticky_override?: boolean;
	/** Routing method echoed by the server; logged as `n/a` and reported as an empty string when absent. */
	routing_method?: string;

	/** Fallback flag; treated as `false` when absent. */
	fallback?: boolean;
	/** Reason accompanying a fallback; reported as an empty string when absent. */
	fallback_reason?: string;

	/** Numeric scores keyed by string, forwarded JSON-encoded to restricted telemetry. NOTE(review): key semantics are not visible here — presumably model ids. */
	hydra_scores?: Record<string, number>;
	/** Forwarded to restricted telemetry as `chosenModel`; empty string when absent. */
	chosen_model?: string;
	/** Forwarded to restricted telemetry as the `chosenShortfall` measurement. */
	chosen_shortfall?: number;
}
32
33
/**
 * Optional context sent along with a router request.
 *
 * Whatever fields are present are spread verbatim into the top level of the request body,
 * next to `prompt` and `available_models`. All fields are optional.
 */
export interface RoutingContextSignals {
	/** NOTE(review): presumably identifies the chat session — confirm against the caller. */
	session_id?: string;
	/** NOTE(review): presumably the index of the current turn in the conversation — confirm base (0 or 1) against the caller. */
	turn_number?: number;
	/** NOTE(review): presumably the model used for the preceding turn — confirm against the caller. */
	previous_model?: string;

	/** NOTE(review): presumably the number of references attached to the request — confirm against the caller. */
	reference_count?: number;
	/** NOTE(review): presumably the prompt length in characters — confirm against the caller. */
	prompt_char_count?: number;
}
40
41
/**
 * Error raised for a non-OK HTTP response from the router API.
 *
 * When the response body is JSON with a string `error` field (e.g. `no_vision_models`),
 * that value is exposed as `errorCode`, letting callers branch on the failure kind
 * instead of string-matching the message.
 */
export class RouterDecisionError extends Error {
	/** Machine-readable error code taken from the response body, if one was present. */
	public readonly errorCode?: string;
	override readonly name: 'RouterDecisionError';

	/**
	 * @param message Human-readable description of the failure.
	 * @param errorCode Optional error code parsed from the response body.
	 */
	constructor(message: string, errorCode?: string) {
		super(message);
		this.errorCode = errorCode;
		this.name = 'RouterDecisionError';
	}
}
52
53
/**
 * Fetches routing decisions from a classification API to determine which model should handle a query.
 *
 * The query and the list of available models are POSTed to the model-router endpoint
 * (`RequestType.ModelRouter`). Every successful decision is written to the trace log, recorded
 * in the request logger as a markdown entry, and reported through two telemetry events.
 */
export class RouterDecisionFetcher {
	/** Milliseconds after which the abort signal passed to the router request is triggered. */
	private static readonly _requestTimeoutMs = 1000;

	constructor(
		private readonly _capiClientService: ICAPIClientService,
		private readonly _authService: IAuthenticationService,
		private readonly _logService: ILogService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _requestLogger: IRequestLogger,
	) {
	}

	/**
	 * Asks the router which model(s) should handle `query`.
	 *
	 * @param query The prompt text, sent as `prompt`.
	 * @param autoModeToken Sent as the `Copilot-Session-Token` header.
	 * @param availableModels Sent as `available_models`.
	 * @param stickyThreshold Sent as `sticky_threshold` when not `undefined`.
	 * @param contextSignals Optional fields spread into the top level of the request body.
	 * @param conversationId Reported in telemetry only (empty string when omitted).
	 * @param vscodeRequestId Reported in telemetry only (empty string when omitted).
	 * @param routingMethod Sent as `routing_method` when truthy.
	 * @param hasImage When truthy, `has_image: true` is added to the request body.
	 * @returns The parsed router response. The body is not validated beyond `JSON.parse`.
	 * @throws RouterDecisionError when the router answers with a non-OK status.
	 * @throws Any error raised while obtaining the Copilot token, performing the request
	 * (including when the abort signal fires), or parsing the response body.
	 */
	async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number, contextSignals?: RoutingContextSignals, conversationId?: string, vscodeRequestId?: string, routingMethod?: string, hasImage?: boolean): Promise<RouterDecisionResponse> {
		const startTime = Date.now();
		const requestBody = this._buildRequestBody(query, availableModels, stickyThreshold, contextSignals, routingMethod, hasImage);
		const copilotToken = (await this._authService.getCopilotToken()).token;

		const abortController = new AbortController();
		const timeout = setTimeout(() => abortController.abort(), RouterDecisionFetcher._requestTimeoutMs);
		let response: Response;
		try {
			response = await this._capiClientService.makeRequest<Response>({
				method: 'POST',
				headers: {
					'Authorization': `Bearer ${copilotToken}`,
					'Copilot-Session-Token': autoModeToken,
				},
				body: JSON.stringify(requestBody),
				signal: abortController.signal,
			}, { type: RequestType.ModelRouter });
		} finally {
			// The timer only guards the request call itself; it is cleared whether the call succeeded or failed.
			clearTimeout(timeout);
		}

		if (!response.ok) {
			throw await this._createRequestError(response);
		}

		const text = await response.text();
		const result: RouterDecisionResponse = JSON.parse(text);
		const e2eLatencyMs = Date.now() - startTime;

		// The payload is unvalidated JSON. Telemetry already tolerated a missing `candidate_models`;
		// normalize it once so that logging tolerates it too instead of throwing a TypeError.
		const candidateModels: string[] = result.candidate_models ?? [];

		this._logDecision(query, startTime, e2eLatencyMs, result, candidateModels);
		this._sendTelemetry(result, candidateModels, availableModels, e2eLatencyMs, conversationId, vscodeRequestId);

		return result;
	}

	/** Assembles the JSON request body, adding optional keys only when the corresponding argument is set. */
	private _buildRequestBody(query: string, availableModels: string[], stickyThreshold: number | undefined, contextSignals: RoutingContextSignals | undefined, routingMethod: string | undefined, hasImage: boolean | undefined): Record<string, unknown> {
		const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels, ...contextSignals };
		if (stickyThreshold !== undefined) {
			// Explicit undefined check so that a threshold of 0 is still sent.
			requestBody.sticky_threshold = stickyThreshold;
		}
		if (routingMethod) {
			requestBody.routing_method = routingMethod;
		}
		if (hasImage) {
			requestBody.has_image = true;
		}
		return requestBody;
	}

	/** Builds the error for a non-OK response, extracting a string `error` field from a JSON body when there is one. */
	private async _createRequestError(response: Response): Promise<RouterDecisionError> {
		// Best effort: a body that cannot be read or is not JSON simply yields no error code.
		const errorText = await response.text().catch(() => '');
		let errorCode: string | undefined;
		try {
			const parsed = JSON.parse(errorText);
			if (typeof parsed === 'object' && parsed !== null && 'error' in parsed && typeof parsed.error === 'string') {
				errorCode = parsed.error;
			}
		} catch { /* not JSON */ }
		return new RouterDecisionError(`Router decision request failed with status ${response.status}: ${response.statusText}`, errorCode);
	}

	/** Writes the decision to the trace log and records it as a markdown entry in the request logger. */
	private _logDecision(query: string, startTime: number, e2eLatencyMs: number, result: RouterDecisionResponse, candidateModels: string[]): void {
		const asPercent = (value: number) => `${(value * 100).toFixed(1)}%`;
		const stickyOverride = result.sticky_override ?? false;

		this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, (confidence: ${asPercent(result.confidence)}, scores: needs_reasoning=${asPercent(result.scores.needs_reasoning)}, no_reasoning=${asPercent(result.scores.no_reasoning)}) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${candidateModels.join(', ')}, sticky_override: ${stickyOverride}, routing_method: ${result.routing_method ?? 'n/a'}, fallback: ${result.fallback ?? false})`);

		this._requestLogger.addEntry({
			type: LoggedRequestKind.MarkdownContentRequest,
			debugName: `Auto Mode Router`,
			startTimeMs: startTime,
			icon: Codicon.lightbulbSparkle,
			markdownContent: [
				`# Auto Mode Router Decision`,
				`## Result`,
				`- **Predicted Label**: ${result.predicted_label}`,
				`- **Confidence**: ${asPercent(result.confidence)}`,
				`- **Sticky Override**: ${stickyOverride}`,
				`## Scores`,
				`- **Needs Reasoning**: ${asPercent(result.scores.needs_reasoning)}`,
				`- **No Reasoning**: ${asPercent(result.scores.no_reasoning)}`,
				`## Latency`,
				`- **Router Latency**: ${result.latency_ms}ms`,
				`- **E2E Latency**: ${e2eLatencyMs}ms`,
				`## Candidate Models`,
				...candidateModels.map(m => `- ${m}`),
				`## Query`,
				query,
			].join('\n'),
		});
	}

	/** Reports the decision through the MSFT telemetry event and the enhanced GH telemetry event. */
	private _sendTelemetry(result: RouterDecisionResponse, candidateModels: string[], availableModels: string[], e2eLatencyMs: number, conversationId: string | undefined, vscodeRequestId: string | undefined): void {
		// Properties and measurements common to both events; the restricted event extends them.
		const sharedProperties = {
			conversationId: conversationId ?? '',
			vscodeRequestId: vscodeRequestId ?? '',
			predictedLabel: result.predicted_label,
			routingMethod: result.routing_method ?? '',
			fallback: String(result.fallback ?? false),
			fallbackReason: result.fallback_reason ?? '',
			candidateModel: candidateModels[0] ?? '',
		};
		const sharedMeasurements = {
			confidence: result.confidence,
			latencyMs: result.latency_ms,
			e2eLatencyMs: e2eLatencyMs,
			stickyOverride: result.sticky_override ? 1 : 0,
		};

		/* __GDPR__
			"automode.routerDecision" : {
				"owner": "lramos15",
				"comment": "Reports the routing decision made by the auto mode router API",
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The conversation ID in which the routing decision was made." },
				"vscodeRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The VS Code chat request id in which the routing decision was made." },
				"predictedLabel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The predicted classification label (needs_reasoning, no_reasoning, or fallback)" },
				"routingMethod": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The routing method used for this request (empty=server default, binary, hydra). Identifies the A/B/C experiment path." },
				"fallback": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the router signaled a fallback to default automod selection." },
				"fallbackReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The reason provided by the server when fallback is true." },
				"candidateModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The top candidate model recommended by the router before any sticky-provider or vision overrides are applied." },
				"confidence": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The confidence score of the routing decision" },
				"latencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The latency of the router API call in milliseconds" },
				"e2eLatencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The end-to-end latency of the router request in milliseconds, including network overhead" },
				"stickyOverride": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether the router applied a sticky override (1) or not (0)" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('automode.routerDecision', sharedProperties, sharedMeasurements);

		this._telemetryService.sendEnhancedGHTelemetryEvent('automode.routerDecisionRestricted',
			{
				...sharedProperties,
				chosenModel: result.chosen_model ?? '',
				candidateModels: JSON.stringify(candidateModels),
				availableModels: JSON.stringify(availableModels),
				stickyOverrideStr: String(result.sticky_override ?? false),
				hydraScores: result.hydra_scores ? JSON.stringify(result.hydra_scores) : 'null',
				binaryScores: JSON.stringify(result.scores),
			},
			{
				...sharedMeasurements,
				chosenShortfall: result.chosen_shortfall,
				scoreNeedsReasoning: result.scores.needs_reasoning,
				scoreNoReasoning: result.scores.no_reasoning,
			}
		);
	}
}
205
206