Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/automodeService.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { RequestType } from '@vscode/copilot-api';
7
import type { ChatRequest } from 'vscode';
8
import { FetchedValue } from '../../../shared-fetch-utils/common/fetchedValue';
9
import { createServiceIdentifier } from '../../../util/common/services';
10
import { Disposable, DisposableMap } from '../../../util/vs/base/common/lifecycle';
11
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
12
import { ChatLocation } from '../../../vscodeTypes';
13
import { IAuthenticationService } from '../../authentication/common/authentication';
14
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
15
import { IEnvService } from '../../env/common/envService';
16
import { ILogService } from '../../log/common/logService';
17
import { createCapiClientFetchedValue } from '../../networking/common/capiClientFetchedValue';
18
import { isAbortError } from '../../networking/common/fetcherService';
19
import { IChatEndpoint } from '../../networking/common/networking';
20
import { IRequestLogger } from '../../requestLogger/common/requestLogger';
21
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
22
import { ITelemetryService } from '../../telemetry/common/telemetry';
23
import { ICAPIClientService } from '../common/capiClient';
24
import { AutoChatEndpoint } from './autoChatEndpoint';
25
import { RouterDecisionError, RouterDecisionFetcher, RoutingContextSignals } from './routerDecisionFetcher';
26
27
/**
 * JSON payload parsed from the auto mode token request.
 */
interface AutoModeAPIResponse {
	/** Model identifiers; compared against `IChatEndpoint.model` when an endpoint is picked. */
	available_models: string[];
	/**
	 * Expiry timestamp. It is multiplied by 1000 before being compared with
	 * `Date.now()`, so it is presumably expressed in epoch seconds.
	 */
	expires_at: number;
	/** Optional discount values, looked up by model identifier. */
	discounted_costs?: { [key: string]: number };
	/** Session token handed to the router request and to the created auto endpoint. */
	session_token: string;
}
33
34
/**
 * State remembered per conversation between calls that resolve the auto mode endpoint.
 */
interface AutoModelCacheEntry {
	/** Endpoint returned by the most recent resolution for this conversation. */
	endpoint: AutoChatEndpoint;
	/** Token bank owned by this conversation; disposed together with the entry. */
	tokenBank: AutoModeTokenBank;
	/** Session token that was current when `endpoint` was stored; used to decide whether it can be reused. */
	lastSessionToken?: string;
	/** Trimmed prompt recorded by the most recent resolution; compared to detect a new turn. */
	lastRoutedPrompt?: string;
	/** Fallback reason reported by the most recent resolution, if any. */
	routerFallbackReason?: string;
	/** Incremented each time a resolution records a prompt that differs from the previous one. */
	turnCount: number;
	/** When true, the next resolution runs the router again even after the first turn. */
	needsReEval: boolean;
}
43
44
/**
 * Holds the auto mode token for one consumer (a conversation, or the spare
 * "reserve" slot of a chat location) and fetches it through a
 * {@link FetchedValue} that is registered for disposal with this object.
 */
class AutoModeTokenBank extends Disposable {
	private readonly _fetchedValue: FetchedValue<AutoModeAPIResponse>;
	/** Set by {@link getToken}; cleared each time a fresh response has been parsed. */
	private _usedSinceLastFetch = false;

	/**
	 * @param debugName Label for this bank (`'reserve'` until it is assigned to a conversation).
	 * @param location Chat location; selects the experiment variable that supplies the model hint.
	 * @param capiClientService Passed through to the fetched-value factory.
	 * @param authService Source of the Copilot token used in the `Authorization` header.
	 * @param _logService Currently unused; kept so the constructor signature stays stable.
	 * @param expService Source of the model-hint treatment variable.
	 * @param envService Passed through to the fetched-value factory.
	 */
	constructor(
		public debugName: string,
		location: ChatLocation,
		capiClientService: ICAPIClientService,
		authService: IAuthenticationService,
		_logService: ILogService,
		expService: IExperimentationService,
		envService: IEnvService,
	) {
		super();

		const expName = location === ChatLocation.Editor
			? 'copilotchat.autoModelHint.editor'
			: 'copilotchat.autoModelHint';

		this._fetchedValue = this._register(createCapiClientFetchedValue<AutoModeAPIResponse>(capiClientService, envService, {
			request: async () => {
				const authToken = (await authService.getCopilotToken()).token;
				const extValue = expService.getTreatmentVariable<string>(expName);
				// An absent or empty treatment value means "no specific hint".
				const model_hints = [extValue || 'auto'];
				// In the editor a specific hint is always followed by the generic 'auto' hint.
				if (location === ChatLocation.Editor && model_hints[0] !== 'auto') {
					model_hints.push('auto');
				}
				return {
					headers: {
						'Content-Type': 'application/json',
						'Authorization': `Bearer ${authToken}`,
					},
					method: 'POST' as const,
					json: { auto_mode: { model_hints } },
				};
			},
			requestMetadata: { type: RequestType.AutoModels },
			parseResponse: async (res) => {
				if (res.status < 200 || res.status >= 300) {
					// Best effort: include the body in the error when it can be read.
					const text = await res.text().catch(() => '');
					throw new Error(`AutoMode token response status: ${res.status}${text ? `, body: ${text}` : ''}`);
				}
				const data = await res.json() as AutoModeAPIResponse;
				this._usedSinceLastFetch = false;
				return data;
			},
			isStale: (token) => {
				// A token nobody has asked for since it was fetched is never reported stale.
				if (!this._usedSinceLastFetch) {
					return false;
				}
				// Otherwise it is stale once fewer than five minutes remain before expiry.
				return token.expires_at * 1000 - Date.now() < 5 * 60 * 1000;
			},
			// NOTE(review): presumably keeps the value refreshed in the background; confirm in FetchedValue.
			keepCacheHot: true,
		}));
	}

	/**
	 * Marks the bank as used and resolves the token through the underlying fetched value.
	 */
	async getToken(): Promise<AutoModeAPIResponse> {
		this._usedSinceLastFetch = true;
		return this._fetchedValue.resolve();
	}
}
105
106
export const IAutomodeService = createServiceIdentifier<IAutomodeService>('IAutomodeService');

/**
 * Service that picks the concrete chat endpoint to use for "auto" model selection.
 */
export interface IAutomodeService {
	readonly _serviceBrand: undefined;

	/**
	 * Resolves the endpoint to use for the given request.
	 *
	 * @param chatRequest The request being served, if any.
	 * @param knownEndpoints Endpoints the caller is able to use; the result wraps one of them.
	 */
	resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise<IChatEndpoint>;

	/**
	 * Marks the router cache for this conversation as needing re-evaluation.
	 * The next call to {@link resolveAutoModeEndpoint} will re-run the router
	 * instead of returning the cached endpoint.
	 */
	invalidateRouterCache(chatRequest: ChatRequest): void;
}
120
121
/**
 * Default {@link IAutomodeService} implementation.
 *
 * Keeps one cache entry (endpoint, token bank, routing bookkeeping) per
 * conversation and one spare "reserve" token bank per chat location that is
 * handed to the next conversation without an entry.
 */
export class AutomodeService extends Disposable implements IAutomodeService {
	readonly _serviceBrand: undefined;
	/** Per-conversation state, keyed by conversation id. */
	private readonly _autoModelCache: Map<string, AutoModelCacheEntry> = new Map();
	/** Spare token banks, keyed by chat location. */
	private _reserveTokens: DisposableMap<ChatLocation, AutoModeTokenBank> = new DisposableMap();
	private readonly _routerDecisionFetcher: RouterDecisionFetcher;

	constructor(
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IAuthenticationService private readonly _authService: IAuthenticationService,
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IExperimentationService private readonly _expService: IExperimentationService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IEnvService private readonly _envService: IEnvService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
	) {
		super();
		// On an authentication change drop every cached conversation and replace
		// each existing reserve bank with a fresh one for the same location.
		this._register(this._authService.onDidAuthenticationChange(() => {
			this._disposeCachedTokenBanks();
			const keys = Array.from(this._reserveTokens.keys());
			this._reserveTokens.clearAndDisposeAll();
			for (const location of keys) {
				this._reserveTokens.set(location, this._createReserveTokenBank(location));
			}
		}));
		this._serviceBrand = undefined;
		this._routerDecisionFetcher = new RouterDecisionFetcher(this._capiClientService, this._authService, this._logService, this._telemetryService, this._requestLogger);
	}

	override dispose(): void {
		this._disposeCachedTokenBanks();
		this._reserveTokens.dispose();
		super.dispose();
	}

	/**
	 * Flags the cache entry of the request's conversation so that the next
	 * resolution runs the router again. Does nothing when the conversation has
	 * no cache entry.
	 */
	invalidateRouterCache(chatRequest: ChatRequest): void {
		const conversationId = this._getConversationId(chatRequest);
		const entry = this._autoModelCache.get(conversationId);
		if (entry) {
			entry.needsReEval = true;
			this._logService.trace(`[AutomodeService] Router cache invalidated for conversation ${conversationId}`);
		}
	}

	/**
	 * Resolve an auto mode endpoint
	 * Optionally uses a router model to select the best endpoint based on the prompt.
	 *
	 * @param chatRequest The request being served, if any.
	 * @param knownEndpoints Endpoints the caller can use; must not be empty.
	 * @throws Error when `knownEndpoints` is empty or no available model matches a known endpoint.
	 */
	async resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise<IChatEndpoint> {
		if (!knownEndpoints.length) {
			throw new Error('No auto mode endpoints provided.');
		}

		const conversationId = this._getConversationId(chatRequest);
		const entry = this._autoModelCache.get(conversationId);
		const tokenBank = this._acquireTokenBank(entry, chatRequest?.location, conversationId);
		try {
			return await this._resolveWithTokenBank(chatRequest, knownEndpoints, conversationId, entry, tokenBank);
		} catch (e) {
			// A bank freshly taken from the reserve is only owned by the cache once
			// an entry has been stored. If resolution failed before that point,
			// nothing references the bank any more, so dispose it instead of leaking it.
			if (!entry && this._autoModelCache.get(conversationId)?.tokenBank !== tokenBank) {
				tokenBank.dispose();
			}
			throw e;
		}
	}

	/**
	 * Performs the actual resolution once a token bank has been chosen: fetches
	 * the token, runs (or skips) the router, applies default and vision
	 * fallbacks, emits telemetry and stores the updated cache entry.
	 */
	private async _resolveWithTokenBank(
		chatRequest: ChatRequest | undefined,
		knownEndpoints: IChatEndpoint[],
		conversationId: string,
		entry: AutoModelCacheEntry | undefined,
		tokenBank: AutoModeTokenBank,
	): Promise<IChatEndpoint> {
		const token = await tokenBank.getToken();

		// After the first turn, skip the router unless explicitly invalidated
		// (e.g. after conversation compaction/summarization). Token refresh and
		// default model selection still run so available-model changes are respected.
		const skipRouter = entry !== undefined && entry.turnCount > 0 && !entry.needsReEval;
		if (entry?.needsReEval) {
			entry.needsReEval = false;
		}

		const routerResult = skipRouter
			? { lastRoutedPrompt: chatRequest?.prompt?.trim() ?? entry?.lastRoutedPrompt }
			: await this._tryRouterSelection(chatRequest, conversationId, entry, token, knownEndpoints);
		let selectedModel = routerResult.selectedModel;
		const lastRoutedPrompt = routerResult.lastRoutedPrompt;
		const routerFallbackReason = routerResult.fallbackReason;

		// Default model selection when router was skipped or failed
		if (!selectedModel) {
			if (routerFallbackReason) {
				/* __GDPR__
					"automode.routerFallback" : {
						"owner": "lramos15",
						"comment": "Reports when the auto mode router is skipped or fails and falls back to default model selection",
						"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The reason the router was skipped or failed, e.g. emptyPrompt, emptyCandidateList, noMatchingEndpoint, routerError, routerTimeout, or a server error code" },
						"hasImage": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request contained an attached image" }
					}
				*/
				this._telemetryService.sendMSFTTelemetryEvent('automode.routerFallback', {
					reason: routerFallbackReason,
					hasImage: String(hasImage(chatRequest)),
				});
			}
			selectedModel = this._selectDefaultModel(entry?.endpoint?.modelProvider, token.available_models, knownEndpoints);
		}

		selectedModel = this._applyVisionFallback(chatRequest, selectedModel, token.available_models, knownEndpoints);

		// Emit the final model selection alongside the router's recommendation
		// so analysts can detect overrides without fragile telemetry joins
		if (!skipRouter && routerResult.candidateModel) {
			/* __GDPR__
				"automode.routerModelSelection" : {
					"owner": "aashnagarg",
					"comment": "Reports the router's recommended model vs the actual model used after all client-side overrides",
					"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The conversation ID" },
					"candidateModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The router's top candidate model (candidate_models[0])" },
					"actualModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model actually selected after all client-side overrides" },
					"overrideReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Why the actual model differs from the candidate: none or clientOverride" }
				}
			*/
			const candidateModel = routerResult.candidateModel;
			const overrideReason = candidateModel === selectedModel.model ? 'none' : 'clientOverride';
			this._telemetryService.sendMSFTTelemetryEvent('automode.routerModelSelection', {
				conversationId,
				candidateModel,
				actualModel: selectedModel.model,
				overrideReason,
			});
		}

		// Reuse the cached endpoint if the session token and model haven't changed
		const autoEndpoint = (entry?.endpoint && entry.lastSessionToken === token.session_token && entry.endpoint.model === selectedModel.model)
			? entry.endpoint
			: this._instantiationService.createInstance(AutoChatEndpoint, selectedModel, token.session_token, token.discounted_costs?.[selectedModel.model] || 0, this._calculateDiscountRange(token.discounted_costs));

		// A turn is counted only when the recorded prompt differs from the previous one.
		const isNewTurn = !entry || lastRoutedPrompt !== entry.lastRoutedPrompt;
		this._autoModelCache.set(conversationId, {
			endpoint: autoEndpoint,
			tokenBank,
			lastSessionToken: token.session_token,
			lastRoutedPrompt,
			routerFallbackReason,
			turnCount: (entry?.turnCount ?? 0) + (isNewTurn ? 1 : 0),
			needsReEval: false,
		});
		return autoEndpoint;
	}

	/**
	 * Derives the cache key for a request: the session resource if present,
	 * otherwise the session id, otherwise the literal `'unknown'`.
	 */
	private _getConversationId(chatRequest: ChatRequest | undefined): string {
		return chatRequest?.sessionResource?.toString() ?? chatRequest?.sessionId ?? 'unknown';
	}

	/** Creates a token bank labelled `'reserve'` for the given location. */
	private _createReserveTokenBank(location: ChatLocation): AutoModeTokenBank {
		return new AutoModeTokenBank('reserve', location, this._capiClientService, this._authService, this._logService, this._expService, this._envService);
	}

	/** Disposes the token bank of every cached conversation and empties the cache. */
	private _disposeCachedTokenBanks(): void {
		for (const entry of this._autoModelCache.values()) {
			entry.tokenBank.dispose();
		}
		this._autoModelCache.clear();
	}

	/**
	 * Returns the token bank for a conversation. An existing entry keeps its
	 * bank; otherwise the reserve bank of the location (default: panel) is taken
	 * over, relabelled with the conversation id, and a new reserve is put in its place.
	 */
	private _acquireTokenBank(entry: AutoModelCacheEntry | undefined, location: ChatLocation | undefined, conversationId: string): AutoModeTokenBank {
		if (entry) {
			return entry.tokenBank;
		}
		const loc = location ?? ChatLocation.Panel;
		// deleteAndLeak hands over ownership: the map no longer disposes the bank.
		const tokenBank = this._reserveTokens.deleteAndLeak(loc) ?? this._createReserveTokenBank(loc);
		this._reserveTokens.set(loc, this._createReserveTokenBank(loc));
		tokenBank.debugName = conversationId;
		return tokenBank;
	}

	/**
	 * Asks the router for a model when routing applies to this request.
	 *
	 * Never throws: every failure is reported through `fallbackReason`, in
	 * which case `selectedModel` is left unset so the caller falls back to
	 * default model selection.
	 */
	private async _tryRouterSelection(
		chatRequest: ChatRequest | undefined,
		conversationId: string,
		entry: AutoModelCacheEntry | undefined,
		token: AutoModeAPIResponse,
		knownEndpoints: IChatEndpoint[],
	): Promise<{ selectedModel?: IChatEndpoint; lastRoutedPrompt?: string; fallbackReason?: string; candidateModel?: string }> {
		const prompt = chatRequest?.prompt?.trim();
		const lastRoutedPrompt = entry?.lastRoutedPrompt ?? prompt;

		if (!this._isRouterEnabled(chatRequest) || conversationId === 'unknown') {
			return { lastRoutedPrompt };
		}

		if (!prompt?.length) {
			return { lastRoutedPrompt, fallbackReason: 'emptyPrompt' };
		}

		// Prompt hasn't changed since last decision — skip router but allow endpoint refresh
		if (entry && entry.lastRoutedPrompt === prompt) {
			return { lastRoutedPrompt };
		}

		try {
			const contextSignals: RoutingContextSignals = {
				// 'unknown' conversations returned above, so the id is always a real one here.
				session_id: conversationId,
				reference_count: chatRequest?.references?.length,
				prompt_char_count: prompt.length,
				previous_model: entry?.endpoint?.model,
				turn_number: (entry?.turnCount ?? 0) + 1,
			};
			const routingMethod = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.AutoModeRoutingMethod, this._expService) || undefined;

			// Filter available_models to only those the client can actually serve.
			// The AutoModels API and Models API are separate CAPI calls that can be
			// out of sync (e.g. a new model appears in available_models before the
			// Models API returns it). Sending unresolvable models to the router
			// causes it to recommend models the client must silently discard.
			const knownModelIds = new Set(knownEndpoints.map(e => e.model));
			const routableModels: string[] = [];
			const droppedModels: string[] = [];
			for (const m of token.available_models) {
				(knownModelIds.has(m) ? routableModels : droppedModels).push(m);
			}
			if (!routableModels.length) {
				this._logService.warn(`[AutomodeService] No available_models matched knownEndpoints. available_models=[${token.available_models.join(', ')}], knownEndpoints=[${knownEndpoints.map(e => e.model).join(', ')}]`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
			}
			if (droppedModels.length) {
				this._logService.info(`[AutomodeService] Filtered ${droppedModels.length} unresolvable model(s) before routing: [${droppedModels.join(', ')}]`);
			}

			const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, routableModels, undefined, contextSignals, conversationId, chatRequest?.id, routingMethod, hasImage(chatRequest));

			if (result.fallback) {
				this._logService.info(`[AutomodeService] Router signaled fallback: ${result.fallback_reason ?? 'unknown'}, routing_method=${result.routing_method ?? 'n/a'}`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'routerFallback' };
			}

			if (!result.candidate_models.length) {
				return { lastRoutedPrompt: prompt, fallbackReason: 'emptyCandidateList' };
			}

			// Trust the router's ranked candidate list directly.
			// Same-provider preference is intentionally NOT applied here — the router
			// already accounts for available models and re-runs after /compact, so
			// overriding its pick with same-provider negates cost-saving decisions.
			// Same-provider is still used in _selectDefaultModel (the non-router fallback).
			const selectedModel = this._findFirstAvailableModel(result.candidate_models, knownEndpoints);

			if (!selectedModel) {
				this._logService.warn(`[AutomodeService] None of the router's candidate_models matched knownEndpoints: [${result.candidate_models.join(', ')}]`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
			}

			if (result.sticky_override) {
				this._logService.trace(`[AutomodeService] Sticky routing override: confidence=${(result.confidence * 100).toFixed(1)}%, label=${result.predicted_label}, router_model=${result.candidate_models[0]}, actual_model=${selectedModel.model}`);
			}
			return { selectedModel, lastRoutedPrompt: prompt, candidateModel: result.candidate_models[0] };
		} catch (e) {
			let fallbackReason: string;
			if (isAbortError(e)) {
				fallbackReason = 'routerTimeout';
			} else if (e instanceof RouterDecisionError && e.errorCode) {
				fallbackReason = e.errorCode;
			} else {
				fallbackReason = 'routerError';
			}
			// Anything can be thrown; only Error instances are guaranteed to carry a message.
			const detail = e instanceof Error ? e.message : String(e);
			this._logService.error(`Failed to get routed model for conversation ${conversationId} (${fallbackReason}):`, detail);
			return { lastRoutedPrompt: prompt, fallbackReason };
		}
	}

	/**
	 * Picks a model without the router: prefers an available model from the
	 * same provider as the current one, otherwise the first available model
	 * that has a known endpoint.
	 *
	 * @throws Error when no available model has a known endpoint.
	 */
	private _selectDefaultModel(currentModelProvider: string | undefined, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint {
		// Use a ternary rather than `provider && find(...) ?? fallback`: with an
		// empty-string provider the `&&` yields '' (not nullish), which would
		// suppress the fallback and make this method throw needlessly.
		const sameProviderModel = currentModelProvider
			? this._findSameProviderModel(currentModelProvider, availableModels, knownEndpoints)
			: undefined;
		const selectedModel = sameProviderModel ?? this._findFirstAvailableModel(availableModels, knownEndpoints);
		if (!selectedModel) {
			const errorMsg = 'Auto mode failed: no available model found in known endpoints.';
			this._logService.error(errorMsg);
			throw new Error(errorMsg);
		}
		return selectedModel;
	}

	/**
	 * Routing applies only to panel chat (or requests without a location) and
	 * only when the routing configuration/experiment is on.
	 */
	private _isRouterEnabled(chatRequest: ChatRequest | undefined): boolean {
		const isPanelChat = !chatRequest?.location || chatRequest?.location === ChatLocation.Panel;
		return isPanelChat && this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.UseAutoModeRouting, this._expService);
	}

	/**
	 * Find the first model in available_models that has a known endpoint.
	 */
	private _findFirstAvailableModel(availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		for (const model of availableModels) {
			const endpoint = knownEndpoints.find(e => e.model === model);
			if (endpoint) {
				return endpoint;
			}
		}
		return undefined;
	}

	/**
	 * Find the first model in available_models whose knownEndpoint has the same modelProvider
	 * as the current model. Skips any model that doesn't have a known endpoint.
	 */
	private _findSameProviderModel(currentModelProvider: string, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		for (const model of availableModels) {
			const endpoint = knownEndpoints.find(e => e.model === model);
			if (endpoint && endpoint.modelProvider === currentModelProvider) {
				return endpoint;
			}
		}
		return undefined;
	}

	/**
	 * If the request contains an image and the selected model doesn't support vision,
	 * fall back to the first vision-capable model from the available models.
	 * When none exists, the originally selected model is returned and a warning is logged.
	 */
	private _applyVisionFallback(chatRequest: ChatRequest | undefined, selectedModel: IChatEndpoint, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint {
		if (!hasImage(chatRequest) || selectedModel.supportsVision) {
			return selectedModel;
		}
		const visionModel = availableModels
			.map(model => knownEndpoints.find(e => e.model === model))
			.find(endpoint => endpoint?.supportsVision);
		if (visionModel) {
			this._logService.trace(`Selected model '${selectedModel.model}' does not support vision, falling back to '${visionModel.model}'.`);
			return visionModel;
		}
		this._logService.warn(`Request contains an image but no vision-capable model is available.`);
		return selectedModel;
	}

	/**
	 * Computes the smallest and largest discount value.
	 * Returns `{ low: 0, high: 0 }` when there are no discounts at all.
	 */
	private _calculateDiscountRange(discounts: Record<string, number> | undefined): { low: number; high: number } {
		if (!discounts) {
			return { low: 0, high: 0 };
		}
		let low = Infinity;
		let high = -Infinity;
		let hasValues = false;

		for (const value of Object.values(discounts)) {
			hasValues = true;
			if (value < low) {
				low = value;
			}
			if (value > high) {
				high = value;
			}
		}
		return hasValues ? { low, high } : { low: 0, high: 0 };
	}
}
450
451
/**
 * Reports whether the request has at least one reference whose value is a
 * non-null object with a string `mimeType` starting with `image/`.
 *
 * @param chatRequest The request to inspect; `undefined` yields `false`.
 * @returns `true` when an image reference is present, otherwise `false`
 *          (including when the request has no references).
 */
function hasImage(chatRequest: ChatRequest | undefined): boolean {
	const references = chatRequest?.references;
	if (!references) {
		return false;
	}
	return references.some(ref => {
		const value = ref.value;
		return typeof value === 'object'
			&& value !== null
			&& 'mimeType' in value
			&& typeof value.mimeType === 'string'
			&& value.mimeType.startsWith('image/');
	});
}
464
465