CoCalc -- automodeService.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/automodeService.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { RequestType } from '@vscode/copilot-api';
7
import type { ChatRequest } from 'vscode';
8
import { FetchedValue } from '../../../shared-fetch-utils/common/fetchedValue';
9
import { createServiceIdentifier } from '../../../util/common/services';
10
import { Disposable, DisposableMap } from '../../../util/vs/base/common/lifecycle';
11
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
12
import { ChatLocation } from '../../../vscodeTypes';
13
import { IAuthenticationService } from '../../authentication/common/authentication';
14
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
15
import { IEnvService } from '../../env/common/envService';
16
import { ILogService } from '../../log/common/logService';
17
import { createCapiClientFetchedValue } from '../../networking/common/capiClientFetchedValue';
18
import { isAbortError } from '../../networking/common/fetcherService';
19
import { IChatEndpoint } from '../../networking/common/networking';
20
import { IRequestLogger } from '../../requestLogger/common/requestLogger';
21
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
22
import { ITelemetryService } from '../../telemetry/common/telemetry';
23
import { ICAPIClientService } from '../common/capiClient';
24
import { AutoChatEndpoint } from './autoChatEndpoint';
25
import { RouterDecisionError, RouterDecisionFetcher, RoutingContextSignals } from './routerDecisionFetcher';
26

27
/**
 * Shape of the JSON payload parsed from the auto-mode token request.
 */
interface AutoModeAPIResponse {
	/** Model identifiers offered for auto mode; consumers in this file scan the list in order. */
	available_models: string[];
	/** Expiry timestamp; multiplied by 1000 before being compared with `Date.now()`, i.e. treated as seconds. */
	expires_at: number;
	/** Optional discount values keyed by model identifier. */
	discounted_costs?: { [key: string]: number };
	/** Session token forwarded to the router request and to the created auto endpoint. */
	session_token: string;
}
33

34
/**
 * Per-conversation state kept by the auto mode service between resolves.
 */
interface AutoModelCacheEntry {
	/** Endpoint returned by the most recent resolve for this conversation. */
	endpoint: AutoChatEndpoint;
	/** Token bank used to obtain the auto-mode token for this conversation. */
	tokenBank: AutoModeTokenBank;
	/** Session token seen by the most recent resolve; compared to decide whether `endpoint` can be reused. */
	lastSessionToken?: string;
	/** Prompt recorded by the most recent resolve; compared against the next prompt to detect a new turn. */
	lastRoutedPrompt?: string;
	/** Reason recorded when the most recent resolve did not get a model from the router. */
	routerFallbackReason?: string;
	/** Number of distinct turns counted for this conversation so far. */
	turnCount: number;
	/** When true, the next resolve re-runs the router instead of skipping it. */
	needsReEval: boolean;
}
43

44
/**
 * Owns a fetched auto-mode token for one chat location and hands it out on demand.
 *
 * The token is requested through `createCapiClientFetchedValue`; this class only supplies the
 * request, the response parsing and the staleness rule.
 */
class AutoModeTokenBank extends Disposable {
	private readonly _fetchedValue: FetchedValue<AutoModeAPIResponse>;
	/** True once {@link getToken} has been called since the last successful fetch. */
	private _usedSinceLastFetch = false;

	/**
	 * @param debugName Label for this bank; reassigned by the owner once the bank is bound to a conversation.
	 * @param location Chat location; selects the experiment variable used for the model hint.
	 * @param capiClientService Passed through to the fetched-value factory.
	 * @param authService Source of the Copilot token placed in the `Authorization` header.
	 * @param _logService Currently unused; kept so the constructor signature stays stable.
	 * @param expService Source of the model-hint treatment variable.
	 * @param envService Passed through to the fetched-value factory.
	 */
	constructor(
		public debugName: string,
		location: ChatLocation,
		capiClientService: ICAPIClientService,
		authService: IAuthenticationService,
		_logService: ILogService,
		expService: IExperimentationService,
		envService: IEnvService,
	) {
		super();

		const isEditor = location === ChatLocation.Editor;
		const expName = isEditor
			? 'copilotchat.autoModelHint.editor'
			: 'copilotchat.autoModelHint';
		// A used token is refreshed once it is within this window of its expiry.
		const refreshWindowMs = 5 * 60 * 1000;

		this._fetchedValue = this._register(createCapiClientFetchedValue<AutoModeAPIResponse>(capiClientService, envService, {
			request: async () => {
				const authToken = (await authService.getCopilotToken()).token;
				const hintOverride = expService.getTreatmentVariable<string>(expName);
				const model_hints = [hintOverride || 'auto'];
				// In the editor, a non-default hint is always followed by the generic 'auto' hint.
				if (isEditor && model_hints[0] !== 'auto') {
					model_hints.push('auto');
				}
				return {
					headers: {
						'Content-Type': 'application/json',
						'Authorization': `Bearer ${authToken}`,
					},
					method: 'POST' as const,
					json: { auto_mode: { model_hints } },
				};
			},
			requestMetadata: { type: RequestType.AutoModels },
			parseResponse: async (res) => {
				const isSuccess = res.status >= 200 && res.status < 300;
				if (!isSuccess) {
					// Best effort: include the body in the error, but never fail because it could not be read.
					const text = await res.text().catch(() => '');
					throw new Error(`AutoMode token response status: ${res.status}${text ? `, body: ${text}` : ''}`);
				}
				const data = await res.json() as AutoModeAPIResponse;
				// A fresh token has not been handed out yet.
				this._usedSinceLastFetch = false;
				return data;
			},
			isStale: (token) => {
				// A token nobody has asked for since it was fetched is never reported as stale.
				if (!this._usedSinceLastFetch) {
					return false;
				}
				return token.expires_at * 1000 - Date.now() < refreshWindowMs;
			},
			// NOTE(review): presumably keeps the value refreshed in the background — confirm against the factory.
			keepCacheHot: true,
		}));
	}

	/**
	 * Marks the bank as used and resolves the current token.
	 *
	 * @returns The parsed auto-mode token response.
	 */
	async getToken(): Promise<AutoModeAPIResponse> {
		this._usedSinceLastFetch = true;
		return this._fetchedValue.resolve();
	}
}
105

106
/** Service identifier used to register and look up {@link IAutomodeService}. */
export const IAutomodeService = createServiceIdentifier<IAutomodeService>('IAutomodeService');

/**
 * Chooses the concrete chat endpoint to use when a request is made in auto mode.
 */
export interface IAutomodeService {
	readonly _serviceBrand: undefined;

	/**
	 * Resolves the endpoint auto mode should use for the conversation the request belongs to.
	 *
	 * @param chatRequest The request being served, if any.
	 * @param knownEndpoints Endpoints the caller is able to serve; the chosen model is one of these.
	 * @returns The endpoint to send the request to.
	 */
	resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise<IChatEndpoint>;

	/**
	 * Marks the router cache for this conversation as needing re-evaluation.
	 * The next call to {@link resolveAutoModeEndpoint} will re-run the router
	 * instead of returning the cached endpoint.
	 */
	invalidateRouterCache(chatRequest: ChatRequest): void;
}
120

121
/**
 * Default {@link IAutomodeService} implementation.
 *
 * Keeps one cache entry per conversation (endpoint, token bank and router bookkeeping) plus a
 * pre-created "reserve" token bank per chat location that is handed to the next new conversation
 * at that location. All cached state is dropped whenever the authentication state changes.
 */
export class AutomodeService extends Disposable implements IAutomodeService {
	readonly _serviceBrand: undefined;
	/** Per-conversation state, keyed by conversation id. */
	private readonly _autoModelCache: Map<string, AutoModelCacheEntry> = new Map();
	/** One spare token bank per chat location, consumed by the next new conversation there. */
	private readonly _reserveTokens: DisposableMap<ChatLocation, AutoModeTokenBank> = new DisposableMap();
	private readonly _routerDecisionFetcher: RouterDecisionFetcher;

	constructor(
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IAuthenticationService private readonly _authService: IAuthenticationService,
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IExperimentationService private readonly _expService: IExperimentationService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IEnvService private readonly _envService: IEnvService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
	) {
		super();
		this._register(this._authService.onDidAuthenticationChange(() => {
			// Tokens belong to the previous auth state: drop every conversation and
			// rebuild a reserve bank for each location that had one.
			this._disposeCachedConversations();
			const locations = Array.from(this._reserveTokens.keys());
			this._reserveTokens.clearAndDisposeAll();
			for (const location of locations) {
				this._reserveTokens.set(location, this._createReserveTokenBank(location));
			}
		}));
		this._serviceBrand = undefined;
		this._routerDecisionFetcher = new RouterDecisionFetcher(this._capiClientService, this._authService, this._logService, this._telemetryService, this._requestLogger);
	}

	override dispose(): void {
		this._disposeCachedConversations();
		this._reserveTokens.dispose();
		super.dispose();
	}

	/**
	 * Flags the cached entry of the request's conversation so that the next
	 * {@link resolveAutoModeEndpoint} call re-runs the router. Does nothing when the
	 * conversation has no cache entry yet.
	 */
	invalidateRouterCache(chatRequest: ChatRequest): void {
		const conversationId = this._getConversationId(chatRequest);
		const entry = this._autoModelCache.get(conversationId);
		if (entry) {
			entry.needsReEval = true;
			this._logService.trace(`[AutomodeService] Router cache invalidated for conversation ${conversationId}`);
		}
	}

	/**
	 * Resolve an auto mode endpoint.
	 * Optionally uses a router model to select the best endpoint based on the prompt.
	 *
	 * @param chatRequest The request being served; when undefined the conversation id falls back to `'unknown'`.
	 * @param knownEndpoints Endpoints the client can serve.
	 * @returns The endpoint cached for, or newly created for, the conversation.
	 * @throws If `knownEndpoints` is empty or no available model matches a known endpoint;
	 * failures from fetching the token are rethrown.
	 */
	async resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise<IChatEndpoint> {
		if (!knownEndpoints.length) {
			throw new Error('No auto mode endpoints provided.');
		}

		const conversationId = this._getConversationId(chatRequest);
		const entry = this._autoModelCache.get(conversationId);
		const tokenBank = this._acquireTokenBank(entry, chatRequest?.location, conversationId);
		try {
			return await this._resolveWithTokenBank(chatRequest, knownEndpoints, conversationId, entry, tokenBank);
		} catch (e) {
			// A bank acquired for a brand-new conversation is only owned by the cache once the
			// resolve succeeds. On failure nothing references it any more, so dispose it here
			// instead of leaking it.
			if (!entry && this._autoModelCache.get(conversationId)?.tokenBank !== tokenBank) {
				tokenBank.dispose();
			}
			throw e;
		}
	}

	/**
	 * Runs token fetch, router/default/vision model selection and telemetry, then stores the
	 * updated cache entry for the conversation and returns its endpoint.
	 */
	private async _resolveWithTokenBank(
		chatRequest: ChatRequest | undefined,
		knownEndpoints: IChatEndpoint[],
		conversationId: string,
		entry: AutoModelCacheEntry | undefined,
		tokenBank: AutoModeTokenBank,
	): Promise<IChatEndpoint> {
		const token = await tokenBank.getToken();

		// After the first turn, skip the router unless explicitly invalidated
		// (e.g. after conversation compaction/summarization). Token refresh and
		// default model selection still run so available-model changes are respected.
		const skipRouter = entry !== undefined && entry.turnCount > 0 && !entry.needsReEval;
		if (entry?.needsReEval) {
			entry.needsReEval = false;
		}

		const routerResult = skipRouter
			? { lastRoutedPrompt: chatRequest?.prompt?.trim() ?? entry?.lastRoutedPrompt }
			: await this._tryRouterSelection(chatRequest, conversationId, entry, token, knownEndpoints);
		let selectedModel = routerResult.selectedModel;
		const lastRoutedPrompt = routerResult.lastRoutedPrompt;
		const routerFallbackReason = routerResult.fallbackReason;

		// Default model selection when router was skipped or failed
		if (!selectedModel) {
			if (routerFallbackReason) {
				/* __GDPR__
					"automode.routerFallback" : {
						"owner": "lramos15",
						"comment": "Reports when the auto mode router is skipped or fails and falls back to default model selection",
						"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The reason the router was skipped or failed, e.g. emptyPrompt, emptyCandidateList, noMatchingEndpoint, routerError, routerTimeout, or a server error code" },
						"hasImage": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request contained an attached image" }
					}
				*/
				this._telemetryService.sendMSFTTelemetryEvent('automode.routerFallback', {
					reason: routerFallbackReason,
					hasImage: String(hasImage(chatRequest)),
				});
			}
			selectedModel = this._selectDefaultModel(entry?.endpoint?.modelProvider, token.available_models, knownEndpoints);
		}

		selectedModel = this._applyVisionFallback(chatRequest, selectedModel, token.available_models, knownEndpoints);

		// Emit the final model selection alongside the router's recommendation
		// so analysts can detect overrides without fragile telemetry joins
		if (!skipRouter && routerResult.candidateModel) {
			/* __GDPR__
				"automode.routerModelSelection" : {
					"owner": "aashnagarg",
					"comment": "Reports the router's recommended model vs the actual model used after all client-side overrides",
					"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The conversation ID" },
					"candidateModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The router's top candidate model (candidate_models[0])" },
					"actualModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model actually selected after all client-side overrides" },
					"overrideReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Why the actual model differs from the candidate: none or clientOverride" }
				}
			*/
			const candidateModel = routerResult.candidateModel;
			const overrideReason = candidateModel === selectedModel.model ? 'none' : 'clientOverride';
			this._telemetryService.sendMSFTTelemetryEvent('automode.routerModelSelection', {
				conversationId,
				candidateModel,
				actualModel: selectedModel.model,
				overrideReason,
			});
		}

		// Reuse the cached endpoint if the session token and model haven't changed
		const canReuseEndpoint = !!entry?.endpoint
			&& entry.lastSessionToken === token.session_token
			&& entry.endpoint.model === selectedModel.model;
		const autoEndpoint = (entry && canReuseEndpoint)
			? entry.endpoint
			: this._instantiationService.createInstance(AutoChatEndpoint, selectedModel, token.session_token, token.discounted_costs?.[selectedModel.model] || 0, this._calculateDiscountRange(token.discounted_costs));

		const isNewTurn = !entry || lastRoutedPrompt !== entry.lastRoutedPrompt;
		this._autoModelCache.set(conversationId, {
			endpoint: autoEndpoint,
			tokenBank,
			lastSessionToken: token.session_token,
			lastRoutedPrompt,
			routerFallbackReason,
			turnCount: (entry?.turnCount ?? 0) + (isNewTurn ? 1 : 0),
			// The flag was cleared above before the router ran; if it is set again by now, an
			// invalidation arrived while this resolve was in flight and must survive for the next call.
			needsReEval: entry?.needsReEval ?? false,
		});
		return autoEndpoint;
	}

	/** Derives the cache key for a request: session resource, then session id, then `'unknown'`. */
	private _getConversationId(chatRequest: ChatRequest | undefined): string {
		return chatRequest?.sessionResource?.toString() ?? chatRequest?.sessionId ?? 'unknown';
	}

	/** Creates a fresh token bank labelled `'reserve'` for the given chat location. */
	private _createReserveTokenBank(location: ChatLocation): AutoModeTokenBank {
		return new AutoModeTokenBank('reserve', location, this._capiClientService, this._authService, this._logService, this._expService, this._envService);
	}

	/** Disposes the token bank of every cached conversation and empties the cache. */
	private _disposeCachedConversations(): void {
		for (const entry of this._autoModelCache.values()) {
			entry.tokenBank.dispose();
		}
		this._autoModelCache.clear();
	}

	/**
	 * Returns the token bank to use for a conversation: the cached one when the conversation is
	 * known, otherwise the reserve bank for the location (or a new one), after which a
	 * replacement reserve bank is put in place.
	 */
	private _acquireTokenBank(entry: AutoModelCacheEntry | undefined, location: ChatLocation | undefined, conversationId: string): AutoModeTokenBank {
		if (entry) {
			return entry.tokenBank;
		}
		const loc = location ?? ChatLocation.Panel;
		const tokenBank = this._reserveTokens.deleteAndLeak(loc) ?? this._createReserveTokenBank(loc);
		this._reserveTokens.set(loc, this._createReserveTokenBank(loc));
		tokenBank.debugName = conversationId;
		return tokenBank;
	}

	/**
	 * Asks the router for a model when routing applies to this request.
	 *
	 * @returns `selectedModel` and `candidateModel` when the router produced a usable pick;
	 * otherwise only `lastRoutedPrompt`, plus `fallbackReason` when the router was attempted
	 * or the prompt was empty. Never throws: router failures are logged and mapped to a
	 * `fallbackReason`.
	 */
	private async _tryRouterSelection(
		chatRequest: ChatRequest | undefined,
		conversationId: string,
		entry: AutoModelCacheEntry | undefined,
		token: AutoModeAPIResponse,
		knownEndpoints: IChatEndpoint[],
	): Promise<{ selectedModel?: IChatEndpoint; lastRoutedPrompt?: string; fallbackReason?: string; candidateModel?: string }> {
		const prompt = chatRequest?.prompt?.trim();
		const lastRoutedPrompt = entry?.lastRoutedPrompt ?? prompt;

		if (!this._isRouterEnabled(chatRequest) || conversationId === 'unknown') {
			return { lastRoutedPrompt };
		}

		if (!prompt?.length) {
			return { lastRoutedPrompt, fallbackReason: 'emptyPrompt' };
		}

		// Prompt hasn't changed since last decision — skip router but allow endpoint refresh
		if (entry && entry.lastRoutedPrompt === prompt) {
			return { lastRoutedPrompt };
		}

		try {
			const contextSignals: RoutingContextSignals = {
				// The 'unknown' conversation id already returned above, so the id is always real here.
				session_id: conversationId,
				reference_count: chatRequest?.references?.length,
				prompt_char_count: prompt.length,
				previous_model: entry?.endpoint?.model,
				turn_number: (entry?.turnCount ?? 0) + 1,
			};
			const routingMethod = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.AutoModeRoutingMethod, this._expService) || undefined;

			// Filter available_models to only those the client can actually serve.
			// The AutoModels API and Models API are separate CAPI calls that can be
			// out of sync (e.g. a new model appears in available_models before the
			// Models API returns it). Sending unresolvable models to the router
			// causes it to recommend models the client must silently discard.
			const knownModelIds = new Set(knownEndpoints.map(e => e.model));
			const routableModels: string[] = [];
			const droppedModels: string[] = [];
			for (const m of token.available_models) {
				(knownModelIds.has(m) ? routableModels : droppedModels).push(m);
			}
			if (!routableModels.length) {
				this._logService.warn(`[AutomodeService] No available_models matched knownEndpoints. available_models=[${token.available_models.join(', ')}], knownEndpoints=[${knownEndpoints.map(e => e.model).join(', ')}]`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
			}
			if (droppedModels.length) {
				this._logService.info(`[AutomodeService] Filtered ${droppedModels.length} unresolvable model(s) before routing: [${droppedModels.join(', ')}]`);
			}

			const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, routableModels, undefined, contextSignals, conversationId, chatRequest?.id, routingMethod, hasImage(chatRequest));

			if (result.fallback) {
				this._logService.info(`[AutomodeService] Router signaled fallback: ${result.fallback_reason ?? 'unknown'}, routing_method=${result.routing_method ?? 'n/a'}`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'routerFallback' };
			}

			if (!result.candidate_models.length) {
				return { lastRoutedPrompt: prompt, fallbackReason: 'emptyCandidateList' };
			}

			// Trust the router's ranked candidate list directly.
			// Same-provider preference is intentionally NOT applied here — the router
			// already accounts for available models and re-runs after /compact, so
			// overriding its pick with same-provider negates cost-saving decisions.
			// Same-provider is still used in _selectDefaultModel (the non-router fallback).
			const selectedModel = this._findFirstAvailableModel(result.candidate_models, knownEndpoints);

			if (!selectedModel) {
				this._logService.warn(`[AutomodeService] None of the router's candidate_models matched knownEndpoints: [${result.candidate_models.join(', ')}]`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
			}

			if (result.sticky_override) {
				this._logService.trace(`[AutomodeService] Sticky routing override: confidence=${(result.confidence * 100).toFixed(1)}%, label=${result.predicted_label}, router_model=${result.candidate_models[0]}, actual_model=${selectedModel.model}`);
			}
			return { selectedModel, lastRoutedPrompt: prompt, candidateModel: result.candidate_models[0] };
		} catch (e) {
			let fallbackReason: string;
			if (isAbortError(e)) {
				fallbackReason = 'routerTimeout';
			} else if (e instanceof RouterDecisionError && e.errorCode) {
				fallbackReason = e.errorCode;
			} else {
				fallbackReason = 'routerError';
			}
			// Anything can be thrown; only read `.message` from real Error instances.
			const detail = e instanceof Error ? e.message : String(e);
			this._logService.error(`Failed to get routed model for conversation ${conversationId} (${fallbackReason}):`, detail);
			return { lastRoutedPrompt: prompt, fallbackReason };
		}
	}

	/**
	 * Picks a model without the router: the first available model from the current provider
	 * when one is given and matches, otherwise the first available model with a known endpoint.
	 *
	 * @throws If no available model has a known endpoint.
	 */
	private _selectDefaultModel(currentModelProvider: string | undefined, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint {
		// Kept as a ternary on purpose: `provider && f() ?? g()` evaluates to '' for an
		// empty-string provider and would skip the general fallback entirely.
		const sameProviderModel = currentModelProvider
			? this._findSameProviderModel(currentModelProvider, availableModels, knownEndpoints)
			: undefined;
		const selectedModel = sameProviderModel ?? this._findFirstAvailableModel(availableModels, knownEndpoints);
		if (!selectedModel) {
			const errorMsg = 'Auto mode failed: no available model found in known endpoints.';
			this._logService.error(errorMsg);
			throw new Error(errorMsg);
		}
		return selectedModel;
	}

	/** Routing applies only to panel chat (or requests without a location) and only when the experiment-based setting is on. */
	private _isRouterEnabled(chatRequest: ChatRequest | undefined): boolean {
		const isPanelChat = !chatRequest?.location || chatRequest?.location === ChatLocation.Panel;
		return isPanelChat && this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.UseAutoModeRouting, this._expService);
	}

	/**
	 * Find the first model in available_models that has a known endpoint.
	 */
	private _findFirstAvailableModel(availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		for (const model of availableModels) {
			const endpoint = knownEndpoints.find(e => e.model === model);
			if (endpoint) {
				return endpoint;
			}
		}
		return undefined;
	}

	/**
	 * Find the first model in available_models whose knownEndpoint has the same modelProvider
	 * as the current model. Skips any model that doesn't have a known endpoint.
	 */
	private _findSameProviderModel(currentModelProvider: string, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		for (const model of availableModels) {
			const endpoint = knownEndpoints.find(e => e.model === model);
			if (endpoint && endpoint.modelProvider === currentModelProvider) {
				return endpoint;
			}
		}
		return undefined;
	}

	/**
	 * If the request contains an image and the selected model doesn't support vision,
	 * fall back to the first vision-capable model from the available models.
	 * When no such model exists, a warning is logged and the original selection is returned.
	 */
	private _applyVisionFallback(chatRequest: ChatRequest | undefined, selectedModel: IChatEndpoint, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint {
		if (!hasImage(chatRequest) || selectedModel.supportsVision) {
			return selectedModel;
		}
		const visionModel = availableModels
			.map(model => knownEndpoints.find(e => e.model === model))
			.find(endpoint => endpoint?.supportsVision);
		if (visionModel) {
			this._logService.trace(`Selected model '${selectedModel.model}' does not support vision, falling back to '${visionModel.model}'.`);
			return visionModel;
		}
		this._logService.warn(`Request contains an image but no vision-capable model is available.`);
		return selectedModel;
	}

	/**
	 * Computes the smallest and largest discount value.
	 *
	 * @returns `{ low: 0, high: 0 }` when `discounts` is undefined or has no entries.
	 */
	private _calculateDiscountRange(discounts: Record<string, number> | undefined): { low: number; high: number } {
		if (!discounts) {
			return { low: 0, high: 0 };
		}
		let low = Infinity;
		let high = -Infinity;
		let hasValues = false;

		for (const value of Object.values(discounts)) {
			hasValues = true;
			if (value < low) {
				low = value;
			}
			if (value > high) {
				high = value;
			}
		}
		return hasValues ? { low, high } : { low: 0, high: 0 };
	}
}
450

451
/**
 * Reports whether any reference attached to the request carries an image.
 *
 * A reference counts when its `value` is a non-null object with a string `mimeType`
 * that starts with `image/`.
 *
 * @param chatRequest The request to inspect; `undefined` or a request without references yields `false`.
 */
function hasImage(chatRequest: ChatRequest | undefined): boolean {
	const references = chatRequest?.references;
	if (!references) {
		return false;
	}
	return references.some(ref => {
		const value = ref.value;
		return typeof value === 'object'
			&& value !== null
			&& 'mimeType' in value
			&& typeof value.mimeType === 'string'
			&& value.mimeType.startsWith('image/');
	});
}
464

465
Product

Resources

Company