Path: blob/main/extensions/copilot/src/platform/endpoint/node/automodeService.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestType } from '@vscode/copilot-api';
import type { ChatRequest } from 'vscode';
import { FetchedValue } from '../../../shared-fetch-utils/common/fetchedValue';
import { createServiceIdentifier } from '../../../util/common/services';
import { Disposable, DisposableMap } from '../../../util/vs/base/common/lifecycle';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { ChatLocation } from '../../../vscodeTypes';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
import { IEnvService } from '../../env/common/envService';
import { ILogService } from '../../log/common/logService';
import { createCapiClientFetchedValue } from '../../networking/common/capiClientFetchedValue';
import { isAbortError } from '../../networking/common/fetcherService';
import { IChatEndpoint } from '../../networking/common/networking';
import { IRequestLogger } from '../../requestLogger/common/requestLogger';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { ICAPIClientService } from '../common/capiClient';
import { AutoChatEndpoint } from './autoChatEndpoint';
import { RouterDecisionError, RouterDecisionFetcher, RoutingContextSignals } from './routerDecisionFetcher';

/** Conversation id used when a request carries neither a session resource nor a session id. */
const UNKNOWN_CONVERSATION_ID = 'unknown';

/** Shape of the response parsed from the auto-mode token request. */
interface AutoModeAPIResponse {
	available_models: string[];
	expires_at: number;
	discounted_costs?: { [key: string]: number };
	session_token: string;
}

/** Per-conversation state kept in {@link AutomodeService}'s cache. */
interface AutoModelCacheEntry {
	endpoint: AutoChatEndpoint;
	tokenBank: AutoModeTokenBank;
	lastSessionToken?: string;
	lastRoutedPrompt?: string;
	routerFallbackReason?: string;
	turnCount: number;
	needsReEval: boolean;
}

/** Outcome of a router attempt; `selectedModel` is absent when the router was skipped or failed. */
interface RouterSelectionResult {
	selectedModel?: IChatEndpoint;
	lastRoutedPrompt?: string;
	fallbackReason?: string;
	candidateModel?: string;
}

/**
 * Owns the fetched auto-mode token for one chat location and tracks whether the
 * token has been handed out since it was last fetched.
 */
class AutoModeTokenBank extends Disposable {
	private readonly _fetchedValue: FetchedValue<AutoModeAPIResponse>;
	private _usedSinceLastFetch = false;

	constructor(
		public debugName: string,
		location: ChatLocation,
		capiClientService: ICAPIClientService,
		authService: IAuthenticationService,
		_logService: ILogService,
		expService: IExperimentationService,
		envService: IEnvService,
	) {
		super();

		const expName = location === ChatLocation.Editor
			? 'copilotchat.autoModelHint.editor'
			: 'copilotchat.autoModelHint';

		this._fetchedValue = this._register(createCapiClientFetchedValue<AutoModeAPIResponse>(capiClientService, envService, {
			request: async () => {
				const authToken = (await authService.getCopilotToken()).token;
				const extValue = expService.getTreatmentVariable<string>(expName);
				const model_hints = [extValue || 'auto'];
				// Editor requests with an experiment-provided hint also send 'auto' as a second hint.
				if (location === ChatLocation.Editor && model_hints[0] !== 'auto') {
					model_hints.push('auto');
				}
				return {
					headers: {
						'Content-Type': 'application/json',
						'Authorization': `Bearer ${authToken}`,
					},
					method: 'POST' as const,
					json: { auto_mode: { model_hints } },
				};
			},
			requestMetadata: { type: RequestType.AutoModels },
			parseResponse: async (res) => {
				if (res.status < 200 || res.status >= 300) {
					const text = await res.text().catch(() => '');
					throw new Error(`AutoMode token response status: ${res.status}${text ? `, body: ${text}` : ''}`);
				}
				const data = await res.json() as AutoModeAPIResponse;
				this._usedSinceLastFetch = false;
				return data;
			},
			isStale: (token) => {
				// A token that has not been handed out since the last fetch is never reported stale.
				if (!this._usedSinceLastFetch) {
					return false;
				}
				// expires_at is scaled by 1000 before comparing with Date.now();
				// stale once fewer than five minutes remain.
				return token.expires_at * 1000 - Date.now() < 5 * 60 * 1000;
			},
			keepCacheHot: true,
		}));
	}

	/** Marks the token as used and resolves the fetched value. */
	async getToken(): Promise<AutoModeAPIResponse> {
		this._usedSinceLastFetch = true;
		return this._fetchedValue.resolve();
	}
}

export const IAutomodeService = createServiceIdentifier<IAutomodeService>('IAutomodeService');

export interface IAutomodeService {
	readonly _serviceBrand: undefined;

	resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise<IChatEndpoint>;

	/**
	 * Marks the router cache for this conversation as needing re-evaluation.
	 * The next call to {@link resolveAutoModeEndpoint} will re-run the router
	 * instead of returning the cached endpoint.
	 */
	invalidateRouterCache(chatRequest: ChatRequest): void;
}

export class AutomodeService extends Disposable implements IAutomodeService {
	readonly _serviceBrand: undefined;
	private readonly _autoModelCache: Map<string, AutoModelCacheEntry> = new Map();
	private readonly _reserveTokens: DisposableMap<ChatLocation, AutoModeTokenBank> = new DisposableMap();
	private readonly _routerDecisionFetcher: RouterDecisionFetcher;

	constructor(
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IAuthenticationService private readonly _authService: IAuthenticationService,
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IExperimentationService private readonly _expService: IExperimentationService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IEnvService private readonly _envService: IEnvService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
	) {
		super();
		// On authentication change, drop every cached conversation and rebuild the
		// reserve token banks for the locations that previously had one.
		this._register(this._authService.onDidAuthenticationChange(() => {
			this._disposeCachedEntries();
			const keys = Array.from(this._reserveTokens.keys());
			this._reserveTokens.clearAndDisposeAll();
			for (const location of keys) {
				this._reserveTokens.set(location, this._createReserveTokenBank(location));
			}
		}));
		this._serviceBrand = undefined;
		this._routerDecisionFetcher = new RouterDecisionFetcher(this._capiClientService, this._authService, this._logService, this._telemetryService, this._requestLogger);
	}

	override dispose(): void {
		this._disposeCachedEntries();
		this._reserveTokens.dispose();
		super.dispose();
	}

	/**
	 * Flags the cached entry for the request's conversation so that the next
	 * {@link resolveAutoModeEndpoint} call re-runs the router. No-op when the
	 * conversation has no cached entry.
	 */
	invalidateRouterCache(chatRequest: ChatRequest): void {
		const conversationId = this._getConversationId(chatRequest);
		const entry = this._autoModelCache.get(conversationId);
		if (entry) {
			entry.needsReEval = true;
			this._logService.trace(`[AutomodeService] Router cache invalidated for conversation ${conversationId}`);
		}
	}

	/**
	 * Resolve an auto mode endpoint
	 * Optionally uses a router model to select the best endpoint based on the prompt.
	 *
	 * @param chatRequest The request being served, if any.
	 * @param knownEndpoints Endpoints the client can serve; must be non-empty.
	 * @returns The endpoint to use for this request.
	 * @throws Error when `knownEndpoints` is empty, when the token cannot be fetched,
	 * or when none of the available models has a known endpoint.
	 */
	async resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise<IChatEndpoint> {
		if (!knownEndpoints.length) {
			throw new Error('No auto mode endpoints provided.');
		}

		const conversationId = this._getConversationId(chatRequest);
		const entry = this._autoModelCache.get(conversationId);
		const tokenBank = this._acquireTokenBank(entry, chatRequest?.location, conversationId);
		try {
			return await this._resolveWithTokenBank(chatRequest, knownEndpoints, conversationId, entry, tokenBank);
		} catch (e) {
			// A bank acquired for a brand-new conversation is only owned by the cache
			// once resolution succeeds. If resolution fails it would otherwise be
			// orphaned and never disposed, so release it here.
			if (!entry) {
				tokenBank.dispose();
			}
			throw e;
		}
	}

	/**
	 * Performs the token fetch, model selection, telemetry and cache update for
	 * {@link resolveAutoModeEndpoint}. The cache entry is written only after every
	 * step that can throw has completed.
	 */
	private async _resolveWithTokenBank(
		chatRequest: ChatRequest | undefined,
		knownEndpoints: IChatEndpoint[],
		conversationId: string,
		entry: AutoModelCacheEntry | undefined,
		tokenBank: AutoModeTokenBank,
	): Promise<IChatEndpoint> {
		const token = await tokenBank.getToken();

		// After the first turn, skip the router unless explicitly invalidated
		// (e.g. after conversation compaction/summarization). Token refresh and
		// default model selection still run so available-model changes are respected.
		const skipRouter = entry !== undefined && entry.turnCount > 0 && !entry.needsReEval;
		if (entry?.needsReEval) {
			entry.needsReEval = false;
		}

		const routerResult: RouterSelectionResult = skipRouter
			? { lastRoutedPrompt: chatRequest?.prompt?.trim() ?? entry?.lastRoutedPrompt }
			: await this._tryRouterSelection(chatRequest, conversationId, entry, token, knownEndpoints);
		let selectedModel = routerResult.selectedModel;
		const lastRoutedPrompt = routerResult.lastRoutedPrompt;
		const routerFallbackReason = routerResult.fallbackReason;

		// Default model selection when router was skipped or failed
		if (!selectedModel) {
			if (routerFallbackReason) {
				/* __GDPR__
					"automode.routerFallback" : {
						"owner": "lramos15",
						"comment": "Reports when the auto mode router is skipped or fails and falls back to default model selection",
						"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The reason the router was skipped or failed, e.g. emptyPrompt, emptyCandidateList, noMatchingEndpoint, routerError, routerTimeout, or a server error code" },
						"hasImage": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request contained an attached image" }
					}
				*/
				this._telemetryService.sendMSFTTelemetryEvent('automode.routerFallback', {
					reason: routerFallbackReason,
					hasImage: String(hasImage(chatRequest)),
				});
			}
			selectedModel = this._selectDefaultModel(entry?.endpoint?.modelProvider, token.available_models, knownEndpoints);
		}

		selectedModel = this._applyVisionFallback(chatRequest, selectedModel, token.available_models, knownEndpoints);

		// Emit the final model selection alongside the router's recommendation
		// so analysts can detect overrides without fragile telemetry joins
		if (!skipRouter && routerResult.candidateModel) {
			/* __GDPR__
				"automode.routerModelSelection" : {
					"owner": "aashnagarg",
					"comment": "Reports the router's recommended model vs the actual model used after all client-side overrides",
					"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The conversation ID" },
					"candidateModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The router's top candidate model (candidate_models[0])" },
					"actualModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model actually selected after all client-side overrides" },
					"overrideReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Why the actual model differs from the candidate: none or clientOverride" }
				}
			*/
			const candidateModel = routerResult.candidateModel;
			const overrideReason = candidateModel === selectedModel.model ? 'none' : 'clientOverride';
			this._telemetryService.sendMSFTTelemetryEvent('automode.routerModelSelection', {
				conversationId,
				candidateModel,
				actualModel: selectedModel.model,
				overrideReason,
			});
		}

		// Reuse the cached endpoint if the session token and model haven't changed
		const autoEndpoint = (entry?.endpoint && entry.lastSessionToken === token.session_token && entry.endpoint.model === selectedModel.model)
			? entry.endpoint
			: this._instantiationService.createInstance(AutoChatEndpoint, selectedModel, token.session_token, token.discounted_costs?.[selectedModel.model] || 0, this._calculateDiscountRange(token.discounted_costs));

		// The turn counter only advances when the routed prompt differs from the cached one.
		const isNewTurn = !entry || lastRoutedPrompt !== entry.lastRoutedPrompt;
		this._autoModelCache.set(conversationId, {
			endpoint: autoEndpoint,
			tokenBank,
			lastSessionToken: token.session_token,
			lastRoutedPrompt,
			routerFallbackReason,
			turnCount: (entry?.turnCount ?? 0) + (isNewTurn ? 1 : 0),
			needsReEval: false,
		});
		return autoEndpoint;
	}

	/** Derives the cache key for a request, falling back to {@link UNKNOWN_CONVERSATION_ID}. */
	private _getConversationId(chatRequest: ChatRequest | undefined): string {
		return chatRequest?.sessionResource?.toString() ?? chatRequest?.sessionId ?? UNKNOWN_CONVERSATION_ID;
	}

	/** Creates a token bank labelled 'reserve' for the given location. */
	private _createReserveTokenBank(location: ChatLocation): AutoModeTokenBank {
		return new AutoModeTokenBank('reserve', location, this._capiClientService, this._authService, this._logService, this._expService, this._envService);
	}

	/** Disposes the token bank of every cached conversation and empties the cache. */
	private _disposeCachedEntries(): void {
		for (const entry of this._autoModelCache.values()) {
			entry.tokenBank.dispose();
		}
		this._autoModelCache.clear();
	}

	/**
	 * Returns the conversation's existing token bank, or takes the reserve bank for
	 * the location (creating one if none is reserved) and installs a fresh reserve
	 * in its place. `location` defaults to the panel.
	 */
	private _acquireTokenBank(entry: AutoModelCacheEntry | undefined, location: ChatLocation | undefined, conversationId: string): AutoModeTokenBank {
		if (entry) {
			return entry.tokenBank;
		}
		const loc = location ?? ChatLocation.Panel;
		const tokenBank = this._reserveTokens.deleteAndLeak(loc) || this._createReserveTokenBank(loc);
		this._reserveTokens.set(loc, this._createReserveTokenBank(loc));
		tokenBank.debugName = conversationId;
		return tokenBank;
	}

	/**
	 * Asks the router for a model. Never throws: every failure is reported through
	 * `fallbackReason` so the caller can fall back to default selection.
	 */
	private async _tryRouterSelection(
		chatRequest: ChatRequest | undefined,
		conversationId: string,
		entry: AutoModelCacheEntry | undefined,
		token: AutoModeAPIResponse,
		knownEndpoints: IChatEndpoint[],
	): Promise<RouterSelectionResult> {
		const prompt = chatRequest?.prompt?.trim();
		const lastRoutedPrompt = entry?.lastRoutedPrompt ?? prompt;

		if (!this._isRouterEnabled(chatRequest) || conversationId === UNKNOWN_CONVERSATION_ID) {
			return { lastRoutedPrompt };
		}

		if (!prompt?.length) {
			return { lastRoutedPrompt, fallbackReason: 'emptyPrompt' };
		}

		// Prompt hasn't changed since last decision — skip router but allow endpoint refresh
		if (entry && entry.lastRoutedPrompt === prompt) {
			return { lastRoutedPrompt };
		}

		try {
			const contextSignals: RoutingContextSignals = {
				// The unknown-conversation case returned above, so the id is always a real one here.
				session_id: conversationId,
				reference_count: chatRequest?.references?.length,
				prompt_char_count: prompt.length,
				previous_model: entry?.endpoint?.model,
				turn_number: (entry?.turnCount ?? 0) + 1,
			};
			const routingMethod = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.AutoModeRoutingMethod, this._expService) || undefined;

			// Filter available_models to only those the client can actually serve.
			// The AutoModels API and Models API are separate CAPI calls that can be
			// out of sync (e.g. a new model appears in available_models before the
			// Models API returns it). Sending unresolvable models to the router
			// causes it to recommend models the client must silently discard.
			const knownModelIds = new Set(knownEndpoints.map(e => e.model));
			const routableModels: string[] = [];
			const droppedModels: string[] = [];
			for (const m of token.available_models) {
				(knownModelIds.has(m) ? routableModels : droppedModels).push(m);
			}
			if (!routableModels.length) {
				this._logService.warn(`[AutomodeService] No available_models matched knownEndpoints. available_models=[${token.available_models.join(', ')}], knownEndpoints=[${knownEndpoints.map(e => e.model).join(', ')}]`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
			}
			if (droppedModels.length) {
				this._logService.info(`[AutomodeService] Filtered ${droppedModels.length} unresolvable model(s) before routing: [${droppedModels.join(', ')}]`);
			}

			const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, routableModels, undefined, contextSignals, conversationId, chatRequest?.id, routingMethod, hasImage(chatRequest));

			if (result.fallback) {
				this._logService.info(`[AutomodeService] Router signaled fallback: ${result.fallback_reason ?? 'unknown'}, routing_method=${result.routing_method ?? 'n/a'}`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'routerFallback' };
			}

			if (!result.candidate_models.length) {
				return { lastRoutedPrompt: prompt, fallbackReason: 'emptyCandidateList' };
			}

			// Trust the router's ranked candidate list directly.
			// Same-provider preference is intentionally NOT applied here — the router
			// already accounts for available models and re-runs after /compact, so
			// overriding its pick with same-provider negates cost-saving decisions.
			// Same-provider is still used in _selectDefaultModel (the non-router fallback).
			const selectedModel = this._findFirstAvailableModel(result.candidate_models, knownEndpoints);

			if (!selectedModel) {
				this._logService.warn(`[AutomodeService] None of the router's candidate_models matched knownEndpoints: [${result.candidate_models.join(', ')}]`);
				return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
			}

			if (result.sticky_override) {
				this._logService.trace(`[AutomodeService] Sticky routing override: confidence=${(result.confidence * 100).toFixed(1)}%, label=${result.predicted_label}, router_model=${result.candidate_models[0]}, actual_model=${selectedModel.model}`);
			}
			return { selectedModel, lastRoutedPrompt: prompt, candidateModel: result.candidate_models[0] };
		} catch (e) {
			let fallbackReason: string;
			if (isAbortError(e)) {
				fallbackReason = 'routerTimeout';
			} else if (e instanceof RouterDecisionError && e.errorCode) {
				fallbackReason = e.errorCode;
			} else {
				fallbackReason = 'routerError';
			}
			// Anything can be thrown; only read `.message` when it really is an Error.
			const message = e instanceof Error ? e.message : String(e);
			this._logService.error(`Failed to get routed model for conversation ${conversationId} (${fallbackReason}):`, message);
			return { lastRoutedPrompt: prompt, fallbackReason };
		}
	}

	/**
	 * Picks a model without the router: prefers one from the current provider,
	 * otherwise the first available model that has a known endpoint.
	 *
	 * @throws Error when no available model has a known endpoint.
	 */
	private _selectDefaultModel(currentModelProvider: string | undefined, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint {
		const selectedModel = (currentModelProvider && this._findSameProviderModel(currentModelProvider, availableModels, knownEndpoints))
			?? this._findFirstAvailableModel(availableModels, knownEndpoints);
		if (!selectedModel) {
			const errorMsg = 'Auto mode failed: no available model found in known endpoints.';
			this._logService.error(errorMsg);
			throw new Error(errorMsg);
		}
		return selectedModel;
	}

	/** The router only applies to panel chat (or requests without a location) and is gated by configuration. */
	private _isRouterEnabled(chatRequest: ChatRequest | undefined): boolean {
		const isPanelChat = !chatRequest?.location || chatRequest?.location === ChatLocation.Panel;
		return isPanelChat && this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.UseAutoModeRouting, this._expService);
	}

	/**
	 * Find the first model in available_models that has a known endpoint.
	 */
	private _findFirstAvailableModel(availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		for (const model of availableModels) {
			const endpoint = knownEndpoints.find(e => e.model === model);
			if (endpoint) {
				return endpoint;
			}
		}
		return undefined;
	}

	/**
	 * Find the first model in available_models whose knownEndpoint has the same modelProvider
	 * as the current model. Skips any model that doesn't have a known endpoint.
	 */
	private _findSameProviderModel(currentModelProvider: string, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		for (const model of availableModels) {
			const endpoint = knownEndpoints.find(e => e.model === model);
			if (endpoint && endpoint.modelProvider === currentModelProvider) {
				return endpoint;
			}
		}
		return undefined;
	}

	/**
	 * If the request contains an image and the selected model doesn't support vision,
	 * fall back to the first vision-capable model from the available models.
	 */
	private _applyVisionFallback(chatRequest: ChatRequest | undefined, selectedModel: IChatEndpoint, availableModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint {
		if (!hasImage(chatRequest) || selectedModel.supportsVision) {
			return selectedModel;
		}
		const visionModel = availableModels
			.map(model => knownEndpoints.find(e => e.model === model))
			.find(endpoint => endpoint?.supportsVision);
		if (visionModel) {
			this._logService.trace(`Selected model '${selectedModel.model}' does not support vision, falling back to '${visionModel.model}'.`);
			return visionModel;
		}
		this._logService.warn(`Request contains an image but no vision-capable model is available.`);
		return selectedModel;
	}

	/**
	 * Returns the smallest and largest discount in the map, or `{ low: 0, high: 0 }`
	 * when the map is missing or empty.
	 */
	private _calculateDiscountRange(discounts: Record<string, number> | undefined): { low: number; high: number } {
		if (!discounts) {
			return { low: 0, high: 0 };
		}
		let low = Infinity;
		let high = -Infinity;
		let hasValues = false;

		for (const value of Object.values(discounts)) {
			hasValues = true;
			if (value < low) {
				low = value;
			}
			if (value > high) {
				high = value;
			}
		}
		return hasValues ? { low, high } : { low: 0, high: 0 };
	}
}

/**
 * Whether any reference on the request is an object with a string `mimeType`
 * starting with `image/`.
 */
function hasImage(chatRequest: ChatRequest | undefined): boolean {
	if (!chatRequest || !chatRequest.references) {
		return false;
	}
	return chatRequest.references.some(ref => {
		const value = ref.value;
		return typeof value === 'object' &&
			value !== null &&
			'mimeType' in value &&
			typeof value.mimeType === 'string'
			&& value.mimeType.startsWith('image/');
	});
}