// Path: extensions/copilot/src/platform/endpoint/node/modelMetadataFetcher.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { RequestMetadata, RequestType } from '@vscode/copilot-api';6import type { LanguageModelChat } from 'vscode';7import { TaskSingler } from '../../../util/common/taskSingler';8import { Emitter, Event } from '../../../util/vs/base/common/event';9import { Disposable } from '../../../util/vs/base/common/lifecycle';10import { generateUuid } from '../../../util/vs/base/common/uuid';11import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';1213import { IAuthenticationService } from '../../authentication/common/authentication';14import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';15import { IEnvService } from '../../env/common/envService';16import { GitHubOutageStatus, IOctoKitService } from '../../github/common/githubService';17import { ILogService } from '../../log/common/logService';18import { getRequest } from '../../networking/common/networking';19import { IRequestLogger } from '../../requestLogger/common/requestLogger';20import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';21import { ChatEndpointFamily, IChatModelInformation, ICompletionModelInformation, IEmbeddingModelInformation, IModelAPIResponse, isChatModelInformation, isCompletionModelInformation, isEmbeddingModelInformation } from '../common/endpointProvider';22import { ModelAliasRegistry } from '../common/modelAliasRegistry';2324export interface IModelMetadataFetcher {2526/**27* Fires whenever we refresh the models from the server.28* Does not always indicate there is a change, just that the data is fresh29*/30onDidModelsRefresh: 
Event<void>;3132/**33* Gets all the completion models known by the model fetcher endpoint34*/35getAllCompletionModels(forceRefresh: boolean): Promise<ICompletionModelInformation[]>;3637/**38* Gets all the chat models known by the model fetcher endpoint39*/40getAllChatModels(): Promise<IChatModelInformation[]>;4142/**43* Retrieves a chat model by its family name44* @param family The family of the model to fetch45*/46getChatModelFromFamily(family: ChatEndpointFamily): Promise<IChatModelInformation>;4748/**49* Retrieves a chat model by its id50* @param id The id of the chat model you want to get51* @returns The chat model information if found, otherwise undefined52*/53getChatModelFromApiModel(model: LanguageModelChat): Promise<IChatModelInformation | undefined>;5455/**56* Retrieves an embeddings model by its family name57* @param family The family of the model to fetch58*/59getEmbeddingsModel(family: 'text-embedding-3-small'): Promise<IEmbeddingModelInformation>;60}6162/**63* Responsible for interacting with the CAPI Model API64* This is solely owned by the EndpointProvider (and TestEndpointProvider) which uses this service to power server side rollout of models65* All model acquisition should be done through the EndpointProvider66*/67export class ModelMetadataFetcher extends Disposable implements IModelMetadataFetcher {6869private static readonly ALL_MODEL_KEY = 'allModels';7071private _familyMap: Map<string, IModelAPIResponse[]> = new Map();72private _completionsFamilyMap: Map<string, IModelAPIResponse[]> = new Map();73private _copilotBaseModel: IModelAPIResponse | undefined;74private _lastFetchTime: number = 0;75private readonly _taskSingler = new TaskSingler<IModelAPIResponse | undefined | void>();76private _lastFetchError: any;7778private readonly _onDidModelRefresh = new Emitter<void>();79public onDidModelsRefresh = this._onDidModelRefresh.event;8081constructor(82protected readonly _isModelLab: boolean,83@IOctoKitService private readonly _octoKitService: 
IOctoKitService,84@IRequestLogger private readonly _requestLogger: IRequestLogger,85@IConfigurationService private readonly _configService: IConfigurationService,86@IExperimentationService private readonly _expService: IExperimentationService,87@IEnvService private readonly _envService: IEnvService,88@IAuthenticationService private readonly _authService: IAuthenticationService,89@ILogService private readonly _logService: ILogService,90@IInstantiationService private readonly _instantiationService: IInstantiationService,91) {92super();93this._register(this._authService.onDidAuthenticationChange(() => {94// Auth changed so next fetch should be forced to get a new list95this._familyMap.clear();96this._completionsFamilyMap.clear();97this._lastFetchTime = 0;98}));99}100101public async getAllCompletionModels(forceRefresh: boolean): Promise<ICompletionModelInformation[]> {102await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, () => this._fetchModels(forceRefresh));103const completionModels: ICompletionModelInformation[] = [];104for (const [, models] of this._completionsFamilyMap) {105for (const model of models) {106if (isCompletionModelInformation(model)) {107completionModels.push(model);108}109}110}111return completionModels;112}113114public async getAllChatModels(): Promise<IChatModelInformation[]> {115await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));116const chatModels: IChatModelInformation[] = [];117for (const [, models] of this._familyMap) {118for (const model of models) {119if (isChatModelInformation(model)) {120chatModels.push(model);121}122}123}124return chatModels;125}126127/**128* Hydrates a model API response from the `/models` endpoint with proper exp overrides and error handling129* @param resolvedModel The resolved model to hydrate130* @returns The resolved model with proper exp overrides and token counts131*/132private async _hydrateResolvedModel(resolvedModel: IModelAPIResponse | 
undefined): Promise<IModelAPIResponse> {133if (!resolvedModel) {134throw this._lastFetchError ?? new Error(await this._getErrorMessage('Unable to resolve model'));135}136137// If it's a chat model, update max prompt tokens based on settings + exp138if (isChatModelInformation(resolvedModel) && (resolvedModel.capabilities.limits)) {139resolvedModel.capabilities.limits.max_prompt_tokens = this._getMaxPromptTokensOverride(resolvedModel);140// Also ensure prompt tokens + output tokens <= context window. Output tokens is capped to max 15% input tokens141const outputTokens = Math.floor(Math.min(resolvedModel.capabilities.limits.max_output_tokens ?? 4096, resolvedModel.capabilities.limits.max_prompt_tokens * 0.15));142const contextWindow = resolvedModel.capabilities.limits.max_context_window_tokens ?? (outputTokens + resolvedModel.capabilities.limits.max_prompt_tokens);143resolvedModel.capabilities.limits.max_prompt_tokens = Math.min(resolvedModel.capabilities.limits.max_prompt_tokens, contextWindow - outputTokens);144}145146// If it's a chat model, update showInModelPicker based on experiment overrides147if (isChatModelInformation(resolvedModel)) {148resolvedModel.model_picker_enabled = this._getShowInModelPickerOverride(resolvedModel);149}150151if (resolvedModel.preview && !resolvedModel.name.endsWith('(Preview)')) {152// If the model is a preview model, we append (Preview) to the name153resolvedModel.name = `${resolvedModel.name} (Preview)`;154}155return resolvedModel;156}157158public async getChatModelFromFamily(family: ChatEndpointFamily): Promise<IChatModelInformation> {159await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));160let resolvedModel: IModelAPIResponse | undefined;161family = ModelAliasRegistry.resolveAlias(family) as ChatEndpointFamily;162163if (family === 'copilot-base') {164resolvedModel = this._copilotBaseModel;165} else {166resolvedModel = this._familyMap.get(family)?.[0];167}168if (!resolvedModel || 
!isChatModelInformation(resolvedModel)) {169throw new Error(await this._getErrorMessage(`Unable to resolve chat model with family selection: ${family}`));170}171return resolvedModel;172}173174public async getChatModelFromApiModel(apiModel: LanguageModelChat): Promise<IChatModelInformation | undefined> {175await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));176let resolvedModel: IModelAPIResponse | undefined;177for (const models of this._familyMap.values()) {178resolvedModel = models.find(model =>179model.id === apiModel.id &&180model.version === apiModel.version &&181model.capabilities.family === apiModel.family);182if (resolvedModel) {183break;184}185}186if (!resolvedModel) {187return;188}189if (!isChatModelInformation(resolvedModel)) {190throw new Error(await this._getErrorMessage(`Unable to resolve chat model: ${apiModel.id},${apiModel.name},${apiModel.version},${apiModel.family}`));191}192return resolvedModel;193}194195public async getEmbeddingsModel(family: 'text-embedding-3-small'): Promise<IEmbeddingModelInformation> {196await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));197const resolvedModel = this._familyMap.get(family)?.[0];198if (!resolvedModel || !isEmbeddingModelInformation(resolvedModel)) {199throw new Error(await this._getErrorMessage(`Unable to resolve embeddings model with family selection: ${family}`));200}201return resolvedModel;202}203204private _shouldRefreshModels(): boolean {205if (this._familyMap.size === 0) {206// Always refresh if we have no models as this means the last fetch failed in some way207return true;208}209const tenMinutes = 10 * 60 * 1000; // 10 minutes in milliseconds210const now = Date.now();211212if (!this._lastFetchTime) {213return true; // If there's no last fetch time, we should refresh214}215216// Only fetch if the current session is active.217// This avoids unnecessary network calls when VS Code is in the 
background.218if (!this._envService.isActive) {219return false;220}221222const timeSinceLastFetch = now - this._lastFetchTime;223224return timeSinceLastFetch > tenMinutes;225}226227private async _fetchModels(force?: boolean): Promise<void> {228if (!force && !this._shouldRefreshModels()) {229return;230}231const requestStartTime = Date.now();232233const copilotToken = (await this._authService.getCopilotToken()).token;234const requestId = generateUuid();235const requestMetadata: RequestMetadata = { type: RequestType.Models, isModelLab: this._isModelLab };236237try {238const response = await this._instantiationService.invokeFunction(getRequest, {239endpointOrUrl: requestMetadata,240secretKey: copilotToken,241intent: 'model-access',242requestId,243});244245this._lastFetchTime = Date.now();246this._logService.info(`Fetched model metadata in ${Date.now() - requestStartTime}ms ${requestId}`);247248if (response.status < 200 || response.status >= 300) {249// If we're rate limited and have models, we should just return250if (response.status === 429 && this._familyMap.size > 0) {251this._logService.warn(`Rate limited while fetching models ${requestId}`);252return;253}254throw new Error(await this._getErrorMessage(`Failed to fetch models (${requestId}): ${(await response.text()) || response.statusText || `HTTP ${response.status}`}`));255}256257this._familyMap.clear();258259const data: IModelAPIResponse[] = (await response.json()).data;260this._requestLogger.logModelListCall(requestId, requestMetadata, data);261for (let model of data) {262model = await this._hydrateResolvedModel(model);263const isCompletionModel = isCompletionModelInformation(model);264// The base model is whatever model is deemed "fallback" by the server265if (model.is_chat_fallback && !isCompletionModel) {266this._copilotBaseModel = model;267}268const family = model.capabilities.family;269const familyMap = isCompletionModel ? 
this._completionsFamilyMap : this._familyMap;270if (!familyMap.has(family)) {271familyMap.set(family, []);272}273familyMap.get(family)?.push(model);274}275this._lastFetchError = undefined;276this._onDidModelRefresh.fire();277} catch (e) {278this._logService.error(e, `Failed to fetch models (${requestId})`);279this._lastFetchError = e;280this._lastFetchTime = 0;281}282}283284// get ChatMaxNumTokens from config for experimentation285private _getMaxPromptTokensOverride(chatModelInfo: IChatModelInformation): number {286// check debug override ChatMaxTokenNum287const chatMaxTokenNumOverride = this._configService.getConfig(ConfigKey.TeamInternal.DebugOverrideChatMaxTokenNum); // can only be set by internal users288// Base 3 tokens for each OpenAI completion289let modelLimit = -3;290// if option is set, takes precedence over any other logic291if (chatMaxTokenNumOverride > 0) {292modelLimit += chatMaxTokenNumOverride;293return modelLimit;294}295296let experimentalOverrides: Record<string, number> = {};297try {298const expValue = this._expService.getTreatmentVariable<string>('copilotchat.contextWindows');299experimentalOverrides = JSON.parse(expValue ?? 
'{}');300} catch {301// If the experiment service either is not available or returns a bad value we ignore the overrides302}303304// If there's an experiment that takes precedence over what comes back from CAPI305if (experimentalOverrides[chatModelInfo.id]) {306modelLimit += experimentalOverrides[chatModelInfo.id];307return modelLimit;308}309310// Check if CAPI has prompt token limits and return those311if (chatModelInfo.capabilities?.limits?.max_prompt_tokens) {312modelLimit += chatModelInfo.capabilities.limits.max_prompt_tokens;313return modelLimit;314} else if (chatModelInfo.capabilities.limits?.max_context_window_tokens) {315// Otherwise return the context window as the prompt tokens for cases where CAPI doesn't configure the prompt tokens316modelLimit += chatModelInfo.capabilities.limits.max_context_window_tokens;317return modelLimit;318}319320return modelLimit;321}322323private async _getErrorMessage(fallback: string): Promise<string> {324try {325const status = await this._octoKitService.getGitHubOutageStatus();326if (status !== GitHubOutageStatus.None) {327return 'Error fetching models! It appears that GitHub is experiencing an outage. Please check the [GitHub Status Page](https://githubstatus.com) for more info';328}329} catch {330// Don't let status check failures block the original error331}332return fallback;333}334335private _getShowInModelPickerOverride(resolvedModel: IModelAPIResponse): boolean {336let modelPickerOverrides: Record<string, boolean> = {};337const expResult = this._expService.getTreatmentVariable<string>('copilotchat.showInModelPicker');338try {339modelPickerOverrides = JSON.parse(expResult || '{}');340} catch {341// No-op if parsing experiment fails342}343344return modelPickerOverrides[resolvedModel.id] ?? resolvedModel.model_picker_enabled;345}346}347348//#endregion349350351