GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/modelMetadataFetcher.ts
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestMetadata, RequestType } from '@vscode/copilot-api';
import type { LanguageModelChat } from 'vscode';
import { TaskSingler } from '../../../util/common/taskSingler';
import { Emitter, Event } from '../../../util/vs/base/common/event';
import { Disposable } from '../../../util/vs/base/common/lifecycle';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';

import { IAuthenticationService } from '../../authentication/common/authentication';
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
import { IEnvService } from '../../env/common/envService';
import { GitHubOutageStatus, IOctoKitService } from '../../github/common/githubService';
import { ILogService } from '../../log/common/logService';
import { getRequest } from '../../networking/common/networking';
import { IRequestLogger } from '../../requestLogger/common/requestLogger';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ChatEndpointFamily, IChatModelInformation, ICompletionModelInformation, IEmbeddingModelInformation, IModelAPIResponse, isChatModelInformation, isCompletionModelInformation, isEmbeddingModelInformation } from '../common/endpointProvider';
import { ModelAliasRegistry } from '../common/modelAliasRegistry';

export interface IModelMetadataFetcher {

	/**
	 * Fires whenever we refresh the models from the server.
	 * Does not always indicate there is a change, just that the data is fresh.
	 */
	onDidModelsRefresh: Event<void>;

	/**
	 * Gets all the completion models known by the model fetcher endpoint
	 * @param forceRefresh Whether to force a refresh of the model list from the server
	 */
	getAllCompletionModels(forceRefresh: boolean): Promise<ICompletionModelInformation[]>;

	/**
	 * Gets all the chat models known by the model fetcher endpoint
	 */
	getAllChatModels(): Promise<IChatModelInformation[]>;

	/**
	 * Retrieves a chat model by its family name
	 * @param family The family of the model to fetch
	 */
	getChatModelFromFamily(family: ChatEndpointFamily): Promise<IChatModelInformation>;

	/**
	 * Retrieves the chat model matching the given API model
	 * @param model The language model whose id, version, and family identify the chat model to fetch
	 * @returns The chat model information if found, otherwise undefined
	 */
	getChatModelFromApiModel(model: LanguageModelChat): Promise<IChatModelInformation | undefined>;

	/**
	 * Retrieves an embeddings model by its family name
	 * @param family The family of the model to fetch
	 */
	getEmbeddingsModel(family: 'text-embedding-3-small'): Promise<IEmbeddingModelInformation>;
}

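// A minimal usage sketch (hypothetical consumer; in the real codebase all model
// acquisition goes through the EndpointProvider, which owns this fetcher):
//
//   fetcher.onDidModelsRefresh(() => { /* model list is fresh */ });
//   const base = await fetcher.getChatModelFromFamily('copilot-base');
//   const chatModels = await fetcher.getAllChatModels();
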
/**
 * Responsible for interacting with the CAPI Model API
 * This is solely owned by the EndpointProvider (and TestEndpointProvider) which uses this service to power server side rollout of models
 * All model acquisition should be done through the EndpointProvider
 */
export class ModelMetadataFetcher extends Disposable implements IModelMetadataFetcher {

	private static readonly ALL_MODEL_KEY = 'allModels';

	private _familyMap: Map<string, IModelAPIResponse[]> = new Map();
	private _completionsFamilyMap: Map<string, IModelAPIResponse[]> = new Map();
	private _copilotBaseModel: IModelAPIResponse | undefined;
	private _lastFetchTime: number = 0;
	// Coalesces concurrent callers so only one model fetch is in flight at a time
	private readonly _taskSingler = new TaskSingler<IModelAPIResponse | undefined | void>();
	private _lastFetchError: any;

	private readonly _onDidModelRefresh = new Emitter<void>();
	public onDidModelsRefresh = this._onDidModelRefresh.event;

	constructor(
		protected readonly _isModelLab: boolean,
		@IOctoKitService private readonly _octoKitService: IOctoKitService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
		@IConfigurationService private readonly _configService: IConfigurationService,
		@IExperimentationService private readonly _expService: IExperimentationService,
		@IEnvService private readonly _envService: IEnvService,
		@IAuthenticationService private readonly _authService: IAuthenticationService,
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
	) {
		super();
		this._register(this._authService.onDidAuthenticationChange(() => {
			// Auth changed so next fetch should be forced to get a new list
			this._familyMap.clear();
			this._completionsFamilyMap.clear();
			this._lastFetchTime = 0;
		}));
	}

	public async getAllCompletionModels(forceRefresh: boolean): Promise<ICompletionModelInformation[]> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, () => this._fetchModels(forceRefresh));
		const completionModels: ICompletionModelInformation[] = [];
		for (const [, models] of this._completionsFamilyMap) {
			for (const model of models) {
				if (isCompletionModelInformation(model)) {
					completionModels.push(model);
				}
			}
		}
		return completionModels;
	}

	public async getAllChatModels(): Promise<IChatModelInformation[]> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		const chatModels: IChatModelInformation[] = [];
		for (const [, models] of this._familyMap) {
			for (const model of models) {
				if (isChatModelInformation(model)) {
					chatModels.push(model);
				}
			}
		}
		return chatModels;
	}

	/**
	 * Hydrates a model API response from the `/models` endpoint with proper exp overrides and error handling
	 * @param resolvedModel The resolved model to hydrate
	 * @returns The resolved model with proper exp overrides and token counts
	 */
	private async _hydrateResolvedModel(resolvedModel: IModelAPIResponse | undefined): Promise<IModelAPIResponse> {
		if (!resolvedModel) {
			throw this._lastFetchError ?? new Error(await this._getErrorMessage('Unable to resolve model'));
		}

		// If it's a chat model, update max prompt tokens based on settings + exp
		if (isChatModelInformation(resolvedModel) && resolvedModel.capabilities.limits) {
			resolvedModel.capabilities.limits.max_prompt_tokens = this._getMaxPromptTokensOverride(resolvedModel);
			// Also ensure prompt tokens + output tokens <= context window. Output tokens are capped at 15% of input tokens
			const outputTokens = Math.floor(Math.min(resolvedModel.capabilities.limits.max_output_tokens ?? 4096, resolvedModel.capabilities.limits.max_prompt_tokens * 0.15));
			const contextWindow = resolvedModel.capabilities.limits.max_context_window_tokens ?? (outputTokens + resolvedModel.capabilities.limits.max_prompt_tokens);
			resolvedModel.capabilities.limits.max_prompt_tokens = Math.min(resolvedModel.capabilities.limits.max_prompt_tokens, contextWindow - outputTokens);
		}

		// If it's a chat model, update showInModelPicker based on experiment overrides
		if (isChatModelInformation(resolvedModel)) {
			resolvedModel.model_picker_enabled = this._getShowInModelPickerOverride(resolvedModel);
		}

		if (resolvedModel.preview && !resolvedModel.name.endsWith('(Preview)')) {
			// If the model is a preview model, we append (Preview) to the name
			resolvedModel.name = `${resolvedModel.name} (Preview)`;
		}
		return resolvedModel;
	}

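	// Worked example of the clamp above (illustrative numbers, not real CAPI limits):
	// with max_prompt_tokens = 120000 (post-override), max_output_tokens = 8192, and
	// max_context_window_tokens = 128000: outputTokens = min(8192, 120000 * 0.15) = 8192,
	// so max_prompt_tokens becomes min(120000, 128000 - 8192) = 119808.
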
	public async getChatModelFromFamily(family: ChatEndpointFamily): Promise<IChatModelInformation> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		let resolvedModel: IModelAPIResponse | undefined;
		family = ModelAliasRegistry.resolveAlias(family) as ChatEndpointFamily;

		if (family === 'copilot-base') {
			resolvedModel = this._copilotBaseModel;
		} else {
			resolvedModel = this._familyMap.get(family)?.[0];
		}
		if (!resolvedModel || !isChatModelInformation(resolvedModel)) {
			throw new Error(await this._getErrorMessage(`Unable to resolve chat model with family selection: ${family}`));
		}
		return resolvedModel;
	}

	public async getChatModelFromApiModel(apiModel: LanguageModelChat): Promise<IChatModelInformation | undefined> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		let resolvedModel: IModelAPIResponse | undefined;
		for (const models of this._familyMap.values()) {
			resolvedModel = models.find(model =>
				model.id === apiModel.id &&
				model.version === apiModel.version &&
				model.capabilities.family === apiModel.family);
			if (resolvedModel) {
				break;
			}
		}
		if (!resolvedModel) {
			return;
		}
		if (!isChatModelInformation(resolvedModel)) {
			throw new Error(await this._getErrorMessage(`Unable to resolve chat model: ${apiModel.id},${apiModel.name},${apiModel.version},${apiModel.family}`));
		}
		return resolvedModel;
	}

	public async getEmbeddingsModel(family: 'text-embedding-3-small'): Promise<IEmbeddingModelInformation> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		const resolvedModel = this._familyMap.get(family)?.[0];
		if (!resolvedModel || !isEmbeddingModelInformation(resolvedModel)) {
			throw new Error(await this._getErrorMessage(`Unable to resolve embeddings model with family selection: ${family}`));
		}
		return resolvedModel;
	}

	private _shouldRefreshModels(): boolean {
		if (this._familyMap.size === 0) {
			// Always refresh if we have no models as this means the last fetch failed in some way
			return true;
		}
		const tenMinutes = 10 * 60 * 1000; // 10 minutes in milliseconds
		const now = Date.now();

		if (!this._lastFetchTime) {
			return true; // If there's no last fetch time, we should refresh
		}

		// Only fetch if the current session is active.
		// This avoids unnecessary network calls when VS Code is in the background.
		if (!this._envService.isActive) {
			return false;
		}

		const timeSinceLastFetch = now - this._lastFetchTime;

		return timeSinceLastFetch > tenMinutes;
	}

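	// Net effect: refresh whenever the model cache is empty or the last fetch failed
	// (an auth change clears the cache and resets _lastFetchTime); otherwise refresh
	// only while the session is active and the cached data is over ten minutes old.
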
	private async _fetchModels(force?: boolean): Promise<void> {
		if (!force && !this._shouldRefreshModels()) {
			return;
		}
		const requestStartTime = Date.now();

		const copilotToken = (await this._authService.getCopilotToken()).token;
		const requestId = generateUuid();
		const requestMetadata: RequestMetadata = { type: RequestType.Models, isModelLab: this._isModelLab };

		try {
			const response = await this._instantiationService.invokeFunction(getRequest, {
				endpointOrUrl: requestMetadata,
				secretKey: copilotToken,
				intent: 'model-access',
				requestId,
			});

			this._lastFetchTime = Date.now();
			this._logService.info(`Fetched model metadata in ${Date.now() - requestStartTime}ms ${requestId}`);

			if (response.status < 200 || response.status >= 300) {
				// If we're rate limited and have models, we should just return
				if (response.status === 429 && this._familyMap.size > 0) {
					this._logService.warn(`Rate limited while fetching models ${requestId}`);
					return;
				}
				throw new Error(await this._getErrorMessage(`Failed to fetch models (${requestId}): ${(await response.text()) || response.statusText || `HTTP ${response.status}`}`));
			}

			this._familyMap.clear();

			const data: IModelAPIResponse[] = (await response.json()).data;
			this._requestLogger.logModelListCall(requestId, requestMetadata, data);
			for (let model of data) {
				model = await this._hydrateResolvedModel(model);
				const isCompletionModel = isCompletionModelInformation(model);
				// The base model is whatever model is deemed "fallback" by the server
				if (model.is_chat_fallback && !isCompletionModel) {
					this._copilotBaseModel = model;
				}
				const family = model.capabilities.family;
				const familyMap = isCompletionModel ? this._completionsFamilyMap : this._familyMap;
				if (!familyMap.has(family)) {
					familyMap.set(family, []);
				}
				familyMap.get(family)?.push(model);
			}
			this._lastFetchError = undefined;
			this._onDidModelRefresh.fire();
		} catch (e) {
			this._logService.error(e, `Failed to fetch models (${requestId})`);
			this._lastFetchError = e;
			this._lastFetchTime = 0;
		}
	}

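	// Note: the /models response body is read as { data: IModelAPIResponse[] }; each
	// entry's capabilities.family decides whether it lands in the chat or completions
	// bucket, and is_chat_fallback marks the model served as copilot-base.
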
	// Computes the max prompt token limit from config and experimentation overrides
	private _getMaxPromptTokensOverride(chatModelInfo: IChatModelInformation): number {
		// Check the debug override ChatMaxTokenNum
		const chatMaxTokenNumOverride = this._configService.getConfig(ConfigKey.TeamInternal.DebugOverrideChatMaxTokenNum); // can only be set by internal users
		// Reserve the base 3 tokens consumed by each OpenAI completion
		let modelLimit = -3;
		// If the option is set, it takes precedence over any other logic
		if (chatMaxTokenNumOverride > 0) {
			modelLimit += chatMaxTokenNumOverride;
			return modelLimit;
		}

		let experimentalOverrides: Record<string, number> = {};
		try {
			const expValue = this._expService.getTreatmentVariable<string>('copilotchat.contextWindows');
			experimentalOverrides = JSON.parse(expValue ?? '{}');
		} catch {
			// If the experiment service is not available or returns a bad value, we ignore the overrides
		}

		// An experiment override takes precedence over what comes back from CAPI
		if (experimentalOverrides[chatModelInfo.id]) {
			modelLimit += experimentalOverrides[chatModelInfo.id];
			return modelLimit;
		}

		// Check if CAPI has prompt token limits and return those
		if (chatModelInfo.capabilities?.limits?.max_prompt_tokens) {
			modelLimit += chatModelInfo.capabilities.limits.max_prompt_tokens;
			return modelLimit;
		} else if (chatModelInfo.capabilities.limits?.max_context_window_tokens) {
			// Otherwise return the context window as the prompt token limit for cases where CAPI doesn't configure prompt tokens
			modelLimit += chatModelInfo.capabilities.limits.max_context_window_tokens;
			return modelLimit;
		}

		return modelLimit;
	}

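	// Override precedence in _getMaxPromptTokensOverride, highest first: debug config,
	// exp treatment, CAPI max_prompt_tokens, CAPI max_context_window_tokens; every path
	// subtracts the 3 base tokens. E.g. (illustrative) CAPI max_prompt_tokens = 64000
	// with no overrides yields 63997.
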
	private async _getErrorMessage(fallback: string): Promise<string> {
		try {
			const status = await this._octoKitService.getGitHubOutageStatus();
			if (status !== GitHubOutageStatus.None) {
				return 'Error fetching models! It appears that GitHub is experiencing an outage. Please check the [GitHub Status Page](https://githubstatus.com) for more info';
			}
		} catch {
			// Don't let status check failures block the original error
		}
		return fallback;
	}

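	// The 'copilotchat.showInModelPicker' treatment variable below is expected to be a
	// JSON map from model id to visibility, e.g. '{"example-model-id": false}'
	// (illustrative id); missing entries fall back to CAPI's model_picker_enabled.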
	private _getShowInModelPickerOverride(resolvedModel: IModelAPIResponse): boolean {
		let modelPickerOverrides: Record<string, boolean> = {};
		const expResult = this._expService.getTreatmentVariable<string>('copilotchat.showInModelPicker');
		try {
			modelPickerOverrides = JSON.parse(expResult || '{}');
		} catch {
			// No-op if parsing experiment fails
		}

		return modelPickerOverrides[resolvedModel.id] ?? resolvedModel.model_picker_enabled;
	}
}

//#endregion