GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/modelMetadataFetcher.ts
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestMetadata, RequestType } from '@vscode/copilot-api';
import type { LanguageModelChat } from 'vscode';
import { TaskSingler } from '../../../util/common/taskSingler';
import { Emitter, Event } from '../../../util/vs/base/common/event';
import { Disposable } from '../../../util/vs/base/common/lifecycle';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';

import { IAuthenticationService } from '../../authentication/common/authentication';
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
import { IEnvService } from '../../env/common/envService';
import { GitHubOutageStatus, IOctoKitService } from '../../github/common/githubService';
import { ILogService } from '../../log/common/logService';
import { getRequest } from '../../networking/common/networking';
import { IRequestLogger } from '../../requestLogger/common/requestLogger';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ChatEndpointFamily, IChatModelInformation, ICompletionModelInformation, IEmbeddingModelInformation, IModelAPIResponse, isChatModelInformation, isCompletionModelInformation, isEmbeddingModelInformation } from '../common/endpointProvider';
import { ModelAliasRegistry } from '../common/modelAliasRegistry';

export interface IModelMetadataFetcher {

	/**
	 * Fires whenever we refresh the models from the server.
	 * Does not always indicate there is a change, just that the data is fresh.
	 */
	onDidModelsRefresh: Event<void>;

	/**
	 * Gets all the completion models known by the model fetcher endpoint
	 * @param forceRefresh Whether to force a refresh of the model list from the server
	 */
	getAllCompletionModels(forceRefresh: boolean): Promise<ICompletionModelInformation[]>;

	/**
	 * Gets all the chat models known by the model fetcher endpoint
	 */
	getAllChatModels(): Promise<IChatModelInformation[]>;

	/**
	 * Retrieves a chat model by its family name
	 * @param family The family of the model to fetch
	 */
	getChatModelFromFamily(family: ChatEndpointFamily): Promise<IChatModelInformation>;

	/**
	 * Retrieves the chat model matching the given API model
	 * @param model The language model whose id, version, and family identify the chat model to fetch
	 * @returns The chat model information if found, otherwise undefined
	 */
	getChatModelFromApiModel(model: LanguageModelChat): Promise<IChatModelInformation | undefined>;

	/**
	 * Retrieves an embeddings model by its family name
	 * @param family The family of the model to fetch
	 */
	getEmbeddingsModel(family: 'text-embedding-3-small'): Promise<IEmbeddingModelInformation>;
}

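// A minimal usage sketch (hypothetical consumer; in the real codebase all model
// acquisition goes through the EndpointProvider, which owns this fetcher):
//
//   fetcher.onDidModelsRefresh(() => { /* model list is fresh */ });
//   const base = await fetcher.getChatModelFromFamily('copilot-base');
//   const chatModels = await fetcher.getAllChatModels();
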
/**
 * Responsible for interacting with the CAPI Model API
 * This is solely owned by the EndpointProvider (and TestEndpointProvider) which uses this service to power server side rollout of models
 * All model acquisition should be done through the EndpointProvider
 */
export class ModelMetadataFetcher extends Disposable implements IModelMetadataFetcher {

	private static readonly ALL_MODEL_KEY = 'allModels';

	private _familyMap: Map<string, IModelAPIResponse[]> = new Map();
	private _completionsFamilyMap: Map<string, IModelAPIResponse[]> = new Map();
	private _copilotBaseModel: IModelAPIResponse | undefined;
	private _lastFetchTime: number = 0;
	// Coalesces concurrent callers so only one model fetch is in flight at a time
	private readonly _taskSingler = new TaskSingler<IModelAPIResponse | undefined | void>();
	private _lastFetchError: any;

	private readonly _onDidModelRefresh = new Emitter<void>();
	public onDidModelsRefresh = this._onDidModelRefresh.event;

	constructor(
		protected readonly _isModelLab: boolean,
		@IOctoKitService private readonly _octoKitService: IOctoKitService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
		@IConfigurationService private readonly _configService: IConfigurationService,
		@IExperimentationService private readonly _expService: IExperimentationService,
		@IEnvService private readonly _envService: IEnvService,
		@IAuthenticationService private readonly _authService: IAuthenticationService,
		@ILogService private readonly _logService: ILogService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
	) {
		super();
		this._register(this._authService.onDidAuthenticationChange(() => {
			// Auth changed so next fetch should be forced to get a new list
			this._familyMap.clear();
			this._completionsFamilyMap.clear();
			this._lastFetchTime = 0;
		}));
	}

	public async getAllCompletionModels(forceRefresh: boolean): Promise<ICompletionModelInformation[]> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, () => this._fetchModels(forceRefresh));
		const completionModels: ICompletionModelInformation[] = [];
		for (const [, models] of this._completionsFamilyMap) {
			for (const model of models) {
				if (isCompletionModelInformation(model)) {
					completionModels.push(model);
				}
			}
		}
		return completionModels;
	}

	public async getAllChatModels(): Promise<IChatModelInformation[]> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		const chatModels: IChatModelInformation[] = [];
		for (const [, models] of this._familyMap) {
			for (const model of models) {
				if (isChatModelInformation(model)) {
					chatModels.push(model);
				}
			}
		}
		return chatModels;
	}

	/**
	 * Hydrates a model API response from the `/models` endpoint with proper exp overrides and error handling
	 * @param resolvedModel The resolved model to hydrate
	 * @returns The resolved model with proper exp overrides and token counts
	 */
	private async _hydrateResolvedModel(resolvedModel: IModelAPIResponse | undefined): Promise<IModelAPIResponse> {
		if (!resolvedModel) {
			throw this._lastFetchError ?? new Error(await this._getErrorMessage('Unable to resolve model'));
		}

		// If it's a chat model, update max prompt tokens based on settings + exp
		if (isChatModelInformation(resolvedModel) && resolvedModel.capabilities.limits) {
			resolvedModel.capabilities.limits.max_prompt_tokens = this._getMaxPromptTokensOverride(resolvedModel);
			// Also ensure prompt tokens + output tokens <= context window. Output tokens are capped at 15% of input tokens
			const outputTokens = Math.floor(Math.min(resolvedModel.capabilities.limits.max_output_tokens ?? 4096, resolvedModel.capabilities.limits.max_prompt_tokens * 0.15));
			const contextWindow = resolvedModel.capabilities.limits.max_context_window_tokens ?? (outputTokens + resolvedModel.capabilities.limits.max_prompt_tokens);
			resolvedModel.capabilities.limits.max_prompt_tokens = Math.min(resolvedModel.capabilities.limits.max_prompt_tokens, contextWindow - outputTokens);
		}

		// If it's a chat model, update showInModelPicker based on experiment overrides
		if (isChatModelInformation(resolvedModel)) {
			resolvedModel.model_picker_enabled = this._getShowInModelPickerOverride(resolvedModel);
		}

		if (resolvedModel.preview && !resolvedModel.name.endsWith('(Preview)')) {
			// If the model is a preview model, we append (Preview) to the name
			resolvedModel.name = `${resolvedModel.name} (Preview)`;
		}
		return resolvedModel;
	}

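	// Worked example of the clamp above (illustrative numbers, not real CAPI limits):
	// with max_prompt_tokens = 120000 (post-override), max_output_tokens = 8192, and
	// max_context_window_tokens = 128000: outputTokens = min(8192, 120000 * 0.15) = 8192,
	// so max_prompt_tokens becomes min(120000, 128000 - 8192) = 119808.
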
	public async getChatModelFromFamily(family: ChatEndpointFamily): Promise<IChatModelInformation> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		let resolvedModel: IModelAPIResponse | undefined;
		family = ModelAliasRegistry.resolveAlias(family) as ChatEndpointFamily;

		if (family === 'copilot-base') {
			resolvedModel = this._copilotBaseModel;
		} else {
			resolvedModel = this._familyMap.get(family)?.[0];
		}
		if (!resolvedModel || !isChatModelInformation(resolvedModel)) {
			throw new Error(await this._getErrorMessage(`Unable to resolve chat model with family selection: ${family}`));
		}
		return resolvedModel;
	}

	public async getChatModelFromApiModel(apiModel: LanguageModelChat): Promise<IChatModelInformation | undefined> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		let resolvedModel: IModelAPIResponse | undefined;
		for (const models of this._familyMap.values()) {
			resolvedModel = models.find(model =>
				model.id === apiModel.id &&
				model.version === apiModel.version &&
				model.capabilities.family === apiModel.family);
			if (resolvedModel) {
				break;
			}
		}
		if (!resolvedModel) {
			return;
		}
		if (!isChatModelInformation(resolvedModel)) {
			throw new Error(await this._getErrorMessage(`Unable to resolve chat model: ${apiModel.id},${apiModel.name},${apiModel.version},${apiModel.family}`));
		}
		return resolvedModel;
	}

	public async getEmbeddingsModel(family: 'text-embedding-3-small'): Promise<IEmbeddingModelInformation> {
		await this._taskSingler.getOrCreate(ModelMetadataFetcher.ALL_MODEL_KEY, this._fetchModels.bind(this));
		const resolvedModel = this._familyMap.get(family)?.[0];
		if (!resolvedModel || !isEmbeddingModelInformation(resolvedModel)) {
			throw new Error(await this._getErrorMessage(`Unable to resolve embeddings model with family selection: ${family}`));
		}
		return resolvedModel;
	}

	private _shouldRefreshModels(): boolean {
		if (this._familyMap.size === 0) {
			// Always refresh if we have no models as this means the last fetch failed in some way
			return true;
		}
		const tenMinutes = 10 * 60 * 1000; // 10 minutes in milliseconds
		const now = Date.now();

		if (!this._lastFetchTime) {
			return true; // If there's no last fetch time, we should refresh
		}

		// Only fetch if the current session is active.
		// This avoids unnecessary network calls when VS Code is in the background.
		if (!this._envService.isActive) {
			return false;
		}

		const timeSinceLastFetch = now - this._lastFetchTime;

		return timeSinceLastFetch > tenMinutes;
	}

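	// Net effect: refresh whenever the model cache is empty or the last fetch failed
	// (an auth change clears the cache and resets _lastFetchTime); otherwise refresh
	// only while the session is active and the cached data is over ten minutes old.
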
	private async _fetchModels(force?: boolean): Promise<void> {
		if (!force && !this._shouldRefreshModels()) {
			return;
		}
		const requestStartTime = Date.now();

		const copilotToken = (await this._authService.getCopilotToken()).token;
		const requestId = generateUuid();
		const requestMetadata: RequestMetadata = { type: RequestType.Models, isModelLab: this._isModelLab };

		try {
			const response = await this._instantiationService.invokeFunction(getRequest, {
				endpointOrUrl: requestMetadata,
				secretKey: copilotToken,
				intent: 'model-access',
				requestId,
			});

			this._lastFetchTime = Date.now();
			this._logService.info(`Fetched model metadata in ${Date.now() - requestStartTime}ms ${requestId}`);

			if (response.status < 200 || response.status >= 300) {
				// If we're rate limited and have models, we should just return
				if (response.status === 429 && this._familyMap.size > 0) {
					this._logService.warn(`Rate limited while fetching models ${requestId}`);
					return;
				}
				throw new Error(await this._getErrorMessage(`Failed to fetch models (${requestId}): ${(await response.text()) || response.statusText || `HTTP ${response.status}`}`));
			}

			this._familyMap.clear();

			const data: IModelAPIResponse[] = (await response.json()).data;
			this._requestLogger.logModelListCall(requestId, requestMetadata, data);
			for (let model of data) {
				model = await this._hydrateResolvedModel(model);
				const isCompletionModel = isCompletionModelInformation(model);
				// The base model is whatever model is deemed "fallback" by the server
				if (model.is_chat_fallback && !isCompletionModel) {
					this._copilotBaseModel = model;
				}
				const family = model.capabilities.family;
				const familyMap = isCompletionModel ? this._completionsFamilyMap : this._familyMap;
				if (!familyMap.has(family)) {
					familyMap.set(family, []);
				}
				familyMap.get(family)?.push(model);
			}
			this._lastFetchError = undefined;
			this._onDidModelRefresh.fire();
		} catch (e) {
			this._logService.error(e, `Failed to fetch models (${requestId})`);
			this._lastFetchError = e;
			this._lastFetchTime = 0;
		}
	}

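	// Note: the /models response body is read as { data: IModelAPIResponse[] }; each
	// entry's capabilities.family decides whether it lands in the chat or completions
	// bucket, and is_chat_fallback marks the model served as copilot-base.
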
	// Computes the max prompt token limit from config and experimentation overrides
	private _getMaxPromptTokensOverride(chatModelInfo: IChatModelInformation): number {
		// Check the debug override ChatMaxTokenNum
		const chatMaxTokenNumOverride = this._configService.getConfig(ConfigKey.TeamInternal.DebugOverrideChatMaxTokenNum); // can only be set by internal users
		// Reserve the base 3 tokens consumed by each OpenAI completion
		let modelLimit = -3;
		// If the option is set, it takes precedence over any other logic
		if (chatMaxTokenNumOverride > 0) {
			modelLimit += chatMaxTokenNumOverride;
			return modelLimit;
		}

		let experimentalOverrides: Record<string, number> = {};
		try {
			const expValue = this._expService.getTreatmentVariable<string>('copilotchat.contextWindows');
			experimentalOverrides = JSON.parse(expValue ?? '{}');
		} catch {
			// If the experiment service is not available or returns a bad value, we ignore the overrides
		}

		// An experiment override takes precedence over what comes back from CAPI
		if (experimentalOverrides[chatModelInfo.id]) {
			modelLimit += experimentalOverrides[chatModelInfo.id];
			return modelLimit;
		}

		// Check if CAPI has prompt token limits and return those
		if (chatModelInfo.capabilities?.limits?.max_prompt_tokens) {
			modelLimit += chatModelInfo.capabilities.limits.max_prompt_tokens;
			return modelLimit;
		} else if (chatModelInfo.capabilities.limits?.max_context_window_tokens) {
			// Otherwise return the context window as the prompt token limit for cases where CAPI doesn't configure prompt tokens
			modelLimit += chatModelInfo.capabilities.limits.max_context_window_tokens;
			return modelLimit;
		}

		return modelLimit;
	}

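	// Override precedence in _getMaxPromptTokensOverride, highest first: debug config,
	// exp treatment, CAPI max_prompt_tokens, CAPI max_context_window_tokens; every path
	// subtracts the 3 base tokens. E.g. (illustrative) CAPI max_prompt_tokens = 64000
	// with no overrides yields 63997.
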
	private async _getErrorMessage(fallback: string): Promise<string> {
		try {
			const status = await this._octoKitService.getGitHubOutageStatus();
			if (status !== GitHubOutageStatus.None) {
				return 'Error fetching models! It appears that GitHub is experiencing an outage. Please check the [GitHub Status Page](https://githubstatus.com) for more info';
			}
		} catch {
			// Don't let status check failures block the original error
		}
		return fallback;
	}

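	// The 'copilotchat.showInModelPicker' treatment variable below is expected to be a
	// JSON map from model id to visibility, e.g. '{"example-model-id": false}'
	// (illustrative id); missing entries fall back to CAPI's model_picker_enabled.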
	private _getShowInModelPickerOverride(resolvedModel: IModelAPIResponse): boolean {
		let modelPickerOverrides: Record<string, boolean> = {};
		const expResult = this._expService.getTreatmentVariable<string>('copilotchat.showInModelPicker');
		try {
			modelPickerOverrides = JSON.parse(expResult || '{}');
		} catch {
			// No-op if parsing experiment fails
		}

		return modelPickerOverrides[resolvedModel.id] ?? resolvedModel.model_picker_enabled;
	}
}

//#endregion