GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/util/db-schema/llm-utils.ts
1
// this contains bits and pieces from the wrongly named openai.ts file
2
3
import { isEmpty } from "lodash";
4
import LRU from "lru-cache";
5
6
import { unreachable } from "@cocalc/util/misc";
7
8
// these can be defined by admins and users
9
export const SERVICES = [
10
"openai",
11
"google",
12
"mistralai", // the "*ai" is deliberately, because their model names start with "mistral-..." and we have to distinguish it from the prefix
13
"anthropic",
14
"ollama",
15
"custom_openai",
16
"xai",
17
] as const;
18
19
// a "user-*" model is a wrapper for all the model services
20
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;
21
22
export type UserDefinedLLMService = (typeof SERVICES)[number];
23
24
export function isUserDefinedModelType(
25
model: unknown,
26
): model is UserDefinedLLMService {
27
return SERVICES.includes(model as any);
28
}
29
30
// "User LLMs" are defined in the user's account settings.
31
// They query an external LLM service of given type, endpoint, and API key.
32
export interface UserDefinedLLM {
33
id: number; // a unique number
34
service: UserDefinedLLMService;
35
model: string; // non-empty string
36
display: string; // short user-visible string
37
endpoint: string; // URL to the LLM service
38
apiKey: string;
39
icon?: string; // https://.../...png
40
max_tokens?: number; // optional context window size in tokens
41
}
42
43
export const USER_LLM_PREFIX = "user-";
44
45
// This prefixes the "model" defined by the user with the USER_LLM_PREFIX and the service prefix.
46
// We do not use the to*() functions, because for each service the model names defined by the user could be arbitrary strings.
47
export function toUserLLMModelName(llm: UserDefinedLLM) {
48
const { service } = llm;
49
const model: string = (() => {
50
switch (service) {
51
case "custom_openai":
52
return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
53
case "ollama":
54
return toOllamaModel(llm.model);
55
case "anthropic":
56
return `${ANTHROPIC_PREFIX}${llm.model}`;
57
case "google":
58
return `${GOOGLE_PREFIX}${llm.model}`;
59
case "mistralai":
60
return `${MISTRAL_PREFIX}${llm.model}`;
61
case "openai":
62
return `${OPENAI_PREFIX}${llm.model}`;
63
case "xai":
64
return `${XAI_PREFIX}${llm.model}`;
65
default:
66
unreachable(service);
67
throw new Error(
68
`toUserLLMModelName of service ${service} not supported`,
69
);
70
}
71
})();
72
return `${USER_LLM_PREFIX}${model}`;
73
}
74
75
export function fromUserDefinedLLMModel(m: string): string | null {
76
if (isUserDefinedModel(m)) {
77
return m.slice(USER_LLM_PREFIX.length);
78
}
79
return null;
80
}
81
82
export function isUserDefinedModel(model: unknown): boolean {
83
if (typeof model !== "string") return false;
84
if (model.startsWith(USER_LLM_PREFIX)) {
85
const m2 = model.slice(USER_LLM_PREFIX.length);
86
return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
87
}
88
return false;
89
}
90
91
export function unpackUserDefinedLLMModel(model: string): {
92
service: UserDefinedLLMService;
93
model: string;
94
} | null {
95
const um = fromUserDefinedLLMModel(model);
96
if (um === null) return null;
97
for (const service of SERVICES) {
98
if (um.startsWith(`${service}-`)) {
99
return { service, model: um.slice(service.length + 1) };
100
}
101
}
102
return null;
103
}
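// Illustrative round trip (a sketch added for clarity, not part of the upstream file;
// the concrete model string "gpt-4o" is just an example):
//   const llm: UserDefinedLLM = { id: 1, service: "openai", model: "gpt-4o",
//     display: "My GPT-4o", endpoint: "https://api.openai.com/v1", apiKey: "sk-..." };
//   toUserLLMModelName(llm)                          // → "user-openai-gpt-4o"
//   isUserDefinedModel("user-openai-gpt-4o")         // → true
//   unpackUserDefinedLLMModel("user-openai-gpt-4o")  // → { service: "openai", model: "gpt-4o" }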
104
105
export const OPENAI_PREFIX = "openai-";
106
107
// NOTE: all arrays of model names should be ordered from the "simplest and fastest" to the "most complex, slowest, and most expensive"
108
// that way, the ordering in the UI doesn't look arbitrary, but follows a clear logic
109
110
export const MODELS_OPENAI = [
111
"gpt-3.5-turbo",
112
"gpt-4o-mini-8k", // context limited
113
"gpt-4o-mini", // Released 2024-07-18
114
"gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
115
"gpt-4o", // Released 2024-05-13
116
// the "preview" variants are disabled, because the preview is over
117
"gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
118
"gpt-4-turbo-preview",
119
"gpt-4-turbo-8k", // Released 2024-04-11
120
"gpt-4-turbo",
121
"gpt-4",
122
"gpt-4.1",
123
"gpt-4.1-mini",
124
"gpt-4-32k",
125
"gpt-3.5-turbo-16k",
126
"text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
127
"o1-mini-8k",
128
"o1-mini",
129
"o1-8k",
130
"o1",
131
"o3-8k", // context limited
132
"o3",
133
"o4-mini-8k", // context limited
134
"o4-mini",
135
"gpt-5-8k", // context limited
136
"gpt-5",
137
"gpt-5.2-8k", // context limited
138
"gpt-5.2",
139
"gpt-5-mini-8k", // context limited
140
"gpt-5-mini",
141
] as const;
142
143
export type OpenAIModel = (typeof MODELS_OPENAI)[number];
144
145
export function isOpenAIModel(model: unknown): model is OpenAIModel {
146
return MODELS_OPENAI.includes(model as any);
147
}
148
149
// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
150
export const MISTRAL_MODELS = [
151
// yes, all of them have an extra mistral-prefix, on top of the vendor prefix
152
"mistral-small-latest",
153
"mistral-medium-latest",
154
"mistral-large-latest",
155
"devstral-medium-2507",
156
// "magistral-medium-latest", // throws error
157
] as const;
158
159
export type MistralModel = (typeof MISTRAL_MODELS)[number];
160
161
export function isMistralModel(model: unknown): model is MistralModel {
162
return MISTRAL_MODELS.includes(model as any);
163
}
164
165
// Google's models are taken from here – we use the Generative AI client library
166
// https://developers.generativeai.google/models/language
167
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
168
export const GOOGLE_MODELS = [
169
"gemini-1.5-flash-8k", // introduced 2024-05-15
170
"gemini-1.5-flash", // for user defined models
171
"gemini-pro", // Discontinued Feb'25. Keep it to avoid breaking old references!
172
"gemini-1.0-ultra", // hangs
173
"gemini-1.5-pro-8k", // works now with langchaing
174
"gemini-1.5-pro", // works now with langchaing
175
"gemini-2.5-flash-8k",
176
"gemini-2.5-pro-8k",
177
"gemini-2.0-flash-8k",
178
"gemini-2.0-flash-lite-8k",
179
"gemini-3-flash-preview-16k", // Preview model, context limited to 16k
180
"gemini-3-pro-preview-8k", // Preview model, context limited to 8k
181
] as const;
182
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
183
export function isGoogleModel(model: unknown): model is GoogleModel {
184
return GOOGLE_MODELS.includes(model as any);
185
}
186
// Canonical Google models (non-thinking)
187
const CANONICAL_GOOGLE_MODELS = [
188
"gemini-1.5-pro-latest",
189
"gemini-1.5-flash-latest",
190
"gemini-2.0-flash",
191
"gemini-2.0-flash-lite",
192
] as const;
193
194
// Canonical Google models that support thinking/reasoning tokens (Gemini 2.5+ and 3+)
195
const CANONICAL_GOOGLE_MODELS_THINKING = [
196
"gemini-2.5-flash",
197
"gemini-2.5-pro",
198
"gemini-3-flash-preview",
199
"gemini-3-pro-preview",
200
] as const;
201
202
export type CanonicalGoogleModel = (typeof CANONICAL_GOOGLE_MODELS)[number];
203
export type CanonicalGoogleThinkingModel =
204
(typeof CANONICAL_GOOGLE_MODELS_THINKING)[number];
205
206
// Union type for all canonical Google model IDs
207
type CanonicalGoogleModelId =
208
| CanonicalGoogleModel
209
| CanonicalGoogleThinkingModel;
210
211
export const GOOGLE_MODEL_TO_ID: Partial<{
212
[m in GoogleModel]: CanonicalGoogleModelId;
213
}> = {
214
"gemini-1.5-pro": "gemini-1.5-pro-latest",
215
"gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
216
"gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
217
"gemini-2.0-flash-8k": "gemini-2.0-flash",
218
"gemini-2.0-flash-lite-8k": "gemini-2.0-flash-lite",
219
"gemini-2.5-flash-8k": "gemini-2.5-flash",
220
"gemini-2.5-pro-8k": "gemini-2.5-pro",
221
"gemini-3-flash-preview-16k": "gemini-3-flash-preview",
222
"gemini-3-pro-preview-8k": "gemini-3-pro-preview",
223
} as const;
224
225
/**
226
* Check if a Google model supports thinking/reasoning tokens.
227
* These are Gemini 2.5+ and Gemini 3+ models.
228
* @param model - The canonical Google model name (after GOOGLE_MODEL_TO_ID mapping)
229
*/
230
export function isGoogleThinkingModel(model: string): boolean {
231
return CANONICAL_GOOGLE_MODELS_THINKING.includes(
232
model as CanonicalGoogleThinkingModel,
233
);
234
}
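// Illustrative example (a sketch added for clarity, not in the upstream file): the
// "-8k"/"-16k" names are CoCalc-internal, context-limited aliases, and the map above
// translates them to the canonical API model ids before calling Google.
//   GOOGLE_MODEL_TO_ID["gemini-2.5-pro-8k"]    // → "gemini-2.5-pro"
//   isGoogleThinkingModel("gemini-2.5-pro")    // → true  (Gemini 2.5+ supports thinking tokens)
//   isGoogleThinkingModel("gemini-2.0-flash")  // → false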
235
236
// https://docs.anthropic.com/en/docs/about-claude/models/overview -- stable names for the models ...
237
export const ANTHROPIC_MODELS = [
238
"claude-3-5-sonnet",
239
"claude-3-5-sonnet-4k", // added 2024-06-24
240
"claude-3-5-haiku-8k",
241
"claude-3-haiku",
242
"claude-3-haiku-8k", // limited context window, offered for free
243
"claude-3-sonnet",
244
"claude-3-sonnet-4k", // limited context window, offered for free
245
"claude-3-opus",
246
"claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending
247
"claude-4-sonnet-8k",
248
"claude-4-opus-8k",
249
"claude-4-5-sonnet-8k", // added 2025
250
"claude-4-5-opus-8k", // added 2025
251
"claude-4-6-opus-8k", // added 2026-02
252
"claude-4-5-haiku-8k", // added 2025
253
] as const;
254
255
// https://docs.anthropic.com/en/docs/about-claude/models/overview#model-aliases
256
// if it points to null, the model is no longer supported
257
export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string | null } = {
258
"claude-3-5-sonnet": null,
259
"claude-3-5-sonnet-4k": null,
260
"claude-3-5-haiku-8k": null,
261
"claude-3-haiku": "claude-3-haiku-20240307",
262
"claude-3-haiku-8k": "claude-3-haiku-20240307",
263
"claude-4-sonnet-8k": "claude-sonnet-4-0",
264
"claude-4-opus-8k": "claude-opus-4-0",
265
"claude-4-5-sonnet-8k": "claude-sonnet-4-5",
266
"claude-4-5-opus-8k": "claude-opus-4-5",
267
"claude-4-6-opus-8k": "claude-opus-4-6",
268
"claude-4-5-haiku-8k": "claude-haiku-4-5",
269
"claude-3-sonnet": null,
270
"claude-3-sonnet-4k": null,
271
"claude-3-opus": null,
272
"claude-3-opus-8k": null,
273
} as const;
274
export const ANTHROPIC_PREFIX = "anthropic-";
275
export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];
276
type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;
277
export function isAnthropicModel(model: unknown): model is AnthropicModel {
278
return ANTHROPIC_MODELS.includes(model as any);
279
}
280
export function toAnthropicService(model: AnthropicModel): AnthropicService {
281
return `${ANTHROPIC_PREFIX}${model}`;
282
}
283
export function isAnthropicService(
284
service: string,
285
): service is AnthropicService {
286
return service.startsWith(ANTHROPIC_PREFIX);
287
}
288
export function fromAnthropicService(
289
service: AnthropicService,
290
): AnthropicModel {
291
if (!isAnthropicService(service)) {
292
throw new Error(`not an anthropic service: ${service}`);
293
}
294
return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;
295
}
296
297
// xAI (https://x.ai/)
298
export const XAI_MODELS = [
299
"grok-4-1-fast-non-reasoning-16k",
300
"grok-4-1-fast-reasoning-16k",
301
"grok-code-fast-1-16k",
302
] as const;
303
export const XAI_MODEL_TO_ID: Partial<{ [m in XaiModel]: string }> = {
304
"grok-4-1-fast-non-reasoning-16k": "grok-4-1-fast-non-reasoning",
305
"grok-4-1-fast-reasoning-16k": "grok-4-1-fast-reasoning",
306
"grok-code-fast-1-16k": "grok-code-fast-1",
307
};
308
export const XAI_PREFIX = "xai-";
309
export type XaiModel = (typeof XAI_MODELS)[number];
310
export type XaiService = `${typeof XAI_PREFIX}${XaiModel}`;
311
export function isXaiModel(model: unknown): model is XaiModel {
312
return XAI_MODELS.includes(model as any);
313
}
314
export function toXaiService(model: XaiModel): XaiService {
315
return `${XAI_PREFIX}${model}`;
316
}
317
export function isXaiService(service: string): service is XaiService {
318
return service.startsWith(XAI_PREFIX);
319
}
320
export function fromXaiService(service: XaiService): XaiModel {
321
if (!isXaiService(service)) {
322
throw new Error(`not an xai service: ${service}`);
323
}
324
return service.slice(XAI_PREFIX.length) as XaiModel;
325
}
326
export function toXaiProviderModel(model: string): string {
327
const mapped = XAI_MODEL_TO_ID[model as XaiModel];
328
if (mapped != null) {
329
return mapped;
330
}
331
return model.replace(/-\d+k$/, "");
332
}
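// Illustrative example (sketch only): explicit map entries win, otherwise a trailing
// "-<N>k" context suffix is stripped. "grok-99-32k" is a hypothetical name.
//   toXaiProviderModel("grok-code-fast-1-16k")  // → "grok-code-fast-1" (via XAI_MODEL_TO_ID)
//   toXaiProviderModel("grok-99-32k")           // → "grok-99" (regex fallback)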
333
334
// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects
335
export const LANGUAGE_MODELS = [
336
...MODELS_OPENAI,
337
...MISTRAL_MODELS,
338
...GOOGLE_MODELS,
339
...ANTHROPIC_MODELS,
340
...XAI_MODELS,
341
] as const;
342
343
export const USER_SELECTABLE_LLMS_BY_VENDOR: {
344
[vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
345
} = {
346
openai: MODELS_OPENAI.filter(
347
(m) =>
348
m === "gpt-4" ||
349
m === "gpt-4-turbo-preview-8k" ||
350
m === "gpt-4o-8k" ||
351
m === "gpt-4o-mini-8k" ||
352
m === "gpt-4.1" ||
353
m === "gpt-4.1-mini" ||
354
m === "o3-8k" ||
355
m === "o4-mini-8k" ||
356
m === "gpt-5.2-8k" ||
357
m === "gpt-5-mini-8k",
358
),
359
google: [
360
"gemini-3-flash-preview-16k",
361
"gemini-3-pro-preview-8k",
362
"gemini-2.5-flash-8k",
363
"gemini-2.5-pro-8k",
364
],
365
mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-small-latest"),
366
anthropic: ANTHROPIC_MODELS.filter((m) => {
367
// latest of each tier; keep opus 4.5 temporarily for users who have it configured
368
return (
369
m === "claude-4-5-haiku-8k" ||
370
m === "claude-4-5-sonnet-8k" ||
371
m === "claude-4-5-opus-8k" ||
372
m === "claude-4-6-opus-8k"
373
);
374
}),
375
ollama: [], // this is empty, because these models are not hardcoded
376
custom_openai: [], // this is empty, because these models are not hardcoded
377
xai: XAI_MODELS, // all xAI models are user-selectable
378
user: [],
379
} as const;
380
381
// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!
382
// Make sure to update this when adding new models.
383
// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
384
export const USER_SELECTABLE_LANGUAGE_MODELS = [
385
...USER_SELECTABLE_LLMS_BY_VENDOR.openai,
386
...USER_SELECTABLE_LLMS_BY_VENDOR.google,
387
...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,
388
...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,
389
...USER_SELECTABLE_LLMS_BY_VENDOR.xai,
390
] as const;
391
392
export type OllamaLLM = string;
393
export type CustomOpenAI = string;
394
395
// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.
396
export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];
397
export type LanguageModel = LanguageModelCore | OllamaLLM;
398
export function isCoreLanguageModel(
399
model: unknown,
400
): model is LanguageModelCore {
401
if (typeof model !== "string") return false;
402
return LANGUAGE_MODELS.includes(model as any);
403
}
404
405
// we check if the given object is any known language model
406
export function isLanguageModel(model?: unknown): model is LanguageModel {
407
if (model == null) return false;
408
if (typeof model !== "string") return false;
409
if (isOllamaLLM(model)) return true;
410
if (isCustomOpenAI(model)) return true;
411
if (isUserDefinedModel(model)) return true; // this also checks whether there is a valid model inside
412
return LANGUAGE_MODELS.includes(model as any);
413
}
414
415
export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];
416
417
export function isLLMServiceName(service: unknown): service is LLMServiceName {
418
if (typeof service !== "string") return false;
419
return LANGUAGE_MODEL_SERVICES.includes(service as any);
420
}
421
422
export type LLMServicesAvailable = Record<LLMServiceName, boolean>;
423
424
interface LLMService {
425
name: string;
426
short: string; // additional short text next to the company name
427
desc: string; // more detailed description
428
url: string;
429
}
430
431
export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
432
openai: {
433
name: "OpenAI",
434
short: "AI research and deployment company",
435
desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
436
url: "https://openai.com/",
437
},
438
google: {
439
name: "Google",
440
short: "Technology company",
441
desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
442
url: "https://gemini.google.com/",
443
},
444
anthropic: {
445
name: "Anthropic",
446
short: "AI research company",
447
desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
448
url: "https://www.anthropic.com/",
449
},
450
mistralai: {
451
name: "Mistral AI",
452
short: "French AI company",
453
desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
454
url: "https://mistral.ai/",
455
},
456
ollama: {
457
name: "Ollama",
458
short: "Open-source software",
459
desc: "Ollama language model server at a custom API endpoint.",
460
url: "https://ollama.com/",
461
},
462
custom_openai: {
463
name: "OpenAI API",
464
short: "Custom endpoint",
465
desc: "Calls a custom OpenAI API endoint.",
466
url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
467
},
468
xai: {
469
name: "xAI",
470
short: "AI company by X Corp",
471
desc: "xAI is an American artificial intelligence company founded by Elon Musk.",
472
url: "https://x.ai/",
473
},
474
user: {
475
name: "User Defined",
476
short: "Account → Language Model",
477
desc: "Defined by the user in Account Settings → Language Model",
478
url: "",
479
},
480
} as const;
481
482
interface ValidLanguageModelNameProps {
483
model: string | undefined;
484
filter: LLMServicesAvailable;
485
ollama: string[]; // keys of ollama models
486
custom_openai: string[]; // keys of custom openai models
487
selectable_llms: string[]; // either empty, or an array stored in the server settings
488
}
489
490
// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
491
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
492
openai: false,
493
google: false,
494
ollama: false,
495
mistralai: false,
496
anthropic: false,
497
custom_openai: false,
498
xai: false,
499
user: false,
500
} as const;
501
502
// this is used in initialization functions, e.g. to get a default model depending on the overall availability
503
// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available,
504
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
505
export function getValidLanguageModelName({
506
model,
507
filter = DEFAULT_FILTER,
508
ollama,
509
custom_openai,
510
selectable_llms,
511
}: ValidLanguageModelNameProps): LanguageModel {
512
if (typeof model === "string" && isValidModel(model)) {
513
try {
514
if (isCoreLanguageModel(model)) {
515
const v = model2vendor(model).name;
516
if (filter[v] && selectable_llms.includes(model)) {
517
return model;
518
}
519
}
520
521
if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
522
return model;
523
}
524
525
if (
526
isCustomOpenAI(model) &&
527
custom_openai.includes(fromCustomOpenAIModel(model))
528
) {
529
return model;
530
}
531
532
if (isUserDefinedModel(model)) {
533
return model;
534
}
535
} catch {}
536
}
537
538
for (const free of [true, false]) {
539
const dflt = getDefaultLLM(
540
selectable_llms,
541
filter,
542
ollama,
543
custom_openai,
544
free,
545
);
546
if (dflt != null) {
547
return dflt;
548
}
549
}
550
return DEFAULT_MODEL;
551
}
552
553
export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
554
"google",
555
"openai",
556
"anthropic",
557
"mistralai",
558
"xai",
559
"ollama",
560
"custom_openai",
561
] as const;
562
563
export function getDefaultLLM(
564
selectable_llms: string[],
565
filter: LLMServicesAvailable,
566
ollama?: { [key: string]: any },
567
custom_openai?: { [key: string]: any },
568
only_free = true,
569
): LanguageModel {
570
for (const v of DEFAULT_LLM_PRIORITY) {
571
if (!filter[v]) continue;
572
for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
573
if (selectable_llms.includes(m)) {
574
const isFree = LLM_COST[m].free ?? true;
575
if ((only_free && isFree) || !only_free) {
576
return m;
577
}
578
}
579
}
580
}
581
// none of the standard models matched, so pick the first ollama or custom_openai model
582
if (ollama != null && !isEmpty(ollama)) {
583
return toOllamaModel(Object.keys(ollama)[0]);
584
}
585
if (custom_openai != null && !isEmpty(custom_openai)) {
586
return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
587
}
588
return DEFAULT_MODEL;
589
}
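// Illustrative behavior (sketch, assuming a site configuration where only Google and
// OpenAI are enabled and both models below appear in selectable_llms):
//   getDefaultLLM(["gemini-2.5-flash-8k", "gpt-5.2-8k"], filter)
//   // → "gemini-2.5-flash-8k", because "google" comes first in DEFAULT_LLM_PRIORITY
//   //   and that model is marked free in LLM_COST; with only_free = false the first
//   //   matching model is returned regardless of its "free" flag.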
590
591
export interface OpenAIMessage {
592
role: "system" | "user" | "assistant";
593
content: string;
594
}
595
export type OpenAIMessages = OpenAIMessage[];
596
597
export const OLLAMA_PREFIX = "ollama-";
598
export type OllamaService = string;
599
export function isOllamaService(service: string): service is OllamaService {
600
return isOllamaLLM(service);
601
}
602
603
export const CUSTOM_OPENAI_PREFIX = "custom_openai-";
604
export type CustomOpenAIService = string;
605
export function isCustomOpenAIService(
606
service: string,
607
): service is CustomOpenAIService {
608
return isCustomOpenAI(service);
609
}
610
611
export const MISTRAL_PREFIX = "mistralai-";
612
export type MistralService = `${typeof MISTRAL_PREFIX}${MistralModel}`;
613
export function isMistralService(service: string): service is MistralService {
614
return service.startsWith(MISTRAL_PREFIX);
615
}
616
617
export const GOOGLE_PREFIX = "google-";
618
619
// we encode the model in the frontend and elsewhere with the service name as a prefix
620
// ATTN: don't change the encoding pattern of [vendor]-[model]
621
// for whatever reason, it's also described that way in purchases/close.ts
622
export type LanguageServiceCore =
623
| `${typeof OPENAI_PREFIX}${OpenAIModel}`
624
| `${typeof GOOGLE_PREFIX}${
625
| "text-bison-001"
626
| "chat-bison-001"
627
| "embedding-gecko-001"}`
628
| `${typeof GOOGLE_PREFIX}${GoogleModel}`
629
| AnthropicService
630
| MistralService
631
| XaiService;
632
633
export type LanguageService =
634
| LanguageServiceCore
635
| OllamaService
636
| CustomOpenAIService;
637
638
// used e.g. for checking "account-id={string}" and other things like that
639
export const LANGUAGE_MODEL_PREFIXES = [
640
"chatgpt",
641
...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),
642
] as const;
643
644
// we encode the model in the frontend and elsewhere with the service name as a prefix
645
export function model2service(model: LanguageModel): LanguageService {
646
if (model === "text-embedding-ada-002") {
647
return `${OPENAI_PREFIX}${model}`;
648
}
649
if (
650
isOllamaLLM(model) ||
651
isCustomOpenAI(model) ||
652
isUserDefinedModel(model)
653
) {
654
return model; // already has a useful prefix
655
}
656
if (isXaiModel(model)) {
657
return toXaiService(model);
658
}
659
if (isMistralModel(model)) {
660
return toMistralService(model);
661
}
662
if (isAnthropicModel(model)) {
663
return toAnthropicService(model);
664
}
665
if (isLanguageModel(model)) {
666
if (
667
model === "text-bison-001" ||
668
model === "chat-bison-001" ||
669
model === "embedding-gecko-001" ||
670
isGoogleModel(model)
671
) {
672
return `${GOOGLE_PREFIX}${model}`;
673
} else {
674
return `${OPENAI_PREFIX}${model}`;
675
}
676
}
677
678
throw new Error(`unknown model: ${model}`);
679
}
680
681
// inverse of model2service, but robust for chat avatars, which might not have a prefix
682
// TODO: fix the mess
683
export function service2model(
684
service: LanguageService | "chatgpt",
685
): LanguageModel {
686
if (service === "chatgpt") {
687
return "gpt-3.5-turbo";
688
}
689
const lm = service2model_core(service);
690
if (lm == null) {
691
// We don't throw an error, since the frontend would crash
692
// throw new Error(`unknown service: ${service}`);
693
console.warn(`service2model: unknown service: ${service}`);
694
return "gpt-3.5-turbo";
695
}
696
return lm;
697
}
698
699
export function service2model_core(
700
service: LanguageService,
701
): LanguageModel | null {
702
// split off the first part of service, e.g., "openai-" or "google-"
703
const s = service.split("-")[0];
704
const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);
705
706
if (isUserDefinedModel(service)) {
707
return service;
708
}
709
710
const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
711
if (hasPrefix) {
712
// we add the trailing "-" to match with these prefixes, which include the "-"
713
switch (`${s}-`) {
714
case OLLAMA_PREFIX:
715
return toOllamaModel(m);
716
case CUSTOM_OPENAI_PREFIX:
717
return toCustomOpenAIModel(m);
718
}
719
}
720
721
if (LANGUAGE_MODELS.includes(m as any)) {
722
return m;
723
}
724
return null;
725
}
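// Illustrative round trip (sketch, not part of the upstream file):
//   model2service("gpt-4o")         // → "openai-gpt-4o"
//   service2model("openai-gpt-4o")  // → "gpt-4o"
//   service2model("chatgpt")        // → "gpt-3.5-turbo" (legacy avatar name)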
726
727
// NOTE: do not use this – instead use server_settings.default_llm
728
export const DEFAULT_MODEL: LanguageModel = "gemini-3-flash-preview-16k";
729
730
interface LLMVendor {
731
name: LLMServiceName;
732
url: string;
733
}
734
735
export function model2vendor(model): LLMVendor {
736
if (isUserDefinedModel(model)) {
737
return { name: "user", url: "" };
738
} else if (isOllamaLLM(model)) {
739
return { name: "ollama", url: LLM_PROVIDER.ollama.url };
740
} else if (isCustomOpenAI(model)) {
741
return {
742
name: "custom_openai",
743
url: LLM_PROVIDER.custom_openai.url,
744
};
745
} else if (isMistralModel(model)) {
746
return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };
747
} else if (isOpenAIModel(model)) {
748
return { name: "openai", url: LLM_PROVIDER.openai.url };
749
} else if (isGoogleModel(model)) {
750
return { name: "google", url: LLM_PROVIDER.google.url };
751
} else if (isAnthropicModel(model)) {
752
return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };
753
} else if (isXaiModel(model)) {
754
return { name: "xai", url: LLM_PROVIDER.xai.url };
755
}
756
757
throw new Error(`model2vendor: unknown model: "${model}"`);
758
}
759
760
// wraps the model name with the prefix that indicates it's an ollama model
761
// TODO: maybe it will be necessary at some point to pass in the list of available ollama models
762
// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)
763
export function toOllamaModel(model: string): OllamaLLM {
764
if (isOllamaLLM(model)) {
765
throw new Error(`already an ollama model: ${model}`);
766
}
767
return `${OLLAMA_PREFIX}${model}`;
768
}
769
770
// unwraps the model name by removing the prefix that indicates it's an ollama model
771
export function fromOllamaModel(model: OllamaLLM) {
772
if (!isOllamaLLM(model)) {
773
throw new Error(`not an ollama model: ${model}`);
774
}
775
return model.slice(OLLAMA_PREFIX.length);
776
}
777
778
export function isOllamaLLM(model: unknown): model is OllamaLLM {
779
return (
780
typeof model === "string" &&
781
model.startsWith(OLLAMA_PREFIX) &&
782
model.length > OLLAMA_PREFIX.length
783
);
784
}
785
786
export function toCustomOpenAIModel(model: string): CustomOpenAI {
787
if (isCustomOpenAI(model)) {
788
throw new Error(`already a custom openai model: ${model}`);
789
}
790
return `${CUSTOM_OPENAI_PREFIX}${model}`;
791
}
792
793
export function isCustomOpenAI(model: unknown): model is CustomOpenAI {
794
return (
795
typeof model === "string" &&
796
model.startsWith(CUSTOM_OPENAI_PREFIX) &&
797
model.length > CUSTOM_OPENAI_PREFIX.length
798
);
799
}
800
801
export function fromCustomOpenAIModel(model: CustomOpenAI) {
802
if (!isCustomOpenAI(model)) {
803
throw new Error(`not a custom openai model: ${model}`);
804
}
805
return model.slice(CUSTOM_OPENAI_PREFIX.length);
806
}
807
808
export function toMistralService(model: string): MistralService {
809
if (isMistralService(model)) {
810
throw new Error(`already a mistral model: ${model}`);
811
}
812
if (!isMistralModel(model)) {
813
throw new Error(`not a mistral model: ${model}`);
814
}
815
return `${MISTRAL_PREFIX}${model}`;
816
}
817
818
export function fromMistralService(model: MistralService) {
819
if (!isMistralService(model)) {
820
throw new Error(`not a mistral model: ${model}`);
821
}
822
return model.slice(MISTRAL_PREFIX.length);
823
}
824
825
type LLM2String = {
826
[key in
827
| (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
828
| "chatgpt" // some additional ones, backwards compatibility
829
| "chatgpt3"
830
| "chatgpt4"
831
| "gpt-4-32k"
832
| "text-bison-001"
833
| "chat-bison-001"]: string;
834
};
835
836
// Map from pseudo account_id to what should be displayed to the user.
837
// This is used in various places in the frontend.
838
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
839
export const LLM_USERNAMES: LLM2String = {
840
chatgpt: "GPT-3.5",
841
chatgpt3: "GPT-3.5",
842
chatgpt4: "GPT-4",
843
"gpt-4": "GPT-4",
844
"gpt-4-32k": "GPT-4-32k",
845
"gpt-3.5-turbo": "GPT-3.5",
846
"gpt-3.5-turbo-16k": "GPT-3.5-16k",
847
"gpt-4-turbo-preview": "GPT-4 Turbo 128k",
848
"gpt-4-turbo-preview-8k": "GPT-4 Turbo",
849
"gpt-4-turbo": "GPT-4 Turbo 128k",
850
"gpt-4-turbo-8k": "GPT-4 Turbo",
851
"gpt-4o": "GPT-4o 128k",
852
"gpt-4o-8k": "GPT-4o",
853
"gpt-4o-mini": "GPT-4o Mini 128k",
854
"gpt-4o-mini-8k": "GPT-4o Mini",
855
"gpt-4.1": "GPT-4.1",
856
"gpt-4.1-mini": "GPT-4.1 Mini",
857
"o1-mini-8k": "OpenAI o1-mini",
858
"o1-8k": "OpenAI o1",
859
"o1-mini": "OpenAI o1-mini",
860
o1: "OpenAI o1",
861
"text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
862
"text-bison-001": "PaLM 2",
863
"chat-bison-001": "PaLM 2",
864
"gemini-pro": "Gemini 1.0 Pro",
865
"gemini-1.0-ultra": "Gemini 1.0 Ultra",
866
"gemini-1.5-flash": "Gemini 1.5 Flash",
867
"gemini-1.5-pro": "Gemini 1.5 Pro 1m",
868
"gemini-1.5-pro-8k": "Gemini 1.5 Pro",
869
"gemini-1.5-flash-8k": "Gemini 1.5 Flash",
870
"gemini-2.0-flash-8k": "Gemini 2.0 Flash",
871
"gemini-2.0-flash-lite-8k": "Gemini 2.0 Flash Lite",
872
"gemini-2.5-flash-8k": "Gemini 2.5 Flash",
873
"gemini-2.5-pro-8k": "Gemini 2.5 Pro",
874
"gemini-3-pro-preview-8k": "Gemini 3 Pro",
875
"mistral-small-latest": "Mistral AI Small",
876
"mistral-medium-latest": "Mistral AI Medium",
877
"mistral-large-latest": "Mistral AI Large",
878
"devstral-medium-2507": "Devstral Medium",
879
//"magistral-medium-latest": "Magistral Medium",
880
"claude-3-haiku": "Claude 3 Haiku",
881
"claude-3-haiku-8k": "Claude 3 Haiku",
882
"claude-3-5-haiku-8k": "Claude 3 Haiku",
883
"claude-3-sonnet": "Claude 3 Sonnet 200k",
884
"claude-3-sonnet-4k": "Claude 3 Sonnet",
885
"claude-3-5-sonnet": "Claude 3.5 Sonnet",
886
"claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
887
"claude-4-sonnet-8k": "Claude 4 Sonnet",
888
"claude-4-opus-8k": "Claude 4 Opus",
889
"claude-4-5-sonnet-8k": "Claude 4.5 Sonnet",
890
"claude-4-5-opus-8k": "Claude 4.5 Opus",
891
"claude-4-6-opus-8k": "Claude 4.6 Opus",
892
"claude-4-5-haiku-8k": "Claude 4.5 Haiku",
893
"claude-3-opus": "Claude 3 Opus",
894
"claude-3-opus-8k": "Claude 3 Opus",
895
"o3-8k": "OpenAI o3",
896
o3: "OpenAI o3 128k",
897
"o4-mini-8k": "OpenAI o4-mini",
898
"o4-mini": "OpenAI o4-mini 128k",
899
"gpt-5-8k": "GPT-5",
900
"gpt-5": "GPT-5 128k",
901
"gpt-5.2-8k": "GPT-5.2",
902
"gpt-5.2": "GPT-5.2 128k",
903
"gpt-5-mini-8k": "GPT-5 Mini",
904
"gpt-5-mini": "GPT-5 Mini 128k",
905
"gemini-3-flash-preview-16k": "Gemini 3 Flash",
906
"grok-4-1-fast-non-reasoning-16k": "Grok 4.1 Fast",
907
"grok-4-1-fast-reasoning-16k": "Grok 4.1 Fast Reasoning",
908
"grok-code-fast-1-16k": "Grok Code Fast",
909
} as const;
910
911
// similar to the above, we map to short user-visible description texts
912
// it is shown next to the name, so the description does not need to repeat the name
913
export const LLM_DESCR: LLM2String = {
914
chatgpt: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
915
chatgpt3: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
916
chatgpt4:
917
"Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
918
"gpt-4":
919
"Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
920
"gpt-4.1":
921
"Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
922
"gpt-4-32k": "",
923
"gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
924
"gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
925
"gpt-4-turbo-preview-8k":
926
"More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
927
"gpt-4-turbo-preview": "Like GPT-4 Turbo, but with up to 128k token context",
928
"gpt-4-turbo-8k":
929
"Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
930
"gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",
931
"gpt-4o-8k":
932
"Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
933
"gpt-4o": "Most powerful fastest, and cheapest (OpenAI, 128k token context)",
934
"gpt-4o-mini-8k":
935
"Most cost-efficient small model (OpenAI, 8k token context)",
936
"gpt-4.1-mini": "Most cost-efficient small model (OpenAI, 8k token context)",
937
"gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
938
"text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
939
"o1-8k": "Spends more time thinking (8k token context)",
940
"o1-mini-8k": "A cost-efficient reasoning model (8k token context)",
941
o1: "Spends more time thinking (8k token context)",
942
"o1-mini": "A cost-efficient reasoning model (8k token context)",
943
"text-bison-001": "",
944
"chat-bison-001": "",
945
"gemini-pro":
946
"Google's Gemini 1.0 Pro Generative AI model (30k token context)",
947
"gemini-1.0-ultra":
948
"Google's Gemini 1.0 Ultra Generative AI model (30k token context)",
949
"gemini-1.5-pro":
950
"Google's Gemini 1.5 Pro Generative AI model (1m token context)",
951
"gemini-1.5-flash": "Google's Gemini 1.5 Flash Generative AI model",
952
"gemini-1.5-pro-8k":
953
"Google's Gemini 1.5 Pro Generative AI model (8k token context)",
954
"gemini-1.5-flash-8k":
955
"Google's Gemini 1.5 Flash Generative AI model (8k token context)",
956
"gemini-2.0-flash-8k":
957
"Google's Gemini 2.0 Flash Generative AI model (8k token context)",
958
"gemini-2.0-flash-lite-8k":
959
"Google's Gemini 2.0 Flash Lite Generative AI model (8k token context)",
960
"gemini-2.5-flash-8k":
961
"Google's Gemini 2.5 Flash Generative AI model (8k token context)",
962
"gemini-2.5-pro-8k":
963
"Google's Gemini 2.5 Pro Generative AI model (8k token context)",
964
"gemini-3-pro-preview-8k":
965
"Google's Gemini 3 Pro Generative AI model (8k token context)",
966
"mistral-small-latest":
967
"Small general purpose tasks, text classification, customer service. (Mistral AI, 4k token context)",
968
"mistral-medium-latest":
969
"Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",
970
"mistral-large-latest":
971
"Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",
972
"devstral-medium-2507":
973
"Developer-focused model optimized for coding tasks. (Mistral AI, 8k token context)",
974
// "magistral-medium-latest":
975
// "Enhanced medium model with improved reasoning capabilities. (Mistral AI, 8k token context)",
976
"claude-3-haiku":
977
"Fastest model, lightweight actions (Anthropic, 200k token context)",
978
"claude-3-haiku-8k":
979
"Fastest model, lightweight actions (Anthropic, 8k token context)",
980
"claude-3-5-sonnet":
981
"Our most intelligent model (Anthropic, 200k token context)",
982
"claude-3-sonnet":
983
"Our most intelligent model (Anthropic, 200k token context)",
984
"claude-3-5-sonnet-4k":
985
"Our most intelligent model (Anthropic, 4k token context)",
986
"claude-3-5-haiku-8k":
987
"Fastest model, lightweight actions (Anthropic, 8k token context)",
988
"claude-4-sonnet-8k":
989
"Best combination of performance and speed (Anthropic, 8k token context)",
990
"claude-4-opus-8k":
991
"Excels at writing and complex tasks (Anthropic, 8k token context)",
992
"claude-4-5-sonnet-8k":
993
"Most intelligent model with advanced reasoning (Anthropic, 8k token context)",
994
"claude-4-5-opus-8k":
995
"Flagship model excelling at complex tasks and writing (Anthropic, 8k token context)",
996
"claude-4-6-opus-8k":
997
"Most intelligent model for agents and coding (Anthropic, 8k token context)",
998
"claude-4-5-haiku-8k":
999
"Fastest and most cost-efficient model (Anthropic, 8k token context)",
1000
"claude-3-sonnet-4k":
1001
"Best combination of performance and speed (Anthropic, 4k token context)",
1002
"claude-3-opus":
1003
"Excels at writing and complex tasks (Anthropic, 200k token context)",
1004
"claude-3-opus-8k":
1005
"Excels at writing and complex tasks (Anthropic, 8k token context)",
1006
"o3-8k":
1007
"Advanced reasoning model with enhanced thinking capabilities (8k token context)",
1008
o3: "Advanced reasoning model with enhanced thinking capabilities (128k token context)",
1009
"o4-mini-8k":
1010
"Cost-efficient reasoning model with strong performance (8k token context)",
1011
"o4-mini":
1012
"Cost-efficient reasoning model with strong performance (128k token context)",
1013
"gpt-5-8k":
1014
"OpenAI's most advanced model with built-in reasoning (8k token context)",
1015
"gpt-5":
1016
"OpenAI's most advanced model with built-in reasoning (128k token context)",
1017
"gpt-5.2-8k":
1018
"OpenAI's most advanced model with built-in reasoning (8k token context)",
1019
"gpt-5.2":
1020
"OpenAI's most advanced model with built-in reasoning (128k token context)",
1021
"gpt-5-mini-8k":
1022
"Fast and cost-efficient version of GPT-5 (8k token context)",
1023
"gpt-5-mini": "Fast and cost-efficient version of GPT-5 (128k token context)",
1024
"gemini-3-flash-preview-16k":
1025
"Google's Gemini 3 Flash model (16k token context)",
1026
"grok-4-1-fast-non-reasoning-16k":
1027
"xAI's Grok 4.1 fast non-reasoning model (16k token context)",
1028
"grok-4-1-fast-reasoning-16k":
1029
"xAI's Grok 4.1 fast reasoning model (16k token context)",
1030
"grok-code-fast-1-16k":
1031
"xAI's Grok Code Fast model, specialized for coding tasks (16k token context)",
1032
} as const;
1033
1034
export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {
1035
if (!isCoCalcCom) return true;
1036
if (isUserDefinedModel(model)) return true;
1037
if (isOllamaLLM(model)) return true;
1038
if (isCustomOpenAI(model)) return true;
1039
if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {
1040
// i.e. model is now of type LanguageModelCore and we can look up its cost info
1041
const costInfo = LLM_COST[model];
1042
if (costInfo != null) {
1043
return costInfo.free;
1044
}
1045
}
1046
// all others are free (this should actually never happen, but we're cautious)
1047
return true;
1048
}
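// Illustrative examples (sketch): on CoCalc.com the "free" flag from LLM_COST decides,
// elsewhere (on-prem) every model is treated as free.
//   isFreeModel("gpt-4o-mini-8k", true)  // → true   (marked free in LLM_COST)
//   isFreeModel("gpt-4o-8k", true)       // → false  (metered model)
//   isFreeModel("gpt-4o-8k", false)      // → true   (not cocalc.com)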
1049
1050
// this is used in purchases/get-service-cost
1051
// we only need to check for the vendor prefixes, no special cases!
1052
export function isLanguageModelService(
1053
service: string,
1054
): service is LanguageService {
1055
if (isUserDefinedModel(service)) return true;
1056
for (const v of LANGUAGE_MODEL_SERVICES) {
1057
if (service.startsWith(`${v}-`)) {
1058
return true;
1059
}
1060
}
1061
return false;
1062
}
1063
1064
export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
1065
switch (service) {
1066
case "openai":
1067
return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
1068
case "google":
1069
return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
1070
case "ollama":
1071
return `No status information for Ollama available.`;
1072
case "custom_openai":
1073
return `No status information for Custom OpenAI available.`;
1074
case "mistralai":
1075
return `No status information for Mistral AI available.`;
1076
case "anthropic":
1077
return `Anthropic [status](https://status.anthropic.com/).`;
1078
case "xai":
1079
return `xAI [status](https://status.x.ai/).`;
1080
case "user":
1081
return `No status information for user defined model available.`;
1082
default:
1083
unreachable(service);
1084
}
1085
return "";
1086
}
1087
1088
interface Cost {
1089
prompt_tokens: number;
1090
completion_tokens: number;
1091
max_tokens: number;
1092
free: boolean; // whether this model has a metered paid usage, or offered for free
1093
}
1094
1095
// price per token for a given price of USD per 1M tokens
1096
function usd1Mtokens(usd: number): number {
1097
return usd / 1_000_000;
1098
}
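// Example (illustrative note, not in the upstream file): usd1Mtokens(30) === 0.00003,
// i.e. $30 per 1M tokens becomes the per-token price used in the LLM_COST table below.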
1099
1100
// This is the official published cost that openai charges.
1101
// It changes over time, so this will sometimes need to be updated.
1102
// Our cost is a configurable multiple of this.
1103
// https://openai.com/pricing#language-models
1104
// There appears to be no api that provides the prices, unfortunately.
1105
export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
1106
"gpt-4": {
1107
prompt_tokens: usd1Mtokens(30),
1108
completion_tokens: usd1Mtokens(60),
1109
max_tokens: 8192,
1110
free: false,
1111
},
1112
"gpt-4-32k": {
1113
prompt_tokens: usd1Mtokens(60),
1114
completion_tokens: usd1Mtokens(120),
1115
max_tokens: 32768,
1116
free: false,
1117
},
1118
"gpt-3.5-turbo": {
1119
prompt_tokens: usd1Mtokens(0.5),
1120
completion_tokens: usd1Mtokens(1.5),
1121
max_tokens: 4096,
1122
free: true,
1123
},
1124
"gpt-3.5-turbo-16k": {
1125
prompt_tokens: usd1Mtokens(3),
1126
completion_tokens: usd1Mtokens(4),
1127
max_tokens: 16384,
1128
free: false,
1129
},
1130
// like above, but we limit the tokens to reduce how much money the user has to commit to
1131
"gpt-4-turbo-preview-8k": {
1132
prompt_tokens: usd1Mtokens(10),
1133
completion_tokens: usd1Mtokens(30),
1134
max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
1135
free: false,
1136
},
1137
"gpt-4-turbo-preview": {
1138
prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
1139
completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
1140
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
1141
free: false,
1142
}, // like above, but we limit the tokens to reduce how much money the user has to commit to
1143
"gpt-4-turbo-8k": {
1144
prompt_tokens: usd1Mtokens(10),
1145
completion_tokens: usd1Mtokens(30),
1146
max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
1147
free: false,
1148
},
1149
"gpt-4-turbo": {
1150
prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
1151
completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
1152
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
1153
free: false,
1154
},
1155
"gpt-4.1": {
1156
prompt_tokens: usd1Mtokens(2),
1157
completion_tokens: usd1Mtokens(8),
1158
max_tokens: 8192,
1159
free: false,
1160
},
1161
"gpt-4.1-mini": {
1162
prompt_tokens: usd1Mtokens(0.4),
1163
completion_tokens: usd1Mtokens(1.6),
1164
max_tokens: 8192,
1165
free: true,
1166
},
1167
"gpt-4o-8k": {
1168
prompt_tokens: usd1Mtokens(2.5),
1169
completion_tokens: usd1Mtokens(10),
1170
max_tokens: 8192, // like gpt-4-turbo-8k
1171
free: false,
1172
},
1173
"gpt-4o": {
1174
prompt_tokens: usd1Mtokens(2.5),
1175
completion_tokens: usd1Mtokens(10),
1176
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
1177
free: false,
1178
},
1179
"gpt-4o-mini-8k": {
1180
prompt_tokens: usd1Mtokens(0.15),
1181
completion_tokens: usd1Mtokens(0.6),
1182
max_tokens: 8192, // like gpt-4-turbo-8k
1183
free: true,
1184
},
1185
"gpt-4o-mini": {
1186
prompt_tokens: usd1Mtokens(0.15),
1187
completion_tokens: usd1Mtokens(0.6),
1188
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
1189
free: true,
1190
},
1191
o1: {
1192
prompt_tokens: usd1Mtokens(15),
1193
completion_tokens: usd1Mtokens(60),
1194
max_tokens: 8192, // like gpt-4-turbo-8k
1195
free: false,
1196
},
1197
"o1-8k": {
1198
prompt_tokens: usd1Mtokens(15),
1199
completion_tokens: usd1Mtokens(60),
1200
max_tokens: 8192, // like gpt-4-turbo-8k
1201
free: false,
1202
},
1203
"o1-mini-8k": {
1204
prompt_tokens: usd1Mtokens(1.1),
1205
completion_tokens: usd1Mtokens(4.4),
1206
max_tokens: 8192, // like gpt-4-turbo-8k
1207
free: true,
1208
},
1209
"o1-mini": {
1210
prompt_tokens: usd1Mtokens(1.1),
1211
completion_tokens: usd1Mtokens(4.4),
1212
max_tokens: 8192, // like gpt-4-turbo-8k
1213
free: true,
1214
},
1215
// also OpenAI
1216
"text-embedding-ada-002": {
1217
prompt_tokens: usd1Mtokens(0.05),
1218
completion_tokens: usd1Mtokens(0.05), // NOTE: this isn't a thing with embeddings
1219
max_tokens: 8191,
1220
free: false,
1221
},
1222
// https://ai.google.dev/pricing
1223
"gemini-pro": {
1224
prompt_tokens: usd1Mtokens(0.5),
1225
completion_tokens: usd1Mtokens(1.5),
1226
max_tokens: 30720,
1227
free: true,
1228
},
1229
"gemini-1.5-pro-8k": {
1230
prompt_tokens: usd1Mtokens(1.25), // (we're below the 128k context)
1231
completion_tokens: usd1Mtokens(5),
1232
max_tokens: 8_000,
1233
free: false,
1234
},
1235
"gemini-1.5-pro": {
1236
prompt_tokens: usd1Mtokens(2.5),
1237
completion_tokens: usd1Mtokens(10),
1238
max_tokens: 1048576,
1239
free: false,
1240
},
1241
"gemini-1.0-ultra": {
1242
prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!
1243
completion_tokens: usd1Mtokens(1),
1244
max_tokens: 30720,
1245
free: true,
1246
},
1247
"gemini-1.5-flash": {
1248
prompt_tokens: usd1Mtokens(0.075),
1249
completion_tokens: usd1Mtokens(0.3),
1250
max_tokens: 8_000,
1251
free: true,
1252
},
1253
"gemini-1.5-flash-8k": {
1254
prompt_tokens: usd1Mtokens(0.075),
1255
completion_tokens: usd1Mtokens(0.3),
1256
max_tokens: 8_000,
1257
free: true,
1258
},
1259
// https://ai.google.dev/gemini-api/docs/pricing?hl=de
1260
"gemini-2.0-flash-8k": {
1261
prompt_tokens: usd1Mtokens(0.1),
1262
completion_tokens: usd1Mtokens(0.4),
1263
max_tokens: 8_000,
1264
free: true,
1265
},
1266
"gemini-2.0-flash-lite-8k": {
1267
prompt_tokens: usd1Mtokens(0.075),
1268
completion_tokens: usd1Mtokens(0.3),
1269
max_tokens: 8_000,
1270
free: true,
1271
},
1272
"gemini-2.5-flash-8k": {
1273
prompt_tokens: usd1Mtokens(0.3),
1274
completion_tokens: usd1Mtokens(2.5),
1275
max_tokens: 8_000,
1276
free: true,
1277
},
1278
"gemini-2.5-pro-8k": {
1279
prompt_tokens: usd1Mtokens(1.25),
1280
completion_tokens: usd1Mtokens(10),
1281
max_tokens: 8_000,
1282
free: false,
1283
},
1284
"gemini-3-flash-preview-16k": {
1285
prompt_tokens: usd1Mtokens(0.5),
1286
completion_tokens: usd1Mtokens(3.0),
1287
max_tokens: 16_000,
1288
free: true,
1289
},
1290
"gemini-3-pro-preview-8k": {
1291
prompt_tokens: usd1Mtokens(2),
1292
completion_tokens: usd1Mtokens(4),
1293
max_tokens: 8_000,
1294
free: false,
1295
},
1296
// https://mistral.ai/technology/
1297
"mistral-small-latest": {
1298
prompt_tokens: usd1Mtokens(0.2),
1299
completion_tokens: usd1Mtokens(0.6),
1300
max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
1301
free: true,
1302
},
1303
"mistral-medium-latest": {
1304
prompt_tokens: usd1Mtokens(0.4),
1305
completion_tokens: usd1Mtokens(2),
1306
max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
1307
free: true,
1308
},
1309
"mistral-large-latest": {
1310
prompt_tokens: usd1Mtokens(2),
1311
completion_tokens: usd1Mtokens(6),
1312
max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
1313
free: false,
1314
},
1315
"devstral-medium-2507": {
1316
prompt_tokens: usd1Mtokens(0.4),
1317
completion_tokens: usd1Mtokens(2),
1318
max_tokens: 8_000, // TODO don't know the real value, see getMaxTokens
1319
free: true,
1320
},
1321
// "magistral-medium-latest": {
1322
// prompt_tokens: usd1Mtokens(2),
1323
// completion_tokens: usd1Mtokens(5),
1324
// max_tokens: 8_000, // TODO don't know the real value, see getMaxTokens
1325
// free: false,
1326
// },
1327
// Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
1328
"claude-3-opus-8k": {
1329
prompt_tokens: usd1Mtokens(15),
1330
completion_tokens: usd1Mtokens(75),
1331
max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
1332
free: false,
1333
},
1334
"claude-3-opus": {
1335
prompt_tokens: usd1Mtokens(15),
1336
completion_tokens: usd1Mtokens(75),
1337
max_tokens: 200_000,
1338
free: false,
1339
},
1340
"claude-3-5-sonnet": {
1341
prompt_tokens: usd1Mtokens(3),
1342
completion_tokens: usd1Mtokens(15),
1343
max_tokens: 200_000,
1344
free: false,
1345
},
1346
"claude-3-5-sonnet-4k": {
1347
prompt_tokens: usd1Mtokens(3),
1348
completion_tokens: usd1Mtokens(15),
1349
max_tokens: 4_000, // limited to 4k tokens
1350
free: false,
1351
},
1352
"claude-3-sonnet-4k": {
1353
prompt_tokens: usd1Mtokens(3),
1354
completion_tokens: usd1Mtokens(15),
1355
max_tokens: 4_000, // limited to 4k tokens
1356
free: false,
1357
},
1358
"claude-3-sonnet": {
1359
prompt_tokens: usd1Mtokens(3),
1360
completion_tokens: usd1Mtokens(15),
1361
max_tokens: 200_000,
1362
free: false,
1363
},
1364
"claude-3-haiku-8k": {
1365
prompt_tokens: usd1Mtokens(0.8),
1366
completion_tokens: usd1Mtokens(4),
1367
max_tokens: 8_000, // limited to 8k tokens, offered for free
1368
free: true,
1369
},
1370
"claude-3-haiku": {
1371
prompt_tokens: usd1Mtokens(0.8),
1372
completion_tokens: usd1Mtokens(4),
1373
max_tokens: 8_000, // limited to 8k tokens, offered for free
1374
free: true,
1375
},
1376
"claude-3-5-haiku-8k": {
1377
prompt_tokens: usd1Mtokens(0.8),
1378
completion_tokens: usd1Mtokens(4),
1379
max_tokens: 8_000,
1380
free: true,
1381
},
1382
"claude-4-sonnet-8k": {
1383
prompt_tokens: usd1Mtokens(3),
1384
completion_tokens: usd1Mtokens(15),
1385
max_tokens: 8_000,
1386
free: false,
1387
},
1388
"claude-4-opus-8k": {
1389
prompt_tokens: usd1Mtokens(15),
1390
completion_tokens: usd1Mtokens(75),
1391
max_tokens: 8_000,
1392
free: false,
1393
},
1394
"claude-4-5-sonnet-8k": {
1395
prompt_tokens: usd1Mtokens(3),
1396
completion_tokens: usd1Mtokens(15),
1397
max_tokens: 8_000,
1398
free: false,
1399
},
1400
"claude-4-5-opus-8k": {
1401
prompt_tokens: usd1Mtokens(5),
1402
completion_tokens: usd1Mtokens(25),
1403
max_tokens: 8_000,
1404
free: false,
1405
},
1406
"claude-4-6-opus-8k": {
1407
prompt_tokens: usd1Mtokens(5),
1408
completion_tokens: usd1Mtokens(25),
1409
max_tokens: 8_000,
1410
free: false,
1411
},
1412
"claude-4-5-haiku-8k": {
1413
prompt_tokens: usd1Mtokens(1),
1414
completion_tokens: usd1Mtokens(5),
1415
max_tokens: 8_000,
1416
free: true,
1417
},
1418
"o3-8k": {
1419
prompt_tokens: usd1Mtokens(2),
1420
completion_tokens: usd1Mtokens(8),
1421
max_tokens: 8192,
1422
free: false,
1423
},
1424
o3: {
1425
prompt_tokens: usd1Mtokens(2),
1426
completion_tokens: usd1Mtokens(8),
1427
max_tokens: 128000,
1428
free: false,
1429
},
1430
"o4-mini-8k": {
1431
prompt_tokens: usd1Mtokens(1.1),
1432
completion_tokens: usd1Mtokens(4.4),
1433
max_tokens: 8192,
1434
free: false,
1435
},
1436
"o4-mini": {
1437
prompt_tokens: usd1Mtokens(1.1),
1438
completion_tokens: usd1Mtokens(4.4),
1439
max_tokens: 128000,
1440
free: false,
1441
},
1442
"gpt-5-8k": {
1443
prompt_tokens: usd1Mtokens(1.25),
1444
completion_tokens: usd1Mtokens(10),
1445
max_tokens: 8192,
1446
free: false,
1447
},
1448
"gpt-5": {
1449
prompt_tokens: usd1Mtokens(1.25),
1450
completion_tokens: usd1Mtokens(10),
1451
max_tokens: 128000,
1452
free: false,
1453
},
1454
"gpt-5.2-8k": {
1455
prompt_tokens: usd1Mtokens(1.25),
1456
completion_tokens: usd1Mtokens(10),
1457
max_tokens: 8192,
1458
free: false,
1459
},
1460
"gpt-5.2": {
1461
prompt_tokens: usd1Mtokens(1.25),
1462
completion_tokens: usd1Mtokens(10),
1463
max_tokens: 128000,
1464
free: false,
1465
},
1466
"gpt-5-mini-8k": {
1467
prompt_tokens: usd1Mtokens(0.25),
1468
completion_tokens: usd1Mtokens(2),
1469
max_tokens: 8192,
1470
free: true,
1471
},
1472
"gpt-5-mini": {
1473
prompt_tokens: usd1Mtokens(0.25),
1474
completion_tokens: usd1Mtokens(2),
1475
max_tokens: 128000,
1476
free: true,
1477
},
1478
// xAI (https://x.ai/)
1479
"grok-4-1-fast-non-reasoning-16k": {
1480
prompt_tokens: usd1Mtokens(0.2),
1481
completion_tokens: usd1Mtokens(0.5),
1482
max_tokens: 16_000,
1483
free: true,
1484
},
1485
"grok-4-1-fast-reasoning-16k": {
1486
prompt_tokens: usd1Mtokens(0.2),
1487
completion_tokens: usd1Mtokens(0.5),
1488
max_tokens: 16_000,
1489
free: true,
1490
},
1491
"grok-code-fast-1-16k": {
1492
prompt_tokens: usd1Mtokens(0.2),
1493
completion_tokens: usd1Mtokens(1.5),
1494
max_tokens: 16_000,
1495
free: true,
1496
},
1497
} as const;
1498
1499
// TODO: remove this test – it's only used server side, and that server side check should work for all known LLM models
1500
export function isValidModel(model?: string): boolean {
1501
if (model == null) return false;
1502
if (isUserDefinedModel(model)) return true;
1503
if (isOllamaLLM(model)) return true;
1504
if (isCustomOpenAI(model)) return true;
1505
if (isMistralModel(model)) return true;
1506
if (isGoogleModel(model)) return true;
1507
if (isXaiModel(model)) return true;
1508
return LLM_COST[model ?? ""] != null;
1509
}
1510
1511
export const FALLBACK_MAX_TOKENS = 8192;
1512
1513
// Overload 1: Just model string (existing signature)
1514
export function getMaxTokens(model?: LanguageModel): number;
1515
1516
// Overload 2: Model string + optional config
1517
export function getMaxTokens(
1518
model?: LanguageModel,
1519
config?: { max_tokens?: number },
1520
): number;
1521
1522
// Implementation
1523
export function getMaxTokens(
1524
model?: LanguageModel,
1525
config?: { max_tokens?: number },
1526
): number {
1527
// If config.max_tokens is provided, validate and use it
1528
if (config?.max_tokens != null) {
1529
const maxTokens = config.max_tokens;
1530
// Handle legacy string values and invalid numbers
1531
const num =
1532
typeof maxTokens === "number"
1533
? maxTokens
1534
: parseInt(String(maxTokens), 10);
1535
if (isNaN(num) || num <= 0) {
1536
return FALLBACK_MAX_TOKENS;
1537
}
1538
// Clamp to safe range
1539
return Math.max(1000, Math.min(2000000, num));
1540
}
1541
1542
// Existing logic
1543
if (isOllamaLLM(model)) return FALLBACK_MAX_TOKENS;
1544
return LLM_COST[model ?? ""]?.max_tokens ?? FALLBACK_MAX_TOKENS;
1545
}
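// Illustrative examples (sketch; "ollama-llama3" is a hypothetical dynamic model name):
//   getMaxTokens("gpt-4o-8k")                       // → 8192  (from LLM_COST)
//   getMaxTokens("ollama-llama3")                   // → FALLBACK_MAX_TOKENS (8192)
//   getMaxTokens("gpt-4o-8k", { max_tokens: 500 })  // → 1000  (clamped to the 1000..2,000,000 range)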
1546
1547
export interface LLMCost {
1548
prompt_tokens: number;
1549
completion_tokens: number;
1550
}
1551
1552
export function getLLMCost(
1553
model: LanguageModelCore,
1554
markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
1555
): LLMCost {
1556
const x = LLM_COST[model];
1557
if (x == null) {
1558
throw Error(`unknown model "${model}"`);
1559
}
1560
const { prompt_tokens, completion_tokens } = x;
1561
if (markup_percentage < 0) {
1562
throw Error("markup percentage can't be negative");
1563
}
1564
const f = 1 + markup_percentage / 100;
1565
return {
1566
prompt_tokens: prompt_tokens * f,
1567
completion_tokens: completion_tokens * f,
1568
};
1569
}
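// Illustrative example (sketch): a markup_percentage of 30 multiplies the wholesale
// per-token prices by 1.3.
//   getLLMCost("gpt-4o-8k", 30)
//   // → { prompt_tokens: 3.25e-6, completion_tokens: 1.3e-5 }  (USD per token)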
1570
1571
const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({
1572
max: 10,
1573
});
1574
1575
export function getLLMPriceRange(
1576
prompt: number,
1577
output: number,
1578
markup_percentage: number,
1579
): { min: number; max: number } {
1580
const cacheKey = `${prompt}::${output}::${markup_percentage}`;
1581
const cached = priceRangeCache.get(cacheKey);
1582
if (cached) return cached;
1583
1584
let min = Infinity;
1585
let max = 0;
1586
for (const key in LLM_COST) {
1587
const model = LLM_COST[key];
1588
if (!model || isFreeModel(key, true)) continue;
1589
const { prompt_tokens, completion_tokens } = getLLMCost(
1590
key as LanguageModelCore,
1591
markup_percentage,
1592
);
1593
const p = prompt * prompt_tokens + output * completion_tokens;
1594
1595
min = Math.min(min, p);
1596
max = Math.max(max, p);
1597
}
1598
const ret = { min, max };
1599
priceRangeCache.set(cacheKey, ret);
1600
return ret;
1601
}
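// Illustrative note (sketch): for the given prompt/output token counts this scans every
// non-free core model and evaluates
//   cost(model) = prompt * prompt_tokens(model) + output * completion_tokens(model)
// (with markup applied via getLLMCost), returning the minimum and maximum; results are
// memoized in the small LRU cache above, keyed by the three arguments.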
1602
1603
// The maximum cost for one single call using the given model.
1604
// We can't know the cost until after it happens, so this bound is useful for
1605
// ensuring user can afford to make a call.
1606
export function getMaxCost(
1607
model: LanguageModelCore,
1608
markup_percentage: number,
1609
): number {
1610
const { prompt_tokens, completion_tokens } = getLLMCost(
1611
model,
1612
markup_percentage,
1613
);
1614
const { max_tokens } = LLM_COST[model];
1615
return Math.max(prompt_tokens, completion_tokens) * max_tokens;
1616
}
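// Illustrative example (sketch): for "gpt-4o-8k" with no markup the bound is
//   max(2.5e-6, 1e-5) * 8192 ≈ $0.082
// i.e. the worst case assumes the whole 8k window is billed at the completion rate.
//   getMaxCost("gpt-4o-8k", 0)  // ≈ 0.08192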
1617
1618
/**
1619
* Initially, we just had one system prompt for all LLMs.
1620
* This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
1621
* For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
1622
*/
1623
export function getSystemPrompt(
1624
model: LanguageModel,
1625
_path: string | undefined,
1626
) {
1627
// TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
1628
const common = "Be brief.";
1629
const math = "Enclose any math formulas in $.";
1630
1631
if (
1632
model2vendor(model).name === "openai" ||
1633
model.startsWith(OPENAI_PREFIX)
1634
) {
1635
const mdCode =
1636
"Include the language directly after the triple backticks in all markdown code blocks.";
1637
return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
1638
}
1639
1640
// mistral stupidly inserts anything mentioned in the prompt as examples, always.
1641
if (
1642
model2vendor(model).name === "mistralai" ||
1643
model.startsWith(MISTRAL_PREFIX)
1644
) {
1645
return common;
1646
}
1647
1648
if (
1649
model2vendor(model).name === "google" ||
1650
model.startsWith(GOOGLE_PREFIX)
1651
) {
1652
return `${math}\n${common}`;
1653
}
1654
1655
if (
1656
model2vendor(model).name === "ollama" ||
1657
model.startsWith(OLLAMA_PREFIX)
1658
) {
1659
return `${common}`;
1660
}
1661
1662
if (
1663
model2vendor(model).name === "anthropic" ||
1664
model.startsWith(ANTHROPIC_PREFIX)
1665
) {
1666
return `${math}\n${common}`;
1667
}
1668
1669
if (model2vendor(model).name === "xai" || model.startsWith(XAI_PREFIX)) {
1670
return `${math}\n${common}`;
1671
}
1672
1673
const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
1674
return `${mdCode}\n${math}\n${common}`;
1675
}
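// Illustrative examples (sketch, not part of the upstream file; "ollama-llama3" is a
// hypothetical ollama model name):
//   getSystemPrompt("gemini-2.5-flash-8k", undefined)
//   // → "Enclose any math formulas in $.\nBe brief."
//   getSystemPrompt("ollama-llama3", undefined)
//   // → "Be brief."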
1676
1677