GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/util/db-schema/llm-utils.ts

// this contains bits and pieces from the wrongly named openai.ts file

import { isEmpty } from "lodash";
import LRU from "lru-cache";

import { unreachable } from "@cocalc/util/misc";

// these can be defined by admins and users
export const SERVICES = [
  "openai",
  "google",
  "mistralai", // the "*ai" is deliberate, because their model names start with "mistral-..." and we have to distinguish it from the prefix
  "anthropic",
  "ollama",
  "custom_openai",
] as const;

// a "user-*" model is a wrapper for all the model services
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;

export type UserDefinedLLMService = (typeof SERVICES)[number];

export function isUserDefinedModelType(
  model: unknown,
): model is UserDefinedLLMService {
  return SERVICES.includes(model as any);
}

// "User LLMs" are defined in the user's account settings.
// They query an external LLM service of given type, endpoint, and API key.
export interface UserDefinedLLM {
  id: number; // a unique number
  service: UserDefinedLLMService;
  model: string; // non-empty string
  display: string; // short user-visible string
  endpoint: string; // URL to the LLM service
  apiKey: string;
  icon?: string; // https://.../...png
}

export const USER_LLM_PREFIX = "user-";

// This basically prefixes the "model" defined by the user with the USER and service prefix.
// We do not use the to*() functions, because the names of the models could be arbitrary for each service.
export function toUserLLMModelName(llm: UserDefinedLLM) {
  const { service } = llm;
  const model: string = (() => {
    switch (service) {
      case "custom_openai":
        return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
      case "ollama":
        return toOllamaModel(llm.model);
      case "anthropic":
        return `${ANTHROPIC_PREFIX}${llm.model}`;
      case "google":
        return `${GOOGLE_PREFIX}${llm.model}`;
      case "mistralai":
        return `${MISTRAL_PREFIX}${llm.model}`;
      case "openai":
        return `${OPENAI_PREFIX}${llm.model}`;
      default:
        unreachable(service);
        throw new Error(
          `toUserLLMModelName of service ${service} not supported`,
        );
    }
  })();
  return `${USER_LLM_PREFIX}${model}`;
}

export function fromUserDefinedLLMModel(m: string): string | null {
  if (isUserDefinedModel(m)) {
    return m.slice(USER_LLM_PREFIX.length);
  }
  return null;
}

export function isUserDefinedModel(model: unknown): boolean {
  if (typeof model !== "string") return false;
  if (model.startsWith(USER_LLM_PREFIX)) {
    const m2 = model.slice(USER_LLM_PREFIX.length);
    return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
  }
  return false;
}

export function unpackUserDefinedLLMModel(model: string): {
  service: UserDefinedLLMService;
  model: string;
} | null {
  const um = fromUserDefinedLLMModel(model);
  if (um === null) return null;
  for (const service of SERVICES) {
    if (um.startsWith(`${service}-`)) {
      return { service, model: um.slice(service.length + 1) };
    }
  }
  return null;
}
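
// Illustrative sketch (not part of the original file): how the user-defined
// helpers above compose. Assuming a hypothetical Ollama entry with model
// "llama3", the prefixing and unpacking round-trip like this:
//
//   toUserLLMModelName({ id: 1, service: "ollama", model: "llama3",
//     display: "Llama 3", endpoint: "http://localhost:11434", apiKey: "" });
//   // → "user-ollama-llama3"
//   isUserDefinedModel("user-ollama-llama3"); // → true
//   unpackUserDefinedLLMModel("user-ollama-llama3");
//   // → { service: "ollama", model: "llama3" }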

export const OPENAI_PREFIX = "openai-";

// NOTE: all arrays of model names should order them from the "simplest and fastest" to the "most complex, slowest, and most expensive"
// that way, the ordering in the UI doesn't look arbitrary, but has a clear logic
export const MODELS_OPENAI = [
  "gpt-3.5-turbo",
  "gpt-4o-mini-8k", // context limited
  "gpt-4o-mini", // Released 2024-07-18
  "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
  "gpt-4o", // Released 2024-05-13
  // the "preview" variants are disabled, because the preview is over
  "gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
  "gpt-4-turbo-preview",
  "gpt-4-turbo-8k", // Released 2024-04-11
  "gpt-4-turbo",
  "gpt-4",
  "gpt-4-32k",
  "gpt-3.5-turbo-16k",
  "text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
] as const;

export type OpenAIModel = (typeof MODELS_OPENAI)[number];

export function isOpenAIModel(model: unknown): model is OpenAIModel {
  return MODELS_OPENAI.includes(model as any);
}

// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
export const MISTRAL_MODELS = [
  // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix
  "mistral-small-latest",
  "mistral-medium-latest", // Deprecated!
  "mistral-large-latest",
] as const;

export type MistralModel = (typeof MISTRAL_MODELS)[number];

export function isMistralModel(model: unknown): model is MistralModel {
  return MISTRAL_MODELS.includes(model as any);
}

// google's are taken from here – we use the generative AI client lib
// https://developers.generativeai.google/models/language
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
export const GOOGLE_MODELS = [
  "gemini-1.5-flash-8k", // introduced 2024-05-15
  "gemini-pro",
  "gemini-1.0-ultra", // hangs
  "gemini-1.5-pro-8k", // works now with langchain
  "gemini-1.5-pro", // works now with langchain
] as const;
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
export function isGoogleModel(model: unknown): model is GoogleModel {
  return GOOGLE_MODELS.includes(model as any);
}
export const GOOGLE_MODEL_TO_ID: Partial<{ [m in GoogleModel]: string }> = {
  "gemini-1.5-pro": "gemini-1.5-pro-latest",
  "gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
  "gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
} as const;

// https://docs.anthropic.com/claude/docs/models-overview -- stable names for the models ...
export const ANTHROPIC_MODELS = [
  "claude-3-5-sonnet",
  "claude-3-5-sonnet-4k", // added 2024-06-24
  "claude-3-haiku",
  "claude-3-haiku-8k", // limited context window, offered for free
  "claude-3-sonnet",
  "claude-3-sonnet-4k", // limited context window, offered for free
  "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending
  "claude-3-opus",
] as const;
const CLAUDE_SONNET_VERSION = "20240229";
const CLAUDE_HAIKU_VERSION = "20240307";
const CLAUDE_OPUS_VERSION = "20240229";
const CLAUDE_SONNET_3_5_VERSION = "20240620";
// ... and we add a version number (there is no "*-latest") when dispatching on the backend
export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string } = {
  "claude-3-sonnet-4k": CLAUDE_SONNET_VERSION,
  "claude-3-opus": CLAUDE_OPUS_VERSION,
  "claude-3-opus-8k": CLAUDE_OPUS_VERSION,
  "claude-3-sonnet": CLAUDE_SONNET_VERSION,
  "claude-3-5-sonnet": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-5-sonnet-4k": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-haiku": CLAUDE_HAIKU_VERSION,
  "claude-3-haiku-8k": CLAUDE_HAIKU_VERSION,
} as const;
export const ANTHROPIC_PREFIX = "anthropic-";
export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];
type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;
export function isAnthropicModel(model: unknown): model is AnthropicModel {
  return ANTHROPIC_MODELS.includes(model as any);
}
export function toAnthropicService(model: AnthropicModel): AnthropicService {
  return `${ANTHROPIC_PREFIX}${model}`;
}
export function isAnthropicService(
  service: string,
): service is AnthropicService {
  return service.startsWith(ANTHROPIC_PREFIX);
}
export function fromAnthropicService(
  service: AnthropicService,
): AnthropicModel {
  if (!isAnthropicService(service)) {
    throw new Error(`not an anthropic service: ${service}`);
  }
  return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;
}
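
// Illustrative sketch (not part of the original file): the Anthropic helpers
// above only add/remove the "anthropic-" prefix; the dated API version comes
// from ANTHROPIC_VERSION at dispatch time. For example:
//
//   toAnthropicService("claude-3-haiku");            // → "anthropic-claude-3-haiku"
//   fromAnthropicService("anthropic-claude-3-haiku"); // → "claude-3-haiku"
//   ANTHROPIC_VERSION["claude-3-haiku"];              // → "20240307"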

// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects
export const LANGUAGE_MODELS = [
  ...MODELS_OPENAI,
  ...MISTRAL_MODELS,
  ...GOOGLE_MODELS,
  ...ANTHROPIC_MODELS,
] as const;

export const USER_SELECTABLE_LLMS_BY_VENDOR: {
  [vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
} = {
  openai: MODELS_OPENAI.filter(
    (m) =>
      m === "gpt-4" ||
      m === "gpt-4-turbo-preview-8k" ||
      m === "gpt-4o-8k" ||
      m === "gpt-4o-mini-8k",
  ),
  google: GOOGLE_MODELS.filter(
    (m) =>
      // we only enable the 1.0, 1.5 pro and 1.5 flash with a limited context window
      m === "gemini-pro" ||
      m === "gemini-1.5-pro-8k" ||
      m === "gemini-1.5-flash-8k",
  ),
  mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-medium-latest"),
  anthropic: ANTHROPIC_MODELS.filter((m) => {
    // we show opus and the context restricted models (to avoid high costs)
    return (
      m === "claude-3-opus-8k" ||
      m === "claude-3-5-sonnet-4k" ||
      m === "claude-3-haiku-8k"
    );
  }),
  ollama: [], // this is empty, because these models are not hardcoded
  custom_openai: [], // this is empty, because these models are not hardcoded
  user: [],
} as const;

// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!
// Make sure to update this when adding new models.
// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
export const USER_SELECTABLE_LANGUAGE_MODELS = [
  ...USER_SELECTABLE_LLMS_BY_VENDOR.openai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.google,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,
] as const;

export type OllamaLLM = string;
export type CustomOpenAI = string;

// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.
export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];
export type LanguageModel = LanguageModelCore | OllamaLLM;
export function isCoreLanguageModel(
  model: unknown,
): model is LanguageModelCore {
  if (typeof model !== "string") return false;
  return LANGUAGE_MODELS.includes(model as any);
}

// we check if the given object is any known language model
export function isLanguageModel(model?: unknown): model is LanguageModel {
  if (model == null) return false;
  if (typeof model !== "string") return false;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isUserDefinedModel(model)) return true; // this also checks if there is a valid model inside
  return LANGUAGE_MODELS.includes(model as any);
}

export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];

export function isLLMServiceName(service: unknown): service is LLMServiceName {
  if (typeof service !== "string") return false;
  return LANGUAGE_MODEL_SERVICES.includes(service as any);
}

export type LLMServicesAvailable = Record<LLMServiceName, boolean>;

interface LLMService {
  name: string;
  short: string; // additional short text next to the company name
  desc: string; // more detailed description
  url: string;
}

export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
  openai: {
    name: "OpenAI",
    short: "AI research and deployment company",
    desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
    url: "https://openai.com/",
  },
  google: {
    name: "Google",
    short: "Technology company",
    desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
    url: "https://gemini.google.com/",
  },
  anthropic: {
    name: "Anthropic",
    short: "AI research company",
    desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
    url: "https://www.anthropic.com/",
  },
  mistralai: {
    name: "Mistral AI",
    short: "French AI company",
    desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
    url: "https://mistral.ai/",
  },
  ollama: {
    name: "Ollama",
    short: "Open-source software",
    desc: "Ollama language model server at a custom API endpoint.",
    url: "https://ollama.com/",
  },
  custom_openai: {
    name: "OpenAI API",
    short: "Custom endpoint",
    desc: "Calls a custom OpenAI API endpoint.",
    url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
  },
  user: {
    name: "User Defined",
    short: "Account → Language Model",
    desc: "Defined by the user in Account Settings → Language Model",
    url: "",
  },
} as const;

interface ValidLanguageModelNameProps {
  model: string | undefined;
  filter: LLMServicesAvailable;
  ollama: string[]; // keys of ollama models
  custom_openai: string[]; // keys of custom openai models
  selectable_llms: string[]; // either empty, or an array stored in the server settings
}

// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
  openai: false,
  google: false,
  ollama: false,
  mistralai: false,
  anthropic: false,
  custom_openai: false,
  user: false,
} as const;

// this is used in initialization functions. e.g. to get a default model depending on the overall availability
// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available,
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName({
  model,
  filter = DEFAULT_FILTER,
  ollama,
  custom_openai,
  selectable_llms,
}: ValidLanguageModelNameProps): LanguageModel {
  if (typeof model === "string" && isValidModel(model)) {
    try {
      if (isCoreLanguageModel(model)) {
        const v = model2vendor(model).name;
        if (filter[v] && selectable_llms.includes(model)) {
          return model;
        }
      }

      if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
        return model;
      }

      if (
        isCustomOpenAI(model) &&
        custom_openai.includes(fromCustomOpenAIModel(model))
      ) {
        return model;
      }

      if (isUserDefinedModel(model)) {
        return model;
      }
    } catch {}
  }

  for (const free of [true, false]) {
    const dflt = getDefaultLLM(
      selectable_llms,
      filter,
      ollama,
      custom_openai,
      free,
    );
    if (dflt != null) {
      return dflt;
    }
  }
  return DEFAULT_MODEL;
}
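
// Illustrative sketch (not part of the original file): assuming a site where
// only Google models are enabled and selectable, an unknown or disabled model
// falls through to the default search above, e.g.:
//
//   getValidLanguageModelName({
//     model: "gpt-4", // not enabled by this filter
//     filter: { ...DEFAULT_FILTER, google: true },
//     ollama: [],
//     custom_openai: [],
//     selectable_llms: ["gemini-1.5-flash-8k"],
//   });
//   // → "gemini-1.5-flash-8k" (the first free, selectable Google model)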

export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
  "google",
  "openai",
  "anthropic",
  "mistralai",
  "ollama",
  "custom_openai",
] as const;

export function getDefaultLLM(
  selectable_llms: string[],
  filter: LLMServicesAvailable,
  ollama?: { [key: string]: any },
  custom_openai?: { [key: string]: any },
  only_free = true,
): LanguageModel {
  for (const v of DEFAULT_LLM_PRIORITY) {
    if (!filter[v]) continue;
    for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
      if (selectable_llms.includes(m)) {
        const isFree = LLM_COST[m].free ?? true;
        if ((only_free && isFree) || !only_free) {
          return m;
        }
      }
    }
  }
  // none of the standard models, pick the first ollama or custom_openai
  if (ollama != null && !isEmpty(ollama)) {
    return toOllamaModel(Object.keys(ollama)[0]);
  }
  if (custom_openai != null && !isEmpty(custom_openai)) {
    return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
  }
  return DEFAULT_MODEL;
}
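
// Illustrative sketch (not part of the original file): with e.g. openai and
// anthropic enabled (and their models selectable), getDefaultLLM prefers the
// OpenAI entry, since "openai" comes before "anthropic" in DEFAULT_LLM_PRIORITY:
//
//   getDefaultLLM(["gpt-4o-mini-8k", "claude-3-haiku-8k"],
//     { ...DEFAULT_FILTER, openai: true, anthropic: true });
//   // → "gpt-4o-mini-8k" (free) rather than "claude-3-haiku-8k"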

export interface OpenAIMessage {
  role: "system" | "user" | "assistant";
  content: string;
}
export type OpenAIMessages = OpenAIMessage[];

export const OLLAMA_PREFIX = "ollama-";
export type OllamaService = string;
export function isOllamaService(service: string): service is OllamaService {
  return isOllamaLLM(service);
}

export const CUSTOM_OPENAI_PREFIX = "custom_openai-";
export type CustomOpenAIService = string;
export function isCustomOpenAIService(
  service: string,
): service is CustomOpenAIService {
  return isCustomOpenAI(service);
}

export const MISTRAL_PREFIX = "mistralai-";
export type MistralService = `${typeof MISTRAL_PREFIX}${MistralModel}`;
export function isMistralService(service: string): service is MistralService {
  return service.startsWith(MISTRAL_PREFIX);
}

export const GOOGLE_PREFIX = "google-";

// we encode the model in the frontend and elsewhere with the service name as a prefix
// ATTN: don't change the encoding pattern of [vendor]-[model]
// for whatever reason, it's also described that way in purchases/close.ts
export type LanguageServiceCore =
  | `${typeof OPENAI_PREFIX}${OpenAIModel}`
  | `${typeof GOOGLE_PREFIX}${
      | "text-bison-001"
      | "chat-bison-001"
      | "embedding-gecko-001"}`
  | `${typeof GOOGLE_PREFIX}${GoogleModel}`
  | AnthropicService
  | MistralService;

export type LanguageService =
  | LanguageServiceCore
  | OllamaService
  | CustomOpenAIService;

// used e.g. for checking "account-id={string}" and other things like that
export const LANGUAGE_MODEL_PREFIXES = [
  "chatgpt",
  ...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),
] as const;

// we encode the model in the frontend and elsewhere with the service name as a prefix
export function model2service(model: LanguageModel): LanguageService {
  if (model === "text-embedding-ada-002") {
    return `${OPENAI_PREFIX}${model}`;
  }
  if (
    isOllamaLLM(model) ||
    isCustomOpenAI(model) ||
    isUserDefinedModel(model)
  ) {
    return model; // already has a useful prefix
  }
  if (isMistralModel(model)) {
    return toMistralService(model);
  }
  if (isAnthropicModel(model)) {
    return toAnthropicService(model);
  }
  if (isLanguageModel(model)) {
    if (
      model === "text-bison-001" ||
      model === "chat-bison-001" ||
      model === "embedding-gecko-001" ||
      isGoogleModel(model)
    ) {
      return `${GOOGLE_PREFIX}${model}`;
    } else {
      return `${OPENAI_PREFIX}${model}`;
    }
  }

  throw new Error(`unknown model: ${model}`);
}

// inverse of model2service, but robust for chat avatars, which might not have a prefix
// TODO: fix the mess
export function service2model(
  service: LanguageService | "chatgpt",
): LanguageModel {
  if (service === "chatgpt") {
    return "gpt-3.5-turbo";
  }
  const lm = service2model_core(service);
  if (lm == null) {
    // We don't throw an error, since the frontend would crash
    // throw new Error(`unknown service: ${service}`);
    console.warn(`service2model: unknown service: ${service}`);
    return "gpt-3.5-turbo";
  }
  return lm;
}

export function service2model_core(
  service: LanguageService,
): LanguageModel | null {
  // split off the first part of service, e.g., "openai-" or "google-"
  const s = service.split("-")[0];
  const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);

  if (isUserDefinedModel(service)) {
    return service;
  }

  const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
  if (hasPrefix) {
    // we add the trailing "-" to match with these prefixes, which include the "-"
    switch (`${s}-`) {
      case OLLAMA_PREFIX:
        return toOllamaModel(m);
      case CUSTOM_OPENAI_PREFIX:
        return toCustomOpenAIModel(m);
    }
  }

  if (LANGUAGE_MODELS.includes(m as any)) {
    return m;
  }
  return null;
}
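
// Illustrative sketch (not part of the original file): model2service and
// service2model are (mostly) inverse to each other, e.g.:
//
//   model2service("gpt-4o-8k");            // → "openai-gpt-4o-8k"
//   model2service("mistral-large-latest"); // → "mistralai-mistral-large-latest"
//   service2model("openai-gpt-4o-8k");     // → "gpt-4o-8k"
//   service2model("chatgpt");              // → "gpt-3.5-turbo" (legacy avatar name)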

// NOTE: do not use this – instead use server_settings.default_llm
export const DEFAULT_MODEL: LanguageModel = "gemini-1.5-flash-8k";

interface LLMVendor {
  name: LLMServiceName;
  url: string;
}

export function model2vendor(model): LLMVendor {
  if (isUserDefinedModel(model)) {
    return { name: "user", url: "" };
  } else if (isOllamaLLM(model)) {
    return { name: "ollama", url: LLM_PROVIDER.ollama.url };
  } else if (isCustomOpenAI(model)) {
    return {
      name: "custom_openai",
      url: LLM_PROVIDER.custom_openai.url,
    };
  } else if (isMistralModel(model)) {
    return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };
  } else if (isOpenAIModel(model)) {
    return { name: "openai", url: LLM_PROVIDER.openai.url };
  } else if (isGoogleModel(model)) {
    return { name: "google", url: LLM_PROVIDER.google.url };
  } else if (isAnthropicModel(model)) {
    return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };
  }

  throw new Error(`model2vendor: unknown model: "${model}"`);
}
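
// Illustrative sketch (not part of the original file): model2vendor dispatches
// purely on the model-name checks above, e.g.:
//
//   model2vendor("gemini-1.5-flash-8k").name; // → "google"
//   model2vendor("ollama-llama3").name;       // → "ollama" (hypothetical ollama model)
//   model2vendor("user-openai-my-gpt").name;  // → "user"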

// wraps the model name in an object that indicates that it's an ollama model
// TODO: maybe it will be necessary at some point to pass in the list of available ollama models
// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)
export function toOllamaModel(model: string): OllamaLLM {
  if (isOllamaLLM(model)) {
    throw new Error(`already an ollama model: ${model}`);
  }
  return `${OLLAMA_PREFIX}${model}`;
}

// unwraps the model name from an object that indicates that it's an ollama model
export function fromOllamaModel(model: OllamaLLM) {
  if (!isOllamaLLM(model)) {
    throw new Error(`not an ollama model: ${model}`);
  }
  return model.slice(OLLAMA_PREFIX.length);
}

export function isOllamaLLM(model: unknown): model is OllamaLLM {
  return (
    typeof model === "string" &&
    model.startsWith(OLLAMA_PREFIX) &&
    model.length > OLLAMA_PREFIX.length
  );
}

export function toCustomOpenAIModel(model: string): CustomOpenAI {
  if (isCustomOpenAI(model)) {
    throw new Error(`already a custom openai model: ${model}`);
  }
  return `${CUSTOM_OPENAI_PREFIX}${model}`;
}

export function isCustomOpenAI(model: unknown): model is CustomOpenAI {
  return (
    typeof model === "string" &&
    model.startsWith(CUSTOM_OPENAI_PREFIX) &&
    model.length > CUSTOM_OPENAI_PREFIX.length
  );
}

export function fromCustomOpenAIModel(model: CustomOpenAI) {
  if (!isCustomOpenAI(model)) {
    throw new Error(`not a custom openai model: ${model}`);
  }
  return model.slice(CUSTOM_OPENAI_PREFIX.length);
}

export function toMistralService(model: string): MistralService {
  if (isMistralService(model)) {
    throw new Error(`already a mistral model: ${model}`);
  }
  if (!isMistralModel(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return `${MISTRAL_PREFIX}${model}`;
}

export function fromMistralService(model: MistralService) {
  if (!isMistralService(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return model.slice(MISTRAL_PREFIX.length);
}
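
// Illustrative sketch (not part of the original file): all the to*/from* helpers
// above are thin prefix wrappers that throw on double-wrapping, e.g. for a
// hypothetical ollama model "llama3":
//
//   toOllamaModel("llama3");                  // → "ollama-llama3"
//   fromOllamaModel("ollama-llama3");         // → "llama3"
//   toOllamaModel("ollama-llama3");           // throws: already an ollama model
//   toMistralService("mistral-small-latest"); // → "mistralai-mistral-small-latest"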

type LLM2String = {
  [key in
    | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
    | "chatgpt" // some additional ones, backwards compatibility
    | "chatgpt3"
    | "chatgpt4"
    | "gpt-4-32k"
    | "text-bison-001"
    | "chat-bison-001"]: string;
};

// Map from pseudo account_id to what should be displayed to user.
// This is used in various places in the frontend.
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
export const LLM_USERNAMES: LLM2String = {
  chatgpt: "GPT-3.5",
  chatgpt3: "GPT-3.5",
  chatgpt4: "GPT-4",
  "gpt-4": "GPT-4",
  "gpt-4-32k": "GPT-4-32k",
  "gpt-3.5-turbo": "GPT-3.5",
  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
  "gpt-4-turbo-preview": "GPT-4 Turbo 128k",
  "gpt-4-turbo-preview-8k": "GPT-4 Turbo",
  "gpt-4-turbo": "GPT-4 Turbo 128k",
  "gpt-4-turbo-8k": "GPT-4 Turbo",
  "gpt-4o": "GPT-4o 128k",
  "gpt-4o-8k": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini 128k",
  "gpt-4o-mini-8k": "GPT-4o Mini",
  "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "PaLM 2",
  "chat-bison-001": "PaLM 2",
  "gemini-pro": "Gemini 1.0 Pro",
  "gemini-1.0-ultra": "Gemini 1.0 Ultra",
  "gemini-1.5-pro": "Gemini 1.5 Pro 1m",
  "gemini-1.5-pro-8k": "Gemini 1.5 Pro",
  "gemini-1.5-flash-8k": "Gemini 1.5 Flash",
  "mistral-small-latest": "Mistral AI Small",
  "mistral-medium-latest": "Mistral AI Medium",
  "mistral-large-latest": "Mistral AI Large",
  "claude-3-haiku": "Claude 3 Haiku 200k",
  "claude-3-haiku-8k": "Claude 3 Haiku",
  "claude-3-sonnet": "Claude 3 Sonnet 200k",
  "claude-3-sonnet-4k": "Claude 3 Sonnet",
  "claude-3-5-sonnet": "Claude 3.5 Sonnet 200k",
  "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
  "claude-3-opus": "Claude 3 Opus 200k",
  "claude-3-opus-8k": "Claude 3 Opus",
} as const;

// similar to the above, we map to short user-visible description texts
// this comes next to the name, hence you do not have to mention the name
export const LLM_DESCR: LLM2String = {
  chatgpt: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt3: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt4:
    "Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4-32k": "",
  "gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  "gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
  "gpt-4-turbo-preview-8k":
    "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo-preview": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4-turbo-8k":
    "Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4o-8k":
    "Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
  "gpt-4o": "Most powerful, fastest, and cheapest (OpenAI, 128k token context)",
  "gpt-4o-mini-8k":
    "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
  "text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "",
  "chat-bison-001": "",
  "gemini-pro":
    "Google's Gemini 1.0 Pro Generative AI model (30k token context)",
  "gemini-1.0-ultra":
    "Google's Gemini 1.0 Ultra Generative AI model (30k token context)",
  "gemini-1.5-pro":
    "Google's Gemini 1.5 Pro Generative AI model (1m token context)",
  "gemini-1.5-pro-8k":
    "Google's Gemini 1.5 Pro Generative AI model (8k token context)",
  "gemini-1.5-flash-8k":
    "Google's Gemini 1.5 Flash Generative AI model (8k token context)",
  "mistral-small-latest":
    "Fast, simple queries, short answers, fewer capabilities. (Mistral AI, 4k token context)",
  "mistral-medium-latest":
    "Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",
  "mistral-large-latest":
    "Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",
  "claude-3-haiku":
    "Fastest model, lightweight actions (Anthropic, 200k token context)",
  "claude-3-haiku-8k":
    "Fastest model, lightweight actions (Anthropic, 8k token context)",
  "claude-3-5-sonnet":
    "Anthropic's most intelligent model (Anthropic, 200k token context)",
  "claude-3-5-sonnet-4k":
    "Anthropic's most intelligent model (Anthropic, 4k token context)",
  "claude-3-sonnet":
    "Best combination of performance and speed (Anthropic, 200k token context)",
  "claude-3-sonnet-4k":
    "Best combination of performance and speed (Anthropic, 4k token context)",
  "claude-3-opus":
    "Excels at writing and complex tasks (Anthropic, 200k token context)",
  "claude-3-opus-8k":
    "Excels at writing and complex tasks (Anthropic, 8k token context)",
} as const;

export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {
  if (!isCoCalcCom) return true;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {
    // i.e. model is now of type LanguageModelCore, and we can look up its cost info
    const costInfo = LLM_COST[model];
    if (costInfo != null) {
      return costInfo.free;
    }
  }
  // all others are free (this should actually never happen, but we're cautious)
  return true;
}
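
// Illustrative sketch (not part of the original file): on cocalc.com the "free"
// flag from LLM_COST below decides metering; everywhere else everything is free:
//
//   isFreeModel("gpt-4o-mini-8k", true);    // → true  (free entry in LLM_COST)
//   isFreeModel("claude-3-opus-8k", true);  // → false (metered)
//   isFreeModel("claude-3-opus-8k", false); // → true  (not on cocalc.com)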

// this is used in purchases/get-service-cost
// we only need to check for the vendor prefixes, no special cases!
export function isLanguageModelService(
  service: string,
): service is LanguageService {
  if (isUserDefinedModel(service)) return true;
  for (const v of LANGUAGE_MODEL_SERVICES) {
    if (service.startsWith(`${v}-`)) {
      return true;
    }
  }
  return false;
}

export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
  switch (service) {
    case "openai":
      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
    case "google":
      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
    case "ollama":
      return `No status information for Ollama available.`;
    case "custom_openai":
      return `No status information for Custom OpenAI available.`;
    case "mistralai":
      return `No status information for Mistral AI available.`;
    case "anthropic":
      return `Anthropic [status](https://status.anthropic.com/).`;
    case "user":
      return `No status information for user defined model available.`;
    default:
      unreachable(service);
  }
  return "";
}

interface Cost {
  prompt_tokens: number;
  completion_tokens: number;
  max_tokens: number;
  free: boolean; // whether this model has metered, paid usage, or is offered for free
}

// price per token for a given price of USD per 1M tokens
function usd1Mtokens(usd: number): number {
  return usd / 1_000_000;
}
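
// Illustrative sketch (not part of the original file): usd1Mtokens converts a
// per-1M-token price into a per-token price, e.g. $30 per 1M tokens:
//
//   usd1Mtokens(30); // → 0.00003 USD per token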

// This is the official published cost that openai charges.
// It changes over time, so this will sometimes need to be updated.
// Our cost is a configurable multiple of this.
// https://openai.com/pricing#language-models
// There appears to be no api that provides the prices, unfortunately.
export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
  "gpt-4": {
    prompt_tokens: usd1Mtokens(30),
    completion_tokens: usd1Mtokens(60),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4-32k": {
    prompt_tokens: usd1Mtokens(60),
    completion_tokens: usd1Mtokens(120),
    max_tokens: 32768,
    free: false,
  },
  "gpt-3.5-turbo": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096,
    free: true,
  },
  "gpt-3.5-turbo-16k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 16384,
    free: false,
  },
  // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-preview-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo-preview": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  }, // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-8k": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "gpt-4o": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-mini-8k": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: true,
  },
  "gpt-4o-mini": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: true,
  },
  // also OpenAI
  "text-embedding-ada-002": {
    prompt_tokens: 0.0001 / 1000,
    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
    max_tokens: 8191,
    free: false,
  },
  // https://ai.google.dev/pricing
  "gemini-pro": {
    prompt_tokens: usd1Mtokens(0.5),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-pro-8k": {
    prompt_tokens: usd1Mtokens(3.5), // (we're below the 128k context)
    completion_tokens: usd1Mtokens(10.5),
    max_tokens: 8_000,
    free: false,
  },
  "gemini-1.5-pro": {
    prompt_tokens: usd1Mtokens(7),
    completion_tokens: usd1Mtokens(21),
    max_tokens: 1048576,
    free: false,
  },
  "gemini-1.0-ultra": {
    prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!
    completion_tokens: usd1Mtokens(1),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-flash-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://mistral.ai/technology/
  "mistral-small-latest": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-medium-latest": {
    prompt_tokens: usd1Mtokens(2.7),
    completion_tokens: usd1Mtokens(8.1),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-large-latest": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: false,
  },
  // Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
  "claude-3-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
    free: false,
  },
  "claude-3-opus": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens
    free: false,
  },
  "claude-3-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens, offered for free
    free: true,
  },
  "claude-3-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.25),
    completion_tokens: usd1Mtokens(1.25),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-haiku": {
    prompt_tokens: usd1Mtokens(0.25),
    completion_tokens: usd1Mtokens(1.25),
    max_tokens: 200_000,
    free: false,
  },
} as const;

// TODO: remove this test – it's only used server side, and that server side check should work for all known LLM models
export function isValidModel(model?: string): boolean {
  if (model == null) return false;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isMistralModel(model)) return true;
  if (isGoogleModel(model)) return true;
  return LLM_COST[model ?? ""] != null;
}

export function getMaxTokens(model?: LanguageModel): number {
  // TODO: store max tokens in the model object itself, this is just a fallback
  if (isOllamaLLM(model)) return 8192;
  if (isMistralModel(model)) return 4096; // TODO: check with MistralAI
  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
}

export interface LLMCost {
  prompt_tokens: number;
  completion_tokens: number;
}

export function getLLMCost(
  model: LanguageModelCore,
  markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
): LLMCost {
  const x = LLM_COST[model];
  if (x == null) {
    throw Error(`unknown model "${model}"`);
  }
  const { prompt_tokens, completion_tokens } = x;
  if (markup_percentage < 0) {
    throw Error("markup percentage can't be negative");
  }
  const f = 1 + markup_percentage / 100;
  return {
    prompt_tokens: prompt_tokens * f,
    completion_tokens: completion_tokens * f,
  };
}
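
// Illustrative sketch (not part of the original file): a 30% markup multiplies
// the wholesale per-token prices by 1.3, e.g. for "gpt-4":
//
//   getLLMCost("gpt-4", 30);
//   // → { prompt_tokens: usd1Mtokens(30) * 1.3,      // 0.000039 USD/token
//   //     completion_tokens: usd1Mtokens(60) * 1.3 }  // 0.000078 USD/token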

const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({
  max: 10,
});

export function getLLMPriceRange(
  prompt: number,
  output: number,
  markup_percentage: number,
): { min: number; max: number } {
  const cacheKey = `${prompt}::${output}::${markup_percentage}`;
  const cached = priceRangeCache.get(cacheKey);
  if (cached) return cached;

  let min = Infinity;
  let max = 0;
  for (const key in LLM_COST) {
    const model = LLM_COST[key];
    if (!model || isFreeModel(key, true)) continue;
    const { prompt_tokens, completion_tokens } = getLLMCost(
      key as LanguageModelCore,
      markup_percentage,
    );
    const p = prompt * prompt_tokens + output * completion_tokens;

    min = Math.min(min, p);
    max = Math.max(max, p);
  }
  const ret = { min, max };
  priceRangeCache.set(cacheKey, ret);
  return ret;
}

// The maximum cost for one single call using the given model.
// We can't know the cost until after it happens, so this bound is useful for
// ensuring the user can afford to make a call.
export function getMaxCost(
  model: LanguageModelCore,
  markup_percentage: number,
): number {
  const { prompt_tokens, completion_tokens } = getLLMCost(
    model,
    markup_percentage,
  );
  const { max_tokens } = LLM_COST[model];
  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
}
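
// Illustrative sketch (not part of the original file): getMaxCost bounds one
// call by assuming max_tokens tokens at the larger of the two per-token rates,
// e.g. for "claude-3-opus-8k" with no markup:
//
//   getMaxCost("claude-3-opus-8k", 0);
//   // → max(usd1Mtokens(15), usd1Mtokens(75)) * 8_000 = 0.000075 * 8000 = 0.6 USD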

/**
 * Initially, we just had one system prompt for all LLMs.
 * This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
 * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
 */
export function getSystemPrompt(
  model: LanguageModel,
  _path: string | undefined,
) {
  // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
  const common = "Be brief.";
  const math = "Enclose any math formulas in $.";

  if (
    model2vendor(model).name === "openai" ||
    model.startsWith(OPENAI_PREFIX)
  ) {
    const mdCode =
      "Include the language directly after the triple backticks in all markdown code blocks.";
    return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
  }

  // mistral stupidly inserts anything mentioned in the prompt as examples, always.
  if (
    model2vendor(model).name === "mistralai" ||
    model.startsWith(MISTRAL_PREFIX)
  ) {
    return common;
  }

  if (
    model2vendor(model).name === "google" ||
    model.startsWith(GOOGLE_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (
    model2vendor(model).name === "ollama" ||
    model.startsWith(OLLAMA_PREFIX)
  ) {
    return `${common}`;
  }

  if (
    model2vendor(model).name === "anthropic" ||
    model.startsWith(ANTHROPIC_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
  return `${mdCode}\n${math}\n${common}`;
}