Path: blob/master/src/packages/util/db-schema/llm-utils.ts
// this contains bits and pieces from the wrongly named openai.ts file

import { isEmpty } from "lodash";
import LRU from "lru-cache";

import { unreachable } from "@cocalc/util/misc";

// these can be defined by admins and users
export const SERVICES = [
  "openai",
  "google",
  "mistralai", // the "*ai" suffix is deliberate, because their model names start with "mistral-..." and we have to distinguish it from the prefix
  "anthropic",
  "ollama",
  "custom_openai",
  "xai",
] as const;

// a "user-*" model is a wrapper for all the model services
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;

export type UserDefinedLLMService = (typeof SERVICES)[number];

export function isUserDefinedModelType(
  model: unknown,
): model is UserDefinedLLMService {
  return SERVICES.includes(model as any);
}

// "User LLMs" are defined in the user's account settings.
// They query an external LLM service of given type, endpoint, and API key.
export interface UserDefinedLLM {
  id: number; // a unique number
  service: UserDefinedLLMService;
  model: string; // non-empty string
  display: string; // short user-visible string
  endpoint: string; // URL to the LLM service
  apiKey: string;
  icon?: string; // https://.../...png
  max_tokens?: number; // optional context window size in tokens
}

export const USER_LLM_PREFIX = "user-";

// This basically prefixes the "model" defined by the user with the USER and service prefix.
// We do not use the to*() functions, because the names of the models could be arbitrary for each service.
export function toUserLLMModelName(llm: UserDefinedLLM) {
  const { service } = llm;
  const model: string = (() => {
    switch (service) {
      case "custom_openai":
        return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
      case "ollama":
        return toOllamaModel(llm.model);
      case "anthropic":
        return `${ANTHROPIC_PREFIX}${llm.model}`;
      case "google":
        return `${GOOGLE_PREFIX}${llm.model}`;
      case "mistralai":
        return `${MISTRAL_PREFIX}${llm.model}`;
      case "openai":
        return `${OPENAI_PREFIX}${llm.model}`;
      case "xai":
        return `${XAI_PREFIX}${llm.model}`;
      default:
        unreachable(service);
        throw new Error(
          `toUserLLMModelName of service ${service} not supported`,
        );
    }
  })();
  return `${USER_LLM_PREFIX}${model}`;
}

export function fromUserDefinedLLMModel(m: string): string | null {
  if (isUserDefinedModel(m)) {
    return m.slice(USER_LLM_PREFIX.length);
  }
  return null;
}

export function isUserDefinedModel(model: unknown): boolean {
  if (typeof model !== "string") return false;
  if (model.startsWith(USER_LLM_PREFIX)) {
    const m2 = model.slice(USER_LLM_PREFIX.length);
    return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
  }
  return false;
}

export function unpackUserDefinedLLMModel(model: string): {
  service: UserDefinedLLMService;
  model: string;
} | null {
  const um = fromUserDefinedLLMModel(model);
  if (um === null) return null;
  for (const service of SERVICES) {
    if (um.startsWith(`${service}-`)) {
      return { service, model: um.slice(service.length + 1) };
    }
  }
  return null;
}
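
// Example (illustrative, not part of the module): a user-defined model name
// round-trips through the USER_LLM_PREFIX plus the service prefix. Assuming a
// user configured an OpenAI model called "gpt-4o" in their account settings:
//
//   toUserLLMModelName({ id: 1, service: "openai", model: "gpt-4o", ... } as UserDefinedLLM);
//   // → "user-openai-gpt-4o"
//   unpackUserDefinedLLMModel("user-openai-gpt-4o");
//   // → { service: "openai", model: "gpt-4o" }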

export const OPENAI_PREFIX = "openai-";

// NOTE: all arrays of model names should be ordered from the simplest and fastest to the most complex, slowest, and most expensive –
// that way, the ordering in the UI isn't arbitrary, but has a clear logic

export const MODELS_OPENAI = [
  "gpt-3.5-turbo",
  "gpt-4o-mini-8k", // context limited
  "gpt-4o-mini", // Released 2024-07-18
  "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
  "gpt-4o", // Released 2024-05-13
  // the "preview" variants are disabled, because the preview is over
  "gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
  "gpt-4-turbo-preview",
  "gpt-4-turbo-8k", // Released 2024-04-11
  "gpt-4-turbo",
  "gpt-4",
  "gpt-4.1",
  "gpt-4.1-mini",
  "gpt-4-32k",
  "gpt-3.5-turbo-16k",
  "text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
  "o1-mini-8k",
  "o1-mini",
  "o1-8k",
  "o1",
  "o3-8k", // context limited
  "o3",
  "o4-mini-8k", // context limited
  "o4-mini",
  "gpt-5-8k", // context limited
  "gpt-5",
  "gpt-5.2-8k", // context limited
  "gpt-5.2",
  "gpt-5-mini-8k", // context limited
  "gpt-5-mini",
] as const;

export type OpenAIModel = (typeof MODELS_OPENAI)[number];

export function isOpenAIModel(model: unknown): model is OpenAIModel {
  return MODELS_OPENAI.includes(model as any);
}

// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
export const MISTRAL_MODELS = [
  // yes, all of them have an extra mistral-prefix, on top of the vendor prefix
  "mistral-small-latest",
  "mistral-medium-latest",
  "mistral-large-latest",
  "devstral-medium-2507",
  // "magistral-medium-latest", // throws error
] as const;

export type MistralModel = (typeof MISTRAL_MODELS)[number];

export function isMistralModel(model: unknown): model is MistralModel {
  return MISTRAL_MODELS.includes(model as any);
}

// Google's models are taken from here – we use the generative AI client lib
// https://developers.generativeai.google/models/language
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
export const GOOGLE_MODELS = [
  "gemini-1.5-flash-8k", // introduced 2024-05-15
  "gemini-1.5-flash", // for user defined models
  "gemini-pro", // Discontinued Feb'25. Keep it to avoid breaking old references!
  "gemini-1.0-ultra", // hangs
  "gemini-1.5-pro-8k", // works now with langchain
  "gemini-1.5-pro", // works now with langchain
  "gemini-2.5-flash-8k",
  "gemini-2.5-pro-8k",
  "gemini-2.0-flash-8k",
  "gemini-2.0-flash-lite-8k",
  "gemini-3-flash-preview-16k", // Preview model, context limited to 16k
  "gemini-3-pro-preview-8k", // Preview model, context limited to 8k
] as const;
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
export function isGoogleModel(model: unknown): model is GoogleModel {
  return GOOGLE_MODELS.includes(model as any);
}
// Canonical Google models (non-thinking)
const CANONICAL_GOOGLE_MODELS = [
  "gemini-1.5-pro-latest",
  "gemini-1.5-flash-latest",
  "gemini-2.0-flash",
  "gemini-2.0-flash-lite",
] as const;

// Canonical Google models that support thinking/reasoning tokens (Gemini 2.5+ and 3+)
const CANONICAL_GOOGLE_MODELS_THINKING = [
  "gemini-2.5-flash",
  "gemini-2.5-pro",
  "gemini-3-flash-preview",
  "gemini-3-pro-preview",
] as const;

export type CanonicalGoogleModel = (typeof CANONICAL_GOOGLE_MODELS)[number];
export type CanonicalGoogleThinkingModel =
  (typeof CANONICAL_GOOGLE_MODELS_THINKING)[number];

// Union type for all canonical Google model IDs
type CanonicalGoogleModelId =
  | CanonicalGoogleModel
  | CanonicalGoogleThinkingModel;

export const GOOGLE_MODEL_TO_ID: Partial<{
  [m in GoogleModel]: CanonicalGoogleModelId;
}> = {
  "gemini-1.5-pro": "gemini-1.5-pro-latest",
  "gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
  "gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
  "gemini-2.0-flash-8k": "gemini-2.0-flash",
  "gemini-2.0-flash-lite-8k": "gemini-2.0-flash-lite",
  "gemini-2.5-flash-8k": "gemini-2.5-flash",
  "gemini-2.5-pro-8k": "gemini-2.5-pro",
  "gemini-3-flash-preview-16k": "gemini-3-flash-preview",
  "gemini-3-pro-preview-8k": "gemini-3-pro-preview",
} as const;

/**
 * Check if a Google model supports thinking/reasoning tokens.
 * These are Gemini 2.5+ and Gemini 3+ models.
 * @param model - The canonical Google model name (after GOOGLE_MODEL_TO_ID mapping)
 */
export function isGoogleThinkingModel(model: string): boolean {
  return CANONICAL_GOOGLE_MODELS_THINKING.includes(
    model as CanonicalGoogleThinkingModel,
  );
}
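
// Example (illustrative): the "-8k"/"-16k" names are CoCalc-internal,
// context-limited variants; GOOGLE_MODEL_TO_ID maps them back to the
// canonical API ids, and only the canonical names are checked for thinking
// support:
//
//   GOOGLE_MODEL_TO_ID["gemini-2.5-pro-8k"]; // "gemini-2.5-pro"
//   isGoogleThinkingModel("gemini-2.5-pro"); // true (Gemini 2.5+)
//   isGoogleThinkingModel("gemini-2.0-flash"); // false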
"claude-sonnet-4-0",263"claude-4-opus-8k": "claude-opus-4-0",264"claude-4-5-sonnet-8k": "claude-sonnet-4-5",265"claude-4-5-opus-8k": "claude-opus-4-5",266"claude-4-6-opus-8k": "claude-opus-4-6",267"claude-4-5-haiku-8k": "claude-haiku-4-5",268"claude-3-sonnet": null,269"claude-3-sonnet-4k": null,270"claude-3-opus": null,271"claude-3-opus-8k": null,272} as const;273export const ANTHROPIC_PREFIX = "anthropic-";274export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];275type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;276export function isAnthropicModel(model: unknown): model is AnthropicModel {277return ANTHROPIC_MODELS.includes(model as any);278}279export function toAnthropicService(model: AnthropicModel): AnthropicService {280return `${ANTHROPIC_PREFIX}${model}`;281}282export function isAnthropicService(283service: string,284): service is AnthropicService {285return service.startsWith(ANTHROPIC_PREFIX);286}287export function fromAnthropicService(288service: AnthropicService,289): AnthropicModel {290if (!isAnthropicService(service)) {291throw new Error(`not a mistral service: ${service}`);292}293return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;294}295296// xAI (https://x.ai/)297export const XAI_MODELS = [298"grok-4-1-fast-non-reasoning-16k",299"grok-4-1-fast-reasoning-16k",300"grok-code-fast-1-16k",301] as const;302export const XAI_MODEL_TO_ID: Partial<{ [m in XaiModel]: string }> = {303"grok-4-1-fast-non-reasoning-16k": "grok-4-1-fast-non-reasoning",304"grok-4-1-fast-reasoning-16k": "grok-4-1-fast-reasoning",305"grok-code-fast-1-16k": "grok-code-fast-1",306};307export const XAI_PREFIX = "xai-";308export type XaiModel = (typeof XAI_MODELS)[number];309export type XaiService = `${typeof XAI_PREFIX}${XaiModel}`;310export function isXaiModel(model: unknown): model is XaiModel {311return XAI_MODELS.includes(model as any);312}313export function toXaiService(model: XaiModel): XaiService {314return `${XAI_PREFIX}${model}`;315}316export function isXaiService(service: string): service is XaiService {317return service.startsWith(XAI_PREFIX);318}319export function fromXaiService(service: XaiService): XaiModel {320if (!isXaiService(service)) {321throw new Error(`not an xai service: ${service}`);322}323return service.slice(XAI_PREFIX.length) as XaiModel;324}325export function toXaiProviderModel(model: string): string {326const mapped = XAI_MODEL_TO_ID[model as XaiModel];327if (mapped != null) {328return mapped;329}330return model.replace(/-\d+k$/, "");331}332333// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects334export const LANGUAGE_MODELS = [335...MODELS_OPENAI,336...MISTRAL_MODELS,337...GOOGLE_MODELS,338...ANTHROPIC_MODELS,339...XAI_MODELS,340] as const;341342export const USER_SELECTABLE_LLMS_BY_VENDOR: {343[vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;344} = {345openai: MODELS_OPENAI.filter(346(m) =>347m === "gpt-4" ||348m === "gpt-4-turbo-preview-8k" ||349m === "gpt-4o-8k" ||350m === "gpt-4o-mini-8k" ||351m === "gpt-4.1" ||352m === "gpt-4.1-mini" ||353m === "o3-8k" ||354m === "o4-mini-8k" ||355m === "gpt-5.2-8k" ||356m === "gpt-5-mini-8k",357),358google: [359"gemini-3-flash-preview-16k",360"gemini-3-pro-preview-8k",361"gemini-2.5-flash-8k",362"gemini-2.5-pro-8k",363],364mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-small-latest"),365anthropic: ANTHROPIC_MODELS.filter((m) => {366// latest of each tier; keep opus 4.5 temporarily for users who have it configured367return (368m === 
"claude-4-5-haiku-8k" ||369m === "claude-4-5-sonnet-8k" ||370m === "claude-4-5-opus-8k" ||371m === "claude-4-6-opus-8k"372);373}),374ollama: [], // this is empty, because these models are not hardcoded375custom_openai: [], // this is empty, because these models are not hardcoded]376xai: XAI_MODELS, // all xAI models are user-selectable377user: [],378} as const;379380// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!381// Make sure to update this when adding new models.382// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx383export const USER_SELECTABLE_LANGUAGE_MODELS = [384...USER_SELECTABLE_LLMS_BY_VENDOR.openai,385...USER_SELECTABLE_LLMS_BY_VENDOR.google,386...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,387...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,388...USER_SELECTABLE_LLMS_BY_VENDOR.xai,389] as const;390391export type OllamaLLM = string;392export type CustomOpenAI = string;393394// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.395export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];396export type LanguageModel = LanguageModelCore | OllamaLLM;397export function isCoreLanguageModel(398model: unknown,399): model is LanguageModelCore {400if (typeof model !== "string") return false;401return LANGUAGE_MODELS.includes(model as any);402}403404// we check if the given object is any known language model405export function isLanguageModel(model?: unknown): model is LanguageModel {406if (model == null) return false;407if (typeof model !== "string") return false;408if (isOllamaLLM(model)) return true;409if (isCustomOpenAI(model)) return true;410if (isUserDefinedModel(model)) return true; // this also checks, if there is a valid model inside411return LANGUAGE_MODELS.includes(model as any);412}413414export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];415416export function isLLMServiceName(service: unknown): service is LLMServiceName {417if (typeof service !== "string") return false;418return LANGUAGE_MODEL_SERVICES.includes(service as any);419}420421export type LLMServicesAvailable = Record<LLMServiceName, boolean>;422423interface LLMService {424name: string;425short: string; // additional short text next to the company name426desc: string; // more detailed description427url: string;428}429430export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {431openai: {432name: "OpenAI",433short: "AI research and deployment company",434desc: "OpenAI is an AI research and deployment company. 

interface LLMService {
  name: string;
  short: string; // additional short text next to the company name
  desc: string; // more detailed description
  url: string;
}

export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
  openai: {
    name: "OpenAI",
    short: "AI research and deployment company",
    desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
    url: "https://openai.com/",
  },
  google: {
    name: "Google",
    short: "Technology company",
    desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
    url: "https://gemini.google.com/",
  },
  anthropic: {
    name: "Anthropic",
    short: "AI research company",
    desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
    url: "https://www.anthropic.com/",
  },
  mistralai: {
    name: "Mistral AI",
    short: "French AI company",
    desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
    url: "https://mistral.ai/",
  },
  ollama: {
    name: "Ollama",
    short: "Open-source software",
    desc: "Ollama language model server at a custom API endpoint.",
    url: "https://ollama.com/",
  },
  custom_openai: {
    name: "OpenAI API",
    short: "Custom endpoint",
    desc: "Calls a custom OpenAI API endpoint.",
    url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
  },
  xai: {
    name: "xAI",
    short: "AI company by X Corp",
    desc: "xAI is an American artificial intelligence company founded by Elon Musk.",
    url: "https://x.ai/",
  },
  user: {
    name: "User Defined",
    short: "Account → Language Model",
    desc: "Defined by the user in Account Settings → Language Model",
    url: "",
  },
} as const;

interface ValidLanguageModelNameProps {
  model: string | undefined;
  filter: LLMServicesAvailable;
  ollama: string[]; // keys of ollama models
  custom_openai: string[]; // keys of custom openai models
  selectable_llms: string[]; // either empty, or an array stored in the server settings
}

// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
  openai: false,
  google: false,
  ollama: false,
  mistralai: false,
  anthropic: false,
  custom_openai: false,
  xai: false,
  user: false,
} as const;

// this is used in initialization functions, e.g. to get a default model depending on the overall availability
// usually, this should just return the chatgpt3 model, but e.g. if neither Google nor OpenAI is available,
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName({
  model,
  filter = DEFAULT_FILTER,
  ollama,
  custom_openai,
  selectable_llms,
}: ValidLanguageModelNameProps): LanguageModel {
  if (typeof model === "string" && isValidModel(model)) {
    try {
      if (isCoreLanguageModel(model)) {
        const v = model2vendor(model).name;
        if (filter[v] && selectable_llms.includes(model)) {
          return model;
        }
      }

      if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
        return model;
      }

      if (
        isCustomOpenAI(model) &&
        custom_openai.includes(fromCustomOpenAIModel(model))
      ) {
        return model;
      }

      if (isUserDefinedModel(model)) {
        return model;
      }
    } catch {}
  }

  for (const free of [true, false]) {
    const dflt = getDefaultLLM(
      selectable_llms,
      filter,
      ollama,
      custom_openai,
      free,
    );
    if (dflt != null) {
      return dflt;
    }
  }
  return DEFAULT_MODEL;
}

export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
  "google",
  "openai",
  "anthropic",
  "mistralai",
  "xai",
  "ollama",
  "custom_openai",
] as const;

export function getDefaultLLM(
  selectable_llms: string[],
  filter: LLMServicesAvailable,
  ollama?: { [key: string]: any },
  custom_openai?: { [key: string]: any },
  only_free = true,
): LanguageModel {
  for (const v of DEFAULT_LLM_PRIORITY) {
    if (!filter[v]) continue;
    for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
      if (selectable_llms.includes(m)) {
        const isFree = LLM_COST[m].free ?? true;
        if ((only_free && isFree) || !only_free) {
          return m;
        }
      }
    }
  }
  // none of the standard models, pick the first ollama or custom_openai
  if (ollama != null && !isEmpty(ollama)) {
    return toOllamaModel(Object.keys(ollama)[0]);
  }
  if (custom_openai != null && !isEmpty(custom_openai)) {
    return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
  }
  return DEFAULT_MODEL;
}
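
// Example (illustrative): getDefaultLLM walks DEFAULT_LLM_PRIORITY and
// returns the first enabled, selectable model – with the default
// only_free=true it prefers free ones. Assuming only OpenAI is enabled and
// "gpt-4o-mini-8k" is selectable:
//
//   getDefaultLLM(["gpt-4o-mini-8k"], { ...DEFAULT_FILTER, openai: true });
//   // → "gpt-4o-mini-8k" (free, per LLM_COST)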
for checking "account-id={string}" and other things like that638export const LANGUAGE_MODEL_PREFIXES = [639"chatgpt",640...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),641] as const;642643// we encode the in the frontend and elsewhere with the service name as a prefix644export function model2service(model: LanguageModel): LanguageService {645if (model === "text-embedding-ada-002") {646return `${OPENAI_PREFIX}${model}`;647}648if (649isOllamaLLM(model) ||650isCustomOpenAI(model) ||651isUserDefinedModel(model)652) {653return model; // already has a useful prefix654}655if (isXaiModel(model)) {656return toXaiService(model);657}658if (isMistralModel(model)) {659return toMistralService(model);660}661if (isAnthropicModel(model)) {662return toAnthropicService(model);663}664if (isLanguageModel(model)) {665if (666model === "text-bison-001" ||667model === "chat-bison-001" ||668model === "embedding-gecko-001" ||669isGoogleModel(model)670) {671return `${GOOGLE_PREFIX}${model}`;672} else {673return `${OPENAI_PREFIX}${model}`;674}675}676677throw new Error(`unknown model: ${model}`);678}679680// inverse of model2service, but robust for chat avatars, which might not have a prefix681// TODO: fix the mess682export function service2model(683service: LanguageService | "chatgpt",684): LanguageModel {685if (service === "chatgpt") {686return "gpt-3.5-turbo";687}688const lm = service2model_core(service);689if (lm == null) {690// We don't throw an error, since the frontend would crash691// throw new Error(`unknown service: ${service}`);692console.warn(`service2model: unknown service: ${service}`);693return "gpt-3.5-turbo";694}695return lm;696}697698export function service2model_core(699service: LanguageService,700): LanguageModel | null {701// split off the first part of service, e.g., "openai-" or "google-"702const s = service.split("-")[0];703const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);704705if (isUserDefinedModel(service)) {706return service;707}708709const m = hasPrefix ? 
service.split("-").slice(1).join("-") : service;710if (hasPrefix) {711// we add the trailing "-" to match with these prefixes, which include the "-"712switch (`${s}-`) {713case OLLAMA_PREFIX:714return toOllamaModel(m);715case CUSTOM_OPENAI_PREFIX:716return toCustomOpenAIModel(m);717}718}719720if (LANGUAGE_MODELS.includes(m as any)) {721return m;722}723return null;724}725726// NOTE: do not use this – instead use server_settings.default_llm727export const DEFAULT_MODEL: LanguageModel = "gemini-3-flash-preview-16k";728729interface LLMVendor {730name: LLMServiceName;731url: string;732}733734export function model2vendor(model): LLMVendor {735if (isUserDefinedModel(model)) {736return { name: "user", url: "" };737} else if (isOllamaLLM(model)) {738return { name: "ollama", url: LLM_PROVIDER.ollama.url };739} else if (isCustomOpenAI(model)) {740return {741name: "custom_openai",742url: LLM_PROVIDER.custom_openai.url,743};744} else if (isMistralModel(model)) {745return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };746} else if (isOpenAIModel(model)) {747return { name: "openai", url: LLM_PROVIDER.openai.url };748} else if (isGoogleModel(model)) {749return { name: "google", url: LLM_PROVIDER.google.url };750} else if (isAnthropicModel(model)) {751return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };752} else if (isXaiModel(model)) {753return { name: "xai", url: LLM_PROVIDER.xai.url };754}755756throw new Error(`model2vendor: unknown model: "${model}"`);757}758759// wraps the model name in an object that indicates that it's an ollama model760// TODO: maybe it will be necessary at some point to pass in the list of available ollama models761// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)762export function toOllamaModel(model: string): OllamaLLM {763if (isOllamaLLM(model)) {764throw new Error(`already an ollama model: ${model}`);765}766return `${OLLAMA_PREFIX}${model}`;767}768769// unwraps the model name from an object that indicates that it's an ollama model770export function fromOllamaModel(model: OllamaLLM) {771if (!isOllamaLLM(model)) {772throw new Error(`not an ollama model: ${model}`);773}774return model.slice(OLLAMA_PREFIX.length);775}776777export function isOllamaLLM(model: unknown): model is OllamaLLM {778return (779typeof model === "string" &&780model.startsWith(OLLAMA_PREFIX) &&781model.length > OLLAMA_PREFIX.length782);783}784785export function toCustomOpenAIModel(model: string): CustomOpenAI {786if (isCustomOpenAI(model)) {787throw new Error(`already a custom openai model: ${model}`);788}789return `${CUSTOM_OPENAI_PREFIX}${model}`;790}791792export function isCustomOpenAI(model: unknown): model is CustomOpenAI {793return (794typeof model === "string" &&795model.startsWith(CUSTOM_OPENAI_PREFIX) &&796model.length > CUSTOM_OPENAI_PREFIX.length797);798}799800export function fromCustomOpenAIModel(model: CustomOpenAI) {801if (!isCustomOpenAI(model)) {802throw new Error(`not a custom openai model: ${model}`);803}804return model.slice(CUSTOM_OPENAI_PREFIX.length);805}806807export function toMistralService(model: string): MistralService {808if (isMistralService(model)) {809throw new Error(`already a mistral model: ${model}`);810}811if (!isMistralModel(model)) {812throw new Error(`not a mistral model: ${model}`);813}814return `${MISTRAL_PREFIX}${model}`;815}816817export function fromMistralService(model: MistralService) {818if (!isMistralService(model)) {819throw new Error(`not a mistral model: ${model}`);820}821return 

type LLM2String = {
  [key in
    | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
    | "chatgpt" // some additional ones, backwards compatibility
    | "chatgpt3"
    | "chatgpt4"
    | "gpt-4-32k"
    | "text-bison-001"
    | "chat-bison-001"]: string;
};

// Map from pseudo account_id to what should be displayed to user.
// This is used in various places in the frontend.
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
export const LLM_USERNAMES: LLM2String = {
  chatgpt: "GPT-3.5",
  chatgpt3: "GPT-3.5",
  chatgpt4: "GPT-4",
  "gpt-4": "GPT-4",
  "gpt-4-32k": "GPT-4-32k",
  "gpt-3.5-turbo": "GPT-3.5",
  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
  "gpt-4-turbo-preview": "GPT-4 Turbo 128k",
  "gpt-4-turbo-preview-8k": "GPT-4 Turbo",
  "gpt-4-turbo": "GPT-4 Turbo 128k",
  "gpt-4-turbo-8k": "GPT-4 Turbo",
  "gpt-4o": "GPT-4o 128k",
  "gpt-4o-8k": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini 128k",
  "gpt-4o-mini-8k": "GPT-4o Mini",
  "gpt-4.1": "GPT-4.1",
  "gpt-4.1-mini": "GPT-4.1 Mini",
  "o1-mini-8k": "OpenAI o1-mini",
  "o1-8k": "OpenAI o1",
  "o1-mini": "OpenAI o1-mini",
  o1: "OpenAI o1",
  "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "PaLM 2",
  "chat-bison-001": "PaLM 2",
  "gemini-pro": "Gemini 1.0 Pro",
  "gemini-1.0-ultra": "Gemini 1.0 Ultra",
  "gemini-1.5-flash": "Gemini 1.5 Flash",
  "gemini-1.5-pro": "Gemini 1.5 Pro 1m",
  "gemini-1.5-pro-8k": "Gemini 1.5 Pro",
  "gemini-1.5-flash-8k": "Gemini 1.5 Flash",
  "gemini-2.0-flash-8k": "Gemini 2.0 Flash",
  "gemini-2.0-flash-lite-8k": "Gemini 2.0 Flash Lite",
  "gemini-2.5-flash-8k": "Gemini 2.5 Flash",
  "gemini-2.5-pro-8k": "Gemini 2.5 Pro",
  "gemini-3-pro-preview-8k": "Gemini 3 Pro",
  "mistral-small-latest": "Mistral AI Small",
  "mistral-medium-latest": "Mistral AI Medium",
  "mistral-large-latest": "Mistral AI Large",
  "devstral-medium-2507": "Devstral Medium",
  //"magistral-medium-latest": "Magistral Medium",
  "claude-3-haiku": "Claude 3 Haiku",
  "claude-3-haiku-8k": "Claude 3 Haiku",
  "claude-3-5-haiku-8k": "Claude 3.5 Haiku",
  "claude-3-sonnet": "Claude 3 Sonnet 200k",
  "claude-3-sonnet-4k": "Claude 3 Sonnet",
  "claude-3-5-sonnet": "Claude 3.5 Sonnet",
  "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
  "claude-4-sonnet-8k": "Claude 4 Sonnet",
  "claude-4-opus-8k": "Claude 4 Opus",
  "claude-4-5-sonnet-8k": "Claude 4.5 Sonnet",
  "claude-4-5-opus-8k": "Claude 4.5 Opus",
  "claude-4-6-opus-8k": "Claude 4.6 Opus",
  "claude-4-5-haiku-8k": "Claude 4.5 Haiku",
  "claude-3-opus": "Claude 3 Opus",
  "claude-3-opus-8k": "Claude 3 Opus",
  "o3-8k": "OpenAI o3",
  o3: "OpenAI o3 128k",
  "o4-mini-8k": "OpenAI o4-mini",
  "o4-mini": "OpenAI o4-mini 128k",
  "gpt-5-8k": "GPT-5",
  "gpt-5": "GPT-5 128k",
  "gpt-5.2-8k": "GPT-5.2",
  "gpt-5.2": "GPT-5.2 128k",
  "gpt-5-mini-8k": "GPT-5 Mini",
  "gpt-5-mini": "GPT-5 Mini 128k",
  "gemini-3-flash-preview-16k": "Gemini 3 Flash",
  "grok-4-1-fast-non-reasoning-16k": "Grok 4.1 Fast",
  "grok-4-1-fast-reasoning-16k": "Grok 4.1 Fast Reasoning",
  "grok-code-fast-1-16k": "Grok Code Fast",
} as const;

// similar to the above, we map to short user-visible description texts
// this comes next to the name, hence you do not have to mention the name
export const LLM_DESCR: LLM2String = {
  chatgpt: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt3: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt4:
    "Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4.1":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4-32k": "",
  "gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  "gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
  "gpt-4-turbo-preview-8k":
    "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo-preview": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4-turbo-8k":
    "Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4o-8k":
    "Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
  "gpt-4o": "Most powerful, fastest, and cheapest (OpenAI, 128k token context)",
  "gpt-4o-mini-8k":
    "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4.1-mini": "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
  "text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
  "o1-8k": "Spends more time thinking (8k token context)",
  "o1-mini-8k": "A cost-efficient reasoning model (8k token context)",
  o1: "Spends more time thinking (8k token context)",
  "o1-mini": "A cost-efficient reasoning model (8k token context)",
  "text-bison-001": "",
  "chat-bison-001": "",
  "gemini-pro":
    "Google's Gemini 1.0 Pro Generative AI model (30k token context)",
  "gemini-1.0-ultra":
    "Google's Gemini 1.0 Ultra Generative AI model (30k token context)",
  "gemini-1.5-pro":
    "Google's Gemini 1.5 Pro Generative AI model (1m token context)",
  "gemini-1.5-flash": "Google's Gemini 1.5 Flash Generative AI model",
  "gemini-1.5-pro-8k":
    "Google's Gemini 1.5 Pro Generative AI model (8k token context)",
  "gemini-1.5-flash-8k":
    "Google's Gemini 1.5 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-8k":
    "Google's Gemini 2.0 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-lite-8k":
    "Google's Gemini 2.0 Flash Lite Generative AI model (8k token context)",
  "gemini-2.5-flash-8k":
    "Google's Gemini 2.5 Flash Generative AI model (8k token context)",
  "gemini-2.5-pro-8k":
    "Google's Gemini 2.5 Pro Generative AI model (8k token context)",
  "gemini-3-pro-preview-8k":
    "Google's Gemini 3 Pro Generative AI model (8k token context)",
  "mistral-small-latest":
    "Small general purpose tasks, text classification, customer service. (Mistral AI, 4k token context)",
  "mistral-medium-latest":
    "Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",
  "mistral-large-latest":
    "Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",
  "devstral-medium-2507":
    "Developer-focused model optimized for coding tasks. (Mistral AI, 8k token context)",
  // "magistral-medium-latest":
  //   "Enhanced medium model with improved reasoning capabilities. (Mistral AI, 8k token context)",
  "claude-3-haiku":
    "Fastest model, lightweight actions (Anthropic, 200k token context)",
  "claude-3-haiku-8k":
    "Fastest model, lightweight actions (Anthropic, 8k token context)",
  "claude-3-5-sonnet":
    "Our most intelligent model (Anthropic, 200k token context)",
  "claude-3-sonnet":
    "Our most intelligent model (Anthropic, 200k token context)",
  "claude-3-5-sonnet-4k":
    "Our most intelligent model (Anthropic, 4k token context)",
  "claude-3-5-haiku-8k":
    "Fastest model, lightweight actions (Anthropic, 8k token context)",
  "claude-4-sonnet-8k":
    "Best combination of performance and speed (Anthropic, 8k token context)",
  "claude-4-opus-8k":
    "Excels at writing and complex tasks (Anthropic, 8k token context)",
  "claude-4-5-sonnet-8k":
    "Most intelligent model with advanced reasoning (Anthropic, 8k token context)",
  "claude-4-5-opus-8k":
    "Flagship model excelling at complex tasks and writing (Anthropic, 8k token context)",
  "claude-4-6-opus-8k":
    "Most intelligent model for agents and coding (Anthropic, 8k token context)",
  "claude-4-5-haiku-8k":
    "Fastest and most cost-efficient model (Anthropic, 8k token context)",
  "claude-3-sonnet-4k":
    "Best combination of performance and speed (Anthropic, 4k token context)",
  "claude-3-opus":
    "Excels at writing and complex tasks (Anthropic, 200k token context)",
  "claude-3-opus-8k":
    "Excels at writing and complex tasks (Anthropic, 8k token context)",
  "o3-8k":
    "Advanced reasoning model with enhanced thinking capabilities (8k token context)",
  o3: "Advanced reasoning model with enhanced thinking capabilities (128k token context)",
  "o4-mini-8k":
    "Cost-efficient reasoning model with strong performance (8k token context)",
  "o4-mini":
    "Cost-efficient reasoning model with strong performance (128k token context)",
  "gpt-5-8k":
    "OpenAI's most advanced model with built-in reasoning (8k token context)",
  "gpt-5":
    "OpenAI's most advanced model with built-in reasoning (128k token context)",
  "gpt-5.2-8k":
    "OpenAI's most advanced model with built-in reasoning (8k token context)",
  "gpt-5.2":
    "OpenAI's most advanced model with built-in reasoning (128k token context)",
  "gpt-5-mini-8k":
    "Fast and cost-efficient version of GPT-5 (8k token context)",
  "gpt-5-mini": "Fast and cost-efficient version of GPT-5 (128k token context)",
  "gemini-3-flash-preview-16k":
    "Google's Gemini 3 Flash model (16k token context)",
  "grok-4-1-fast-non-reasoning-16k":
    "xAI's Grok 4.1 fast non-reasoning model (16k token context)",
  "grok-4-1-fast-reasoning-16k":
    "xAI's Grok 4.1 fast reasoning model (16k token context)",
  "grok-code-fast-1-16k":
    "xAI's Grok Code Fast model, specialized for coding tasks (16k token context)",
} as const;

export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {
  if (!isCoCalcCom) return true;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {
    // i.e. model is now of type LanguageModelCore, so look up its cost entry
    const costInfo = LLM_COST[model];
    if (costInfo != null) {
      return costInfo.free;
    }
  }
  // all others are free (this should actually never happen, but we're cautious)
  return true;
}
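
// Example (illustrative): on anything other than cocalc.com every model
// counts as free; on cocalc.com the LLM_COST table decides:
//
//   isFreeModel("gpt-4o", false); // true (not cocalc.com)
//   isFreeModel("gpt-4o", true); // false (metered, see LLM_COST)
//   isFreeModel("gpt-4o-mini-8k", true); // true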

// this is used in purchases/get-service-cost
// we only need to check for the vendor prefixes, no special cases!
export function isLanguageModelService(
  service: string,
): service is LanguageService {
  if (isUserDefinedModel(service)) return true;
  for (const v of LANGUAGE_MODEL_SERVICES) {
    if (service.startsWith(`${v}-`)) {
      return true;
    }
  }
  return false;
}

export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
  switch (service) {
    case "openai":
      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
    case "google":
      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
    case "ollama":
      return `No status information for Ollama available.`;
    case "custom_openai":
      return `No status information for Custom OpenAI available.`;
    case "mistralai":
      return `No status information for Mistral AI available.`;
    case "anthropic":
      return `Anthropic [status](https://status.anthropic.com/).`;
    case "xai":
      return `xAI [status](https://status.x.ai/).`;
    case "user":
      return `No status information for user defined model available.`;
    default:
      unreachable(service);
  }
  return "";
}

interface Cost {
  prompt_tokens: number;
  completion_tokens: number;
  max_tokens: number;
  free: boolean; // whether this model has metered paid usage, or is offered for free
}

// price per token for a given price of USD per 1M tokens
function usd1Mtokens(usd: number): number {
  return usd / 1_000_000;
}
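
// Example (illustrative): usd1Mtokens converts a per-million-token price into
// a per-token price, e.g. $30 per 1M prompt tokens:
//
//   usd1Mtokens(30); // 0.00003 USD per token
//   1000 * usd1Mtokens(30); // 0.03 USD for a 1k token prompt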
to1142"gpt-4-turbo-8k": {1143prompt_tokens: usd1Mtokens(10),1144completion_tokens: usd1Mtokens(30),1145max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!1146free: false,1147},1148"gpt-4-turbo": {1149prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens1150completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens1151max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit1152free: false,1153},1154"gpt-4.1": {1155prompt_tokens: usd1Mtokens(2),1156completion_tokens: usd1Mtokens(8),1157max_tokens: 8192,1158free: false,1159},1160"gpt-4.1-mini": {1161prompt_tokens: usd1Mtokens(0.4),1162completion_tokens: usd1Mtokens(1.6),1163max_tokens: 8192,1164free: true,1165},1166"gpt-4o-8k": {1167prompt_tokens: usd1Mtokens(2.5),1168completion_tokens: usd1Mtokens(10),1169max_tokens: 8192, // like gpt-4-turbo-8k1170free: false,1171},1172"gpt-4o": {1173prompt_tokens: usd1Mtokens(2.5),1174completion_tokens: usd1Mtokens(10),1175max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit1176free: false,1177},1178"gpt-4o-mini-8k": {1179prompt_tokens: usd1Mtokens(0.15),1180completion_tokens: usd1Mtokens(0.6),1181max_tokens: 8192, // like gpt-4-turbo-8k1182free: true,1183},1184"gpt-4o-mini": {1185prompt_tokens: usd1Mtokens(0.15),1186completion_tokens: usd1Mtokens(0.6),1187max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit1188free: true,1189},1190o1: {1191prompt_tokens: usd1Mtokens(15),1192completion_tokens: usd1Mtokens(60),1193max_tokens: 8192, // like gpt-4-turbo-8k1194free: false,1195},1196"o1-8k": {1197prompt_tokens: usd1Mtokens(15),1198completion_tokens: usd1Mtokens(60),1199max_tokens: 8192, // like gpt-4-turbo-8k1200free: false,1201},1202"o1-mini-8k": {1203prompt_tokens: usd1Mtokens(1.1),1204completion_tokens: usd1Mtokens(4.4),1205max_tokens: 8192, // like gpt-4-turbo-8k1206free: true,1207},1208"o1-mini": {1209prompt_tokens: usd1Mtokens(1.1),1210completion_tokens: usd1Mtokens(4.4),1211max_tokens: 8192, // like gpt-4-turbo-8k1212free: true,1213},1214// also OpenAI1215"text-embedding-ada-002": {1216prompt_tokens: usd1Mtokens(0.05),1217completion_tokens: usd1Mtokens(0.05), // NOTE: this isn't a thing with embeddings1218max_tokens: 8191,1219free: false,1220},1221// https://ai.google.dev/pricing1222"gemini-pro": {1223prompt_tokens: usd1Mtokens(0.5),1224completion_tokens: usd1Mtokens(1.5),1225max_tokens: 30720,1226free: true,1227},1228"gemini-1.5-pro-8k": {1229prompt_tokens: usd1Mtokens(1.25), // (we're below the 128k context)1230completion_tokens: usd1Mtokens(5),1231max_tokens: 8_000,1232free: false,1233},1234"gemini-1.5-pro": {1235prompt_tokens: usd1Mtokens(2.5),1236completion_tokens: usd1Mtokens(10),1237max_tokens: 1048576,1238free: false,1239},1240"gemini-1.0-ultra": {1241prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!1242completion_tokens: usd1Mtokens(1),1243max_tokens: 30720,1244free: true,1245},1246"gemini-1.5-flash": {1247prompt_tokens: usd1Mtokens(0.075),1248completion_tokens: usd1Mtokens(0.3),1249max_tokens: 8_000,1250free: true,1251},1252"gemini-1.5-flash-8k": {1253prompt_tokens: usd1Mtokens(0.075),1254completion_tokens: usd1Mtokens(0.3),1255max_tokens: 8_000,1256free: true,1257},1258// https://ai.google.dev/gemini-api/docs/pricing?hl=de1259"gemini-2.0-flash-8k": {1260prompt_tokens: usd1Mtokens(0.1),1261completion_tokens: 
  "gemini-2.0-flash-8k": {
    prompt_tokens: usd1Mtokens(0.1),
    completion_tokens: usd1Mtokens(0.4),
    max_tokens: 8_000,
    free: true,
  },
  "gemini-2.0-flash-lite-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  "gemini-2.5-flash-8k": {
    prompt_tokens: usd1Mtokens(0.3),
    completion_tokens: usd1Mtokens(2.5),
    max_tokens: 8_000,
    free: true,
  },
  "gemini-2.5-pro-8k": {
    prompt_tokens: usd1Mtokens(1.25),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8_000,
    free: false,
  },
  "gemini-3-flash-preview-16k": {
    prompt_tokens: usd1Mtokens(0.5),
    completion_tokens: usd1Mtokens(3.0),
    max_tokens: 16_000,
    free: true,
  },
  "gemini-3-pro-preview-8k": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000,
    free: false,
  },
  // https://mistral.ai/technology/
  "mistral-small-latest": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-medium-latest": {
    prompt_tokens: usd1Mtokens(0.4),
    completion_tokens: usd1Mtokens(2),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-large-latest": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: false,
  },
  "devstral-medium-2507": {
    prompt_tokens: usd1Mtokens(0.4),
    completion_tokens: usd1Mtokens(2),
    max_tokens: 8_000, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  // "magistral-medium-latest": {
  //   prompt_tokens: usd1Mtokens(2),
  //   completion_tokens: usd1Mtokens(5),
  //   max_tokens: 8_000, // TODO don't know the real value, see getMaxTokens
  //   free: false,
  // },
  // Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
  "claude-3-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
    free: false,
  },
  "claude-3-opus": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens
    free: false,
  },
  "claude-3-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens, offered for free
    free: false,
  },
  "claude-3-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-haiku": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-5-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000,
    free: true,
  },
  "claude-4-sonnet-8k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 8_000,
    free: false,
  },
  "claude-4-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000,
    free: false,
  },
  "claude-4-5-sonnet-8k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 8_000,
    free: false,
  },
  "claude-4-5-opus-8k": {
    prompt_tokens: usd1Mtokens(5),
    completion_tokens: usd1Mtokens(25),
    max_tokens: 8_000,
    free: false,
  },
  "claude-4-6-opus-8k": {
    prompt_tokens: usd1Mtokens(5),
    completion_tokens: usd1Mtokens(25),
    max_tokens: 8_000,
    free: false,
  },
  "claude-4-5-haiku-8k": {
    prompt_tokens: usd1Mtokens(1),
    completion_tokens: usd1Mtokens(5),
    max_tokens: 8_000,
    free: true,
  },
  "o3-8k": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(8),
    max_tokens: 8192,
    free: false,
  },
  o3: {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(8),
    max_tokens: 128000,
    free: false,
  },
  "o4-mini-8k": {
    prompt_tokens: usd1Mtokens(1.1),
    completion_tokens: usd1Mtokens(4.4),
    max_tokens: 8192,
    free: false,
  },
  "o4-mini": {
    prompt_tokens: usd1Mtokens(1.1),
    completion_tokens: usd1Mtokens(4.4),
    max_tokens: 128000,
    free: false,
  },
  "gpt-5-8k": {
    prompt_tokens: usd1Mtokens(1.25),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192,
    free: false,
  },
  "gpt-5": {
    prompt_tokens: usd1Mtokens(1.25),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000,
    free: false,
  },
  "gpt-5.2-8k": {
    prompt_tokens: usd1Mtokens(1.25),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192,
    free: false,
  },
  "gpt-5.2": {
    prompt_tokens: usd1Mtokens(1.25),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000,
    free: false,
  },
  "gpt-5-mini-8k": {
    prompt_tokens: usd1Mtokens(0.25),
    completion_tokens: usd1Mtokens(2),
    max_tokens: 8192,
    free: true,
  },
  "gpt-5-mini": {
    prompt_tokens: usd1Mtokens(0.25),
    completion_tokens: usd1Mtokens(2),
    max_tokens: 128000,
    free: true,
  },
  // xAI (https://x.ai/)
  "grok-4-1-fast-non-reasoning-16k": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.5),
    max_tokens: 16_000,
    free: true,
  },
  "grok-4-1-fast-reasoning-16k": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.5),
    max_tokens: 16_000,
    free: true,
  },
  "grok-code-fast-1-16k": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 16_000,
    free: true,
  },
} as const;
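
// Example (illustrative): a rough wholesale estimate read straight off the
// table – a gpt-4o call with 1k prompt tokens and 1k completion tokens costs
// about 1000 * usd1Mtokens(2.5) + 1000 * usd1Mtokens(10) = $0.0125 (before
// any markup).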
""] != null;1508}15091510export const FALLBACK_MAX_TOKENS = 8192;15111512// Overload 1: Just model string (existing signature)1513export function getMaxTokens(model?: LanguageModel): number;15141515// Overload 2: Model string + optional config1516export function getMaxTokens(1517model?: LanguageModel,1518config?: { max_tokens?: number },1519): number;15201521// Implementation1522export function getMaxTokens(1523model?: LanguageModel,1524config?: { max_tokens?: number },1525): number {1526// If config.max_tokens is provided, validate and use it1527if (config?.max_tokens != null) {1528const maxTokens = config.max_tokens;1529// Handle legacy string values and invalid numbers1530const num =1531typeof maxTokens === "number"1532? maxTokens1533: parseInt(String(maxTokens), 10);1534if (isNaN(num) || num <= 0) {1535return FALLBACK_MAX_TOKENS;1536}1537// Clamp to safe range1538return Math.max(1000, Math.min(2000000, num));1539}15401541// Existing logic1542if (isOllamaLLM(model)) return FALLBACK_MAX_TOKENS;1543return LLM_COST[model ?? ""]?.max_tokens ?? FALLBACK_MAX_TOKENS;1544}15451546export interface LLMCost {1547prompt_tokens: number;1548completion_tokens: number;1549}15501551export function getLLMCost(1552model: LanguageModelCore,1553markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.31554): LLMCost {1555const x = LLM_COST[model];1556if (x == null) {1557throw Error(`unknown model "${model}"`);1558}1559const { prompt_tokens, completion_tokens } = x;1560if (markup_percentage < 0) {1561throw Error("markup percentage can't be negative");1562}1563const f = 1 + markup_percentage / 100;1564return {1565prompt_tokens: prompt_tokens * f,1566completion_tokens: completion_tokens * f,1567};1568}15691570const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({1571max: 10,1572});15731574export function getLLMPriceRange(1575prompt: number,1576output: number,1577markup_percentage: number,1578): { min: number; max: number } {1579const cacheKey = `${prompt}::${output}::${markup_percentage}`;1580const cached = priceRangeCache.get(cacheKey);1581if (cached) return cached;15821583let min = Infinity;1584let max = 0;1585for (const key in LLM_COST) {1586const model = LLM_COST[key];1587if (!model || isFreeModel(key, true)) continue;1588const { prompt_tokens, completion_tokens } = getLLMCost(1589key as LanguageModelCore,1590markup_percentage,1591);1592const p = prompt * prompt_tokens + output * completion_tokens;15931594min = Math.min(min, p);1595max = Math.max(max, p);1596}1597const ret = { min, max };1598priceRangeCache.set(cacheKey, ret);1599return ret;1600}16011602// The maximum cost for one single call using the given model.1603// We can't know the cost until after it happens, so this bound is useful for1604// ensuring user can afford to make a call.1605export function getMaxCost(1606model: LanguageModelCore,1607markup_percentage: number,1608): number {1609const { prompt_tokens, completion_tokens } = getLLMCost(1610model,1611markup_percentage,1612);1613const { max_tokens } = LLM_COST[model];1614return Math.max(prompt_tokens, completion_tokens) * max_tokens;1615}16161617/**1618* Initially, we just had one system promt for all LLMs.1619* This was tuned for the ChatGPTs by OpenAI, but breaks down for others.1620* For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.1621*/1622export function getSystemPrompt(1623model: LanguageModel,1624_path: string | undefined,1625) {1626// 

/**
 * Initially, we just had one system prompt for all LLMs.
 * This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
 * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
 */
export function getSystemPrompt(
  model: LanguageModel,
  _path: string | undefined,
) {
  // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
  const common = "Be brief.";
  const math = "Enclose any math formulas in $.";

  if (
    model2vendor(model).name === "openai" ||
    model.startsWith(OPENAI_PREFIX)
  ) {
    const mdCode =
      "Include the language directly after the triple backticks in all markdown code blocks.";
    return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
  }

  // mistral stupidly inserts anything mentioned in the prompt as examples, always.
  if (
    model2vendor(model).name === "mistralai" ||
    model.startsWith(MISTRAL_PREFIX)
  ) {
    return common;
  }

  if (
    model2vendor(model).name === "google" ||
    model.startsWith(GOOGLE_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (
    model2vendor(model).name === "ollama" ||
    model.startsWith(OLLAMA_PREFIX)
  ) {
    return `${common}`;
  }

  if (
    model2vendor(model).name === "anthropic" ||
    model.startsWith(ANTHROPIC_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (model2vendor(model).name === "xai" || model.startsWith(XAI_PREFIX)) {
    return `${math}\n${common}`;
  }

  const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
  return `${mdCode}\n${math}\n${common}`;
}