Path: blob/master/src/packages/util/db-schema/llm-utils.ts

// this contains bits and pieces from the wrongly named openai.ts file

import { isEmpty } from "lodash";
import LRU from "lru-cache";

import { unreachable } from "@cocalc/util/misc";

// these can be defined by admins and users
export const SERVICES = [
  "openai",
  "google",
  "mistralai", // the "*ai" suffix is deliberate, because their model names start with "mistral-..." and we have to distinguish it from the prefix
  "anthropic",
  "ollama",
  "custom_openai",
] as const;

// a "user-*" model is a wrapper for all the model services
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;

export type UserDefinedLLMService = (typeof SERVICES)[number];

export function isUserDefinedModelType(
  model: unknown,
): model is UserDefinedLLMService {
  return SERVICES.includes(model as any);
}

// "User LLMs" are defined in the user's account settings.
// They query an external LLM service of given type, endpoint, and API key.
export interface UserDefinedLLM {
  id: number; // a unique number
  service: UserDefinedLLMService;
  model: string; // non-empty string
  display: string; // short user-visible string
  endpoint: string; // URL to the LLM service
  apiKey: string;
  icon?: string; // https://.../...png
}

export const USER_LLM_PREFIX = "user-";

// This basically prefixes the "model" defined by the user with the USER and service prefix.
// We do not use the to*() functions, because the names of the models could be arbitrary for each service.
export function toUserLLMModelName(llm: UserDefinedLLM) {
  const { service } = llm;
  const model: string = (() => {
    switch (service) {
      case "custom_openai":
        return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
      case "ollama":
        return toOllamaModel(llm.model);
      case "anthropic":
        return `${ANTHROPIC_PREFIX}${llm.model}`;
      case "google":
        return `${GOOGLE_PREFIX}${llm.model}`;
      case "mistralai":
        return `${MISTRAL_PREFIX}${llm.model}`;
      case "openai":
        return `${OPENAI_PREFIX}${llm.model}`;
      default:
        unreachable(service);
        throw new Error(
          `toUserLLMModelName of service ${service} not supported`,
        );
    }
  })();
  return `${USER_LLM_PREFIX}${model}`;
}

export function fromUserDefinedLLMModel(m: string): string | null {
  if (isUserDefinedModel(m)) {
    return m.slice(USER_LLM_PREFIX.length);
  }
  return null;
}

export function isUserDefinedModel(model: unknown): boolean {
  if (typeof model !== "string") return false;
  if (model.startsWith(USER_LLM_PREFIX)) {
    const m2 = model.slice(USER_LLM_PREFIX.length);
    return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
  }
  return false;
}

export function unpackUserDefinedLLMModel(model: string): {
  service: UserDefinedLLMService;
  model: string;
} | null {
  const um = fromUserDefinedLLMModel(model);
  if (um === null) return null;
  for (const service of SERVICES) {
    if (um.startsWith(`${service}-`)) {
      return { service, model: um.slice(service.length + 1) };
    }
  }
  return null;
}
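
// Example (illustrative, not part of the original file): a user-defined Ollama
// model named "llama3" round-trips through the helpers above roughly like this:
//
//   toUserLLMModelName({ service: "ollama", model: "llama3", ... })
//     === "user-ollama-llama3"
//   unpackUserDefinedLLMModel("user-ollama-llama3")
//     → { service: "ollama", model: "llama3" }
//   isUserDefinedModel("user-ollama-llama3") === true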

export const OPENAI_PREFIX = "openai-";

// NOTE: all arrays of model names should order them from the "simplest and fastest" to the "complex, slowest, most expensive"
// that way, the ordering in the UI doesn't look arbitrary, but has a clear logic

export const MODELS_OPENAI = [
  "gpt-3.5-turbo",
  "gpt-4o-mini-8k", // context limited
  "gpt-4o-mini", // Released 2024-07-18
  "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
  "gpt-4o", // Released 2024-05-13
  // the "preview" variants are disabled, because the preview is over
  "gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
  "gpt-4-turbo-preview",
  "gpt-4-turbo-8k", // Released 2024-04-11
  "gpt-4-turbo",
  "gpt-4",
  "gpt-4-32k",
  "gpt-3.5-turbo-16k",
  "text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
] as const;

export type OpenAIModel = (typeof MODELS_OPENAI)[number];

export function isOpenAIModel(model: unknown): model is OpenAIModel {
  return MODELS_OPENAI.includes(model as any);
}

// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
export const MISTRAL_MODELS = [
  // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix
  "mistral-small-latest",
  "mistral-medium-latest", // Deprecated!
  "mistral-large-latest",
] as const;

export type MistralModel = (typeof MISTRAL_MODELS)[number];

export function isMistralModel(model: unknown): model is MistralModel {
  return MISTRAL_MODELS.includes(model as any);
}

// google's models are taken from here – we use the generative AI client lib
// https://developers.generativeai.google/models/language
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
export const GOOGLE_MODELS = [
  "gemini-1.5-flash-8k", // introduced 2024-05-15
  "gemini-pro",
  "gemini-1.0-ultra", // hangs
  "gemini-1.5-pro-8k", // works now with langchain
  "gemini-1.5-pro", // works now with langchain
] as const;
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
export function isGoogleModel(model: unknown): model is GoogleModel {
  return GOOGLE_MODELS.includes(model as any);
}
export const GOOGLE_MODEL_TO_ID: Partial<{ [m in GoogleModel]: string }> = {
  "gemini-1.5-pro": "gemini-1.5-pro-latest",
  "gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
  "gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
} as const;

// https://docs.anthropic.com/claude/docs/models-overview -- stable names for the models ...
export const ANTHROPIC_MODELS = [
  "claude-3-5-sonnet",
  "claude-3-5-sonnet-4k", // added 2024-06-24
  "claude-3-haiku",
  "claude-3-haiku-8k", // limited context window, offered for free
  "claude-3-sonnet",
  "claude-3-sonnet-4k", // limited context window, offered for free
  "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending
  "claude-3-opus",
] as const;
const CLAUDE_SONNET_VERSION = "20240229";
const CLAUDE_HAIKU_VERSION = "20240307";
const CLAUDE_OPUS_VERSION = "20240229";
const CLAUDE_SONNET_3_5_VERSION = "20240620";
// ... and we add a version number (there is no "*-latest") when dispatching on the backend
export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string } = {
  "claude-3-sonnet-4k": CLAUDE_SONNET_VERSION,
  "claude-3-opus": CLAUDE_OPUS_VERSION,
  "claude-3-opus-8k": CLAUDE_OPUS_VERSION,
  "claude-3-sonnet": CLAUDE_SONNET_VERSION,
  "claude-3-5-sonnet": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-5-sonnet-4k": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-haiku": CLAUDE_HAIKU_VERSION,
  "claude-3-haiku-8k": CLAUDE_HAIKU_VERSION,
} as const;
export const ANTHROPIC_PREFIX = "anthropic-";
export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];
type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;
export function isAnthropicModel(model: unknown): model is AnthropicModel {
  return ANTHROPIC_MODELS.includes(model as any);
}
export function toAnthropicService(model: AnthropicModel): AnthropicService {
  return `${ANTHROPIC_PREFIX}${model}`;
}
export function isAnthropicService(
  service: string,
): service is AnthropicService {
  return service.startsWith(ANTHROPIC_PREFIX);
}
export function fromAnthropicService(
  service: AnthropicService,
): AnthropicModel {
  if (!isAnthropicService(service)) {
    throw new Error(`not an anthropic service: ${service}`);
  }
  return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;
}
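
// Example (illustrative, not from the original file): the Anthropic helpers
// above encode/decode the service string, and ANTHROPIC_VERSION pins the
// dated model name the backend actually calls:
//
//   toAnthropicService("claude-3-haiku-8k") === "anthropic-claude-3-haiku-8k"
//   fromAnthropicService("anthropic-claude-3-haiku-8k") === "claude-3-haiku-8k"
//   ANTHROPIC_VERSION["claude-3-haiku-8k"] === "20240307"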

// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects
export const LANGUAGE_MODELS = [
  ...MODELS_OPENAI,
  ...MISTRAL_MODELS,
  ...GOOGLE_MODELS,
  ...ANTHROPIC_MODELS,
] as const;

export const USER_SELECTABLE_LLMS_BY_VENDOR: {
  [vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
} = {
  openai: MODELS_OPENAI.filter(
    (m) =>
      m === "gpt-4" ||
      m === "gpt-4-turbo-preview-8k" ||
      m === "gpt-4o-8k" ||
      m === "gpt-4o-mini-8k",
  ),
  google: GOOGLE_MODELS.filter(
    (m) =>
      // we only enable the 1.0, 1.5 pro and 1.5 flash with a limited context window
      m === "gemini-pro" ||
      m === "gemini-1.5-pro-8k" ||
      m === "gemini-1.5-flash-8k",
  ),
  mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-medium-latest"),
  anthropic: ANTHROPIC_MODELS.filter((m) => {
    // we show opus and the context restricted models (to avoid high costs)
    return (
      m === "claude-3-opus-8k" ||
      m === "claude-3-5-sonnet-4k" ||
      m === "claude-3-haiku-8k"
    );
  }),
  ollama: [], // this is empty, because these models are not hardcoded
  custom_openai: [], // this is empty, because these models are not hardcoded
  user: [],
} as const;

// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!
// Make sure to update this when adding new models.
// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
export const USER_SELECTABLE_LANGUAGE_MODELS = [
  ...USER_SELECTABLE_LLMS_BY_VENDOR.openai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.google,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,
] as const;

export type OllamaLLM = string;
export type CustomOpenAI = string;
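
// Illustrative note (not in the original file): with the filters above, the
// user-selectable core models work out to, per vendor and in list order:
//   openai:    gpt-4o-mini-8k, gpt-4o-8k, gpt-4-turbo-preview-8k, gpt-4
//   google:    gemini-1.5-flash-8k, gemini-pro, gemini-1.5-pro-8k
//   mistralai: mistral-small-latest, mistral-large-latest
//   anthropic: claude-3-5-sonnet-4k, claude-3-haiku-8k, claude-3-opus-8k
// USER_SELECTABLE_LANGUAGE_MODELS is their concatenation, which
// site_settings.selectable_llms can narrow further.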

// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.
export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];
export type LanguageModel = LanguageModelCore | OllamaLLM;
export function isCoreLanguageModel(
  model: unknown,
): model is LanguageModelCore {
  if (typeof model !== "string") return false;
  return LANGUAGE_MODELS.includes(model as any);
}

// we check if the given object is any known language model
export function isLanguageModel(model?: unknown): model is LanguageModel {
  if (model == null) return false;
  if (typeof model !== "string") return false;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isUserDefinedModel(model)) return true; // this also checks if there is a valid model inside
  return LANGUAGE_MODELS.includes(model as any);
}

export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];

export function isLLMServiceName(service: unknown): service is LLMServiceName {
  if (typeof service !== "string") return false;
  return LANGUAGE_MODEL_SERVICES.includes(service as any);
}

export type LLMServicesAvailable = Record<LLMServiceName, boolean>;

interface LLMService {
  name: string;
  short: string; // additional short text next to the company name
  desc: string; // more detailed description
  url: string;
}

export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
  openai: {
    name: "OpenAI",
    short: "AI research and deployment company",
    desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
    url: "https://openai.com/",
  },
  google: {
    name: "Google",
    short: "Technology company",
    desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
    url: "https://gemini.google.com/",
  },
  anthropic: {
    name: "Anthropic",
    short: "AI research company",
    desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
    url: "https://www.anthropic.com/",
  },
  mistralai: {
    name: "Mistral AI",
    short: "French AI company",
    desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
    url: "https://mistral.ai/",
  },
  ollama: {
    name: "Ollama",
    short: "Open-source software",
    desc: "Ollama language model server at a custom API endpoint.",
    url: "https://ollama.com/",
  },
  custom_openai: {
    name: "OpenAI API",
    short: "Custom endpoint",
    desc: "Calls a custom OpenAI API endpoint.",
    url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
  },
  user: {
    name: "User Defined",
    short: "Account → Language Model",
    desc: "Defined by the user in Account Settings → Language Model",
    url: "",
  },
} as const;

interface ValidLanguageModelNameProps {
  model: string | undefined;
  filter: LLMServicesAvailable;
  ollama: string[]; // keys of ollama models
  custom_openai: string[]; // keys of custom openai models
  selectable_llms: string[]; // either empty, or an array stored in the server settings
}

// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
  openai: false,
  google: false,
  ollama: false,
  mistralai: false,
  anthropic: false,
  custom_openai: false,
  user: false,
} as const;

// this is used in initialization functions, e.g. to get a default model depending on the overall availability
// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available,
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName({
  model,
  filter = DEFAULT_FILTER,
  ollama,
  custom_openai,
  selectable_llms,
}: ValidLanguageModelNameProps): LanguageModel {
  if (typeof model === "string" && isValidModel(model)) {
    try {
      if (isCoreLanguageModel(model)) {
        const v = model2vendor(model).name;
        if (filter[v] && selectable_llms.includes(model)) {
          return model;
        }
      }

      if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
        return model;
      }

      if (
        isCustomOpenAI(model) &&
        custom_openai.includes(fromCustomOpenAIModel(model))
      ) {
        return model;
      }

      if (isUserDefinedModel(model)) {
        return model;
      }
    } catch {}
  }

  for (const free of [true, false]) {
    const dflt = getDefaultLLM(
      selectable_llms,
      filter,
      ollama,
      custom_openai,
      free,
    );
    if (dflt != null) {
      return dflt;
    }
  }
  return DEFAULT_MODEL;
}
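
// Illustrative usage sketch (hypothetical arguments, not from the original
// file): if the requested model is not selectable, the function falls back to
// a default via getDefaultLLM, and finally to DEFAULT_MODEL:
//
//   getValidLanguageModelName({
//     model: "gpt-4", // not in selectable_llms below, so it is rejected
//     filter: { ...DEFAULT_FILTER, openai: true, google: true },
//     ollama: [],
//     custom_openai: [],
//     selectable_llms: ["gemini-1.5-flash-8k", "gpt-4o-mini-8k"],
//   }) // → "gemini-1.5-flash-8k" (google comes first in DEFAULT_LLM_PRIORITY)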

export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
  "google",
  "openai",
  "anthropic",
  "mistralai",
  "ollama",
  "custom_openai",
] as const;

export function getDefaultLLM(
  selectable_llms: string[],
  filter: LLMServicesAvailable,
  ollama?: { [key: string]: any },
  custom_openai?: { [key: string]: any },
  only_free = true,
): LanguageModel {
  for (const v of DEFAULT_LLM_PRIORITY) {
    if (!filter[v]) continue;
    for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
      if (selectable_llms.includes(m)) {
        const isFree = LLM_COST[m].free ?? true;
        if ((only_free && isFree) || !only_free) {
          return m;
        }
      }
    }
  }
  // none of the standard models matched – pick the first ollama or custom_openai
  if (ollama != null && !isEmpty(ollama)) {
    return toOllamaModel(Object.keys(ollama)[0]);
  }
  if (custom_openai != null && !isEmpty(custom_openai)) {
    return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
  }
  return DEFAULT_MODEL;
}

export interface OpenAIMessage {
  role: "system" | "user" | "assistant";
  content: string;
}
export type OpenAIMessages = OpenAIMessage[];
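
// Example (illustrative, not in the original file): a conversation in this
// message format – a system prompt followed by alternating user/assistant turns:
//
//   const messages: OpenAIMessages = [
//     { role: "system", content: "Be brief." },
//     { role: "user", content: "What is 2+2?" },
//     { role: "assistant", content: "4" },
//   ];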
for checking "account-id={string}" and other things like that498export const LANGUAGE_MODEL_PREFIXES = [499"chatgpt",500...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),501] as const;502503// we encode the in the frontend and elsewhere with the service name as a prefix504export function model2service(model: LanguageModel): LanguageService {505if (model === "text-embedding-ada-002") {506return `${OPENAI_PREFIX}${model}`;507}508if (509isOllamaLLM(model) ||510isCustomOpenAI(model) ||511isUserDefinedModel(model)512) {513return model; // already has a useful prefix514}515if (isMistralModel(model)) {516return toMistralService(model);517}518if (isAnthropicModel(model)) {519return toAnthropicService(model);520}521if (isLanguageModel(model)) {522if (523model === "text-bison-001" ||524model === "chat-bison-001" ||525model === "embedding-gecko-001" ||526isGoogleModel(model)527) {528return `${GOOGLE_PREFIX}${model}`;529} else {530return `${OPENAI_PREFIX}${model}`;531}532}533534throw new Error(`unknown model: ${model}`);535}536537// inverse of model2service, but robust for chat avatars, which might not have a prefix538// TODO: fix the mess539export function service2model(540service: LanguageService | "chatgpt",541): LanguageModel {542if (service === "chatgpt") {543return "gpt-3.5-turbo";544}545const lm = service2model_core(service);546if (lm == null) {547// We don't throw an error, since the frontend would crash548// throw new Error(`unknown service: ${service}`);549console.warn(`service2model: unknown service: ${service}`);550return "gpt-3.5-turbo";551}552return lm;553}554555export function service2model_core(556service: LanguageService,557): LanguageModel | null {558// split off the first part of service, e.g., "openai-" or "google-"559const s = service.split("-")[0];560const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);561562if (isUserDefinedModel(service)) {563return service;564}565566const m = hasPrefix ? 
service.split("-").slice(1).join("-") : service;567if (hasPrefix) {568// we add the trailing "-" to match with these prefixes, which include the "-"569switch (`${s}-`) {570case OLLAMA_PREFIX:571return toOllamaModel(m);572case CUSTOM_OPENAI_PREFIX:573return toCustomOpenAIModel(m);574}575}576577if (LANGUAGE_MODELS.includes(m as any)) {578return m;579}580return null;581}582583// NOTE: do not use this – instead use server_settings.default_llm584export const DEFAULT_MODEL: LanguageModel = "gemini-1.5-flash-8k";585586interface LLMVendor {587name: LLMServiceName;588url: string;589}590591export function model2vendor(model): LLMVendor {592if (isUserDefinedModel(model)) {593return { name: "user", url: "" };594} else if (isOllamaLLM(model)) {595return { name: "ollama", url: LLM_PROVIDER.ollama.url };596} else if (isCustomOpenAI(model)) {597return {598name: "custom_openai",599url: LLM_PROVIDER.custom_openai.url,600};601} else if (isMistralModel(model)) {602return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };603} else if (isOpenAIModel(model)) {604return { name: "openai", url: LLM_PROVIDER.openai.url };605} else if (isGoogleModel(model)) {606return { name: "google", url: LLM_PROVIDER.google.url };607} else if (isAnthropicModel(model)) {608return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };609}610611throw new Error(`model2vendor: unknown model: "${model}"`);612}613614// wraps the model name in an object that indicates that it's an ollama model615// TODO: maybe it will be necessary at some point to pass in the list of available ollama models616// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)617export function toOllamaModel(model: string): OllamaLLM {618if (isOllamaLLM(model)) {619throw new Error(`already an ollama model: ${model}`);620}621return `${OLLAMA_PREFIX}${model}`;622}623624// unwraps the model name from an object that indicates that it's an ollama model625export function fromOllamaModel(model: OllamaLLM) {626if (!isOllamaLLM(model)) {627throw new Error(`not an ollama model: ${model}`);628}629return model.slice(OLLAMA_PREFIX.length);630}631632export function isOllamaLLM(model: unknown): model is OllamaLLM {633return (634typeof model === "string" &&635model.startsWith(OLLAMA_PREFIX) &&636model.length > OLLAMA_PREFIX.length637);638}639640export function toCustomOpenAIModel(model: string): CustomOpenAI {641if (isCustomOpenAI(model)) {642throw new Error(`already a custom openai model: ${model}`);643}644return `${CUSTOM_OPENAI_PREFIX}${model}`;645}646647export function isCustomOpenAI(model: unknown): model is CustomOpenAI {648return (649typeof model === "string" &&650model.startsWith(CUSTOM_OPENAI_PREFIX) &&651model.length > CUSTOM_OPENAI_PREFIX.length652);653}654655export function fromCustomOpenAIModel(model: CustomOpenAI) {656if (!isCustomOpenAI(model)) {657throw new Error(`not a custom openai model: ${model}`);658}659return model.slice(CUSTOM_OPENAI_PREFIX.length);660}661662export function toMistralService(model: string): MistralService {663if (isMistralService(model)) {664throw new Error(`already a mistral model: ${model}`);665}666if (!isMistralModel(model)) {667throw new Error(`not a mistral model: ${model}`);668}669return `${MISTRAL_PREFIX}${model}`;670}671672export function fromMistralService(model: MistralService) {673if (!isMistralService(model)) {674throw new Error(`not a mistral model: ${model}`);675}676return model.slice(MISTRAL_PREFIX.length);677}678679type LLM2String = {680[key in681| (typeof 

type LLM2String = {
  [key in
    | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
    | "chatgpt" // some additional ones, backwards compatibility
    | "chatgpt3"
    | "chatgpt4"
    | "gpt-4-32k"
    | "text-bison-001"
    | "chat-bison-001"]: string;
};

// Map from pseudo account_id to what should be displayed to user.
// This is used in various places in the frontend.
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
export const LLM_USERNAMES: LLM2String = {
  chatgpt: "GPT-3.5",
  chatgpt3: "GPT-3.5",
  chatgpt4: "GPT-4",
  "gpt-4": "GPT-4",
  "gpt-4-32k": "GPT-4-32k",
  "gpt-3.5-turbo": "GPT-3.5",
  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
  "gpt-4-turbo-preview": "GPT-4 Turbo 128k",
  "gpt-4-turbo-preview-8k": "GPT-4 Turbo",
  "gpt-4-turbo": "GPT-4 Turbo 128k",
  "gpt-4-turbo-8k": "GPT-4 Turbo",
  "gpt-4o": "GPT-4o 128k",
  "gpt-4o-8k": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini 128k",
  "gpt-4o-mini-8k": "GPT-4o Mini",
  "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "PaLM 2",
  "chat-bison-001": "PaLM 2",
  "gemini-pro": "Gemini 1.0 Pro",
  "gemini-1.0-ultra": "Gemini 1.0 Ultra",
  "gemini-1.5-pro": "Gemini 1.5 Pro 1m",
  "gemini-1.5-pro-8k": "Gemini 1.5 Pro",
  "gemini-1.5-flash-8k": "Gemini 1.5 Flash",
  "mistral-small-latest": "Mistral AI Small",
  "mistral-medium-latest": "Mistral AI Medium",
  "mistral-large-latest": "Mistral AI Large",
  "claude-3-haiku": "Claude 3 Haiku 200k",
  "claude-3-haiku-8k": "Claude 3 Haiku",
  "claude-3-sonnet": "Claude 3 Sonnet 200k",
  "claude-3-sonnet-4k": "Claude 3 Sonnet",
  "claude-3-5-sonnet": "Claude 3.5 Sonnet 200k",
  "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
  "claude-3-opus": "Claude 3 Opus 200k",
  "claude-3-opus-8k": "Claude 3 Opus",
} as const;
(OpenAI, 8k token context)",748"gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",749"gpt-4o-8k":750"Most powerful, fastest, and cheapest (OpenAI, 8k token context)",751"gpt-4o": "Most powerful fastest, and cheapest (OpenAI, 128k token context)",752"gpt-4o-mini-8k":753"Most cost-efficient small model (OpenAI, 8k token context)",754"gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",755"text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place756"text-bison-001": "",757"chat-bison-001": "",758"gemini-pro":759"Google's Gemini 1.0 Pro Generative AI model (30k token context)",760"gemini-1.0-ultra":761"Google's Gemini 1.0 Ultra Generative AI model (30k token context)",762"gemini-1.5-pro":763"Google's Gemini 1.5 Pro Generative AI model (1m token context)",764"gemini-1.5-pro-8k":765"Google's Gemini 1.5 Pro Generative AI model (8k token context)",766"gemini-1.5-flash-8k":767"Google's Gemini 1.5 Flash Generative AI model (8k token context)",768"mistral-small-latest":769"Fast, simple queries, short answers, less capabilities. (Mistral AI, 4k token context)",770"mistral-medium-latest":771"Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",772"mistral-large-latest":773"Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",774"claude-3-haiku":775"Fastest model, lightweight actions (Anthropic, 200k token context)",776"claude-3-haiku-8k":777"Fastest model, lightweight actions (Anthropic, 8k token context)",778"claude-3-5-sonnet":779"Our most intelligent model (Anthropic, 200k token context)",780"claude-3-5-sonnet-4k":781"Our most intelligent model (Anthropic, 4k token context)",782"claude-3-sonnet":783"Best combination of performance and speed (Anthropic, 200k token context)",784"claude-3-sonnet-4k":785"Best combination of performance and speed (Anthropic, 4k token context)",786"claude-3-opus":787"Excels at writing and complex tasks (Anthropic, 200k token context)",788"claude-3-opus-8k":789"Excels at writing and complex tasks (Anthropic, 8k token context)",790} as const;791792export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {793if (!isCoCalcCom) return true;794if (isUserDefinedModel(model)) return true;795if (isOllamaLLM(model)) return true;796if (isCustomOpenAI(model)) return true;797if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {798// i.e. 

// this is used in purchases/get-service-cost
// we only need to check for the vendor prefixes, no special cases!
export function isLanguageModelService(
  service: string,
): service is LanguageService {
  if (isUserDefinedModel(service)) return true;
  for (const v of LANGUAGE_MODEL_SERVICES) {
    if (service.startsWith(`${v}-`)) {
      return true;
    }
  }
  return false;
}

export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
  switch (service) {
    case "openai":
      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
    case "google":
      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
    case "ollama":
      return `No status information for Ollama available.`;
    case "custom_openai":
      return `No status information for Custom OpenAI available.`;
    case "mistralai":
      return `No status information for Mistral AI available.`;
    case "anthropic":
      return `Anthropic [status](https://status.anthropic.com/).`;
    case "user":
      return `No status information for user defined model available.`;
    default:
      unreachable(service);
  }
  return "";
}

interface Cost {
  prompt_tokens: number;
  completion_tokens: number;
  max_tokens: number;
  free: boolean; // whether this model has metered paid usage, or is offered for free
}

// price per token for a given price of USD per 1M tokens
function usd1Mtokens(usd: number): number {
  return usd / 1_000_000;
}
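
// Worked example (illustrative, not in the original file): usd1Mtokens(30)
// is 30 / 1_000_000 = 0.00003 USD per token, so a 1,000 token prompt at that
// rate costs 1_000 * 0.00003 = $0.03 before any markup.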

// This is the official published cost that openai charges.
// It changes over time, so this will sometimes need to be updated.
// Our cost is a configurable multiple of this.
// https://openai.com/pricing#language-models
// There appears to be no api that provides the prices, unfortunately.
export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
  "gpt-4": {
    prompt_tokens: usd1Mtokens(30),
    completion_tokens: usd1Mtokens(60),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4-32k": {
    prompt_tokens: usd1Mtokens(60),
    completion_tokens: usd1Mtokens(120),
    max_tokens: 32768,
    free: false,
  },
  "gpt-3.5-turbo": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096,
    free: true,
  },
  "gpt-3.5-turbo-16k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 16384,
    free: false,
  },
  // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-preview-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo-preview": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  }, // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-8k": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "gpt-4o": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-mini-8k": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: true,
  },
  "gpt-4o-mini": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: true,
  },
  // also OpenAI
  "text-embedding-ada-002": {
    prompt_tokens: 0.0001 / 1000,
    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
    max_tokens: 8191,
    free: false,
  },
  // https://ai.google.dev/pricing
  "gemini-pro": {
    prompt_tokens: usd1Mtokens(0.5),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-pro-8k": {
    prompt_tokens: usd1Mtokens(3.5), // (we're below the 128k context)
    completion_tokens: usd1Mtokens(10.5),
    max_tokens: 8_000,
    free: false,
  },
  "gemini-1.5-pro": {
    prompt_tokens: usd1Mtokens(7),
    completion_tokens: usd1Mtokens(21),
    max_tokens: 1048576,
    free: false,
  },
  "gemini-1.0-ultra": {
    prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!
    completion_tokens: usd1Mtokens(1),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-flash-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://mistral.ai/technology/
  "mistral-small-latest": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-medium-latest": {
    prompt_tokens: usd1Mtokens(2.7),
    completion_tokens: usd1Mtokens(8.1),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-large-latest": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: false,
  },
  // Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
  "claude-3-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
    free: false,
  },
  "claude-3-opus": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens
    free: false,
  },
  "claude-3-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens, offered for free
    free: true,
  },
  "claude-3-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.25),
    completion_tokens: usd1Mtokens(1.25),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-haiku": {
    prompt_tokens: usd1Mtokens(0.25),
    completion_tokens: usd1Mtokens(1.25),
    max_tokens: 200_000,
    free: false,
  },
} as const;

// TODO: remove this test – it's only used server side, and that server side check should work for all known LLM models
export function isValidModel(model?: string): boolean {
  if (model == null) return false;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isMistralModel(model)) return true;
  if (isGoogleModel(model)) return true;
  return LLM_COST[model ?? ""] != null;
}

export function getMaxTokens(model?: LanguageModel): number {
  // TODO: store max tokens in the model object itself, this is just a fallback
  if (isOllamaLLM(model)) return 8192;
  if (isMistralModel(model)) return 4096; // TODO: check with MistralAI
  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
}

export interface LLMCost {
  prompt_tokens: number;
  completion_tokens: number;
}

export function getLLMCost(
  model: LanguageModelCore,
  markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
): LLMCost {
  const x = LLM_COST[model];
  if (x == null) {
    throw Error(`unknown model "${model}"`);
  }
  const { prompt_tokens, completion_tokens } = x;
  if (markup_percentage < 0) {
    throw Error("markup percentage can't be negative");
  }
  const f = 1 + markup_percentage / 100;
  return {
    prompt_tokens: prompt_tokens * f,
    completion_tokens: completion_tokens * f,
  };
}

const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({
  max: 10,
});

export function getLLMPriceRange(
  prompt: number,
  output: number,
  markup_percentage: number,
): { min: number; max: number } {
  const cacheKey = `${prompt}::${output}::${markup_percentage}`;
  const cached = priceRangeCache.get(cacheKey);
  if (cached) return cached;

  let min = Infinity;
  let max = 0;
  for (const key in LLM_COST) {
    const model = LLM_COST[key];
    if (!model || isFreeModel(key, true)) continue;
    const { prompt_tokens, completion_tokens } = getLLMCost(
      key as LanguageModelCore,
      markup_percentage,
    );
    const p = prompt * prompt_tokens + output * completion_tokens;

    min = Math.min(min, p);
    max = Math.max(max, p);
  }
  const ret = { min, max };
  priceRangeCache.set(cacheKey, ret);
  return ret;
}

// The maximum cost for one single call using the given model.
// We can't know the cost until after it happens, so this bound is useful for
// ensuring the user can afford to make a call.
export function getMaxCost(
  model: LanguageModelCore,
  markup_percentage: number,
): number {
  const { prompt_tokens, completion_tokens } = getLLMCost(
    model,
    markup_percentage,
  );
  const { max_tokens } = LLM_COST[model];
  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
}
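
// Worked example (illustrative, not in the original file): for "gpt-4o-8k"
// with a 30% markup, getLLMCost returns
//   prompt_tokens:     usd1Mtokens(2.5) * 1.3 = 0.00000325 USD/token
//   completion_tokens: usd1Mtokens(10)  * 1.3 = 0.000013   USD/token
// and getMaxCost bounds a single call by the pricier rate times max_tokens:
//   0.000013 * 8192 ≈ $0.11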

/**
 * Initially, we just had one system prompt for all LLMs.
 * This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
 * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
 */
export function getSystemPrompt(
  model: LanguageModel,
  _path: string | undefined,
) {
  // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
  const common = "Be brief.";
  const math = "Enclose any math formulas in $.";

  if (
    model2vendor(model).name === "openai" ||
    model.startsWith(OPENAI_PREFIX)
  ) {
    const mdCode =
      "Include the language directly after the triple backticks in all markdown code blocks.";
    return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
  }

  // mistral stupidly inserts anything mentioned in the prompt as examples, always.
  if (
    model2vendor(model).name === "mistralai" ||
    model.startsWith(MISTRAL_PREFIX)
  ) {
    return common;
  }

  if (
    model2vendor(model).name === "google" ||
    model.startsWith(GOOGLE_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (
    model2vendor(model).name === "ollama" ||
    model.startsWith(OLLAMA_PREFIX)
  ) {
    return `${common}`;
  }

  if (
    model2vendor(model).name === "anthropic" ||
    model.startsWith(ANTHROPIC_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
  return `${mdCode}\n${math}\n${common}`;
}
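
// Example (illustrative, not in the original file): for a Google model the
// vendor branch above yields just the math and brevity instructions:
//
//   getSystemPrompt("gemini-1.5-flash-8k", undefined)
//     === "Enclose any math formulas in $.\nBe brief."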