Path: blob/master/src/packages/frontend/client/llm.ts
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

import { delay } from "awaiting";
import { EventEmitter } from "events";

import { redux } from "@cocalc/frontend/app-framework";
import type { EmbeddingData } from "@cocalc/util/db-schema/llm";
import {
  MAX_EMBEDDINGS_TOKENS,
  MAX_REMOVE_LIMIT,
  MAX_SAVE_LIMIT,
  MAX_SEARCH_LIMIT,
} from "@cocalc/util/db-schema/llm";
import {
  LanguageModel,
  LanguageServiceCore,
  getSystemPrompt,
  isFreeModel,
  model2service,
} from "@cocalc/util/db-schema/llm-utils";
import * as message from "@cocalc/util/message";
import type { WebappClient } from "./client";
import type { History } from "./types";
import {
  LOCALIZATIONS,
  OTHER_SETTINGS_LOCALE_KEY,
  OTHER_SETTINGS_REPLY_ENGLISH_KEY,
} from "@cocalc/util/i18n/const";
import { sanitizeLocale } from "@cocalc/frontend/i18n";

interface QueryLLMProps {
  input: string;
  model: LanguageModel;
  system?: string;
  history?: History;
  project_id?: string;
  path?: string;
  chatStream?: ChatStream; // if given, uses chat stream
  tag?: string;
  startStreamExplicitly?: boolean;
}

interface EmbeddingsQuery {
  scope: string | string[];
  limit: number; // client automatically deals with large limit by making multiple requests (i.e., there is no limit on the limit)
  text?: string;
  filter?: object;
  selector?: { include?: string[]; exclude?: string[] };
  offset?: number | string;
}

export class LLMClient {
  private client: WebappClient;

  constructor(client: WebappClient) {
    this.client = client;
  }

  public async query(opts: QueryLLMProps): Promise<string> {
    return await this.queryLanguageModel(opts);
  }

  // ATTN/TODO: startExplicitly seems to be broken
  public queryStream(opts, startExplicitly = false): ChatStream {
    const chatStream = new ChatStream();
    (async () => {
      try {
        await this.queryLanguageModel({ ...opts, chatStream });
        if (!startExplicitly) {
          chatStream.emit("start");
        }
      } catch (err) {
        chatStream.emit("error", err);
      }
    })();
    return chatStream;
  }

  private async queryLanguageModel({
    input,
    model,
    system, // if not set, a default system prompt is used – disable by setting to ""
    history,
    project_id,
    path,
    chatStream,
    tag = "",
  }: QueryLLMProps): Promise<string> {
    system ??= getSystemPrompt(model, path);

    // remove all date entries from all history objects
    if (history != null) {
      for (const h of history) {
        delete h.date;
      }
    }

    if (!redux.getStore("projects").hasLanguageModelEnabled(project_id, tag)) {
      throw new Error(
        `Language model support is not currently enabled ${
          project_id ? "in this project" : "on this server"
        }. [tag=${tag}]`,
      );
    }

    input = input.trim();
    if (chatStream == null) {
      if (!input || input == "test") {
        return "Great! What can I assist you with today?";
      }
      if (input == "ping") {
        await delay(1000);
        return "Pong";
      }
    }

    // append a sentence to request to translate the output to the user's language – unless disabled
    const other_settings = redux.getStore("account").get("other_settings");
    const alwaysEnglish = !!other_settings.get(
      OTHER_SETTINGS_REPLY_ENGLISH_KEY,
    );
    const locale = sanitizeLocale(
      other_settings.get(OTHER_SETTINGS_LOCALE_KEY),
    );
    if (!alwaysEnglish && locale != "en") {
      const lang = LOCALIZATIONS[locale].name; // name is always in english
      system = `${system}\n\nYour answer must be written in the language ${lang}.`;
    }

    const is_cocalc_com = redux.getStore("customize").get("is_cocalc_com");

    if (!isFreeModel(model, is_cocalc_com)) {
      // Ollama and others are treated as "free"
      const service = model2service(model) as LanguageServiceCore;
      // when client gets non-free openai model request, check if allowed. If not, show quota modal.
      const { allowed, reason } =
        await this.client.purchases_client.isPurchaseAllowed(service);

      if (!allowed) {
        await this.client.purchases_client.quotaModal({
          service,
          reason,
          allowed,
        });
      }
      // Now check again after modal dismissed...
      const x = await this.client.purchases_client.isPurchaseAllowed(service);
      if (!x.allowed) {
        throw Error(reason);
      }
    }

    // do not import until needed -- it is HUGE!
    const {
      numTokensUpperBound,
      truncateHistory,
      truncateMessage,
      getMaxTokens,
    } = await import("@cocalc/frontend/misc/llm");

    // We always leave some room for output:
    const maxTokens = getMaxTokens(model) - 1000;
    input = truncateMessage(input, maxTokens);
    const n = numTokensUpperBound(input, getMaxTokens(model));
    if (n >= maxTokens) {
      history = undefined;
    } else if (history != null) {
      history = truncateHistory(history, maxTokens - n, model);
    }
    // console.log("chatgpt", { input, system, history, project_id, path });
    const mesg = message.chatgpt({
      text: input,
      system,
      project_id,
      path,
      history,
      model,
      tag: `app:${tag}`,
      stream: chatStream != null,
    });

    if (chatStream == null) {
      return (await this.client.async_call({ message: mesg })).text;
    }

    chatStream.once("start", () => {
      // streaming version
      this.client.call({
        message: mesg,
        error_event: true,
        cb: (err, resp) => {
          if (err) {
            chatStream.error(err);
          } else {
            chatStream.process(resp.text);
          }
        },
      });
    });

    return "see stream for output";
  }

  public async embeddings_search(
    query: EmbeddingsQuery,
  ): Promise<{ id: string; payload: object }[]> {
    let limit = Math.min(MAX_SEARCH_LIMIT, query.limit);
    const result = await this.embeddings_search_call({ ...query, limit });

    if (result.length >= MAX_SEARCH_LIMIT) {
      // get additional pages
      while (true) {
        const offset =
          query.text == null ? result[result.length - 1].id : result.length;
        const page = await this.embeddings_search_call({
          ...query,
          limit,
          offset,
        });
        // Include the new elements
        result.push(...page);
        if (page.length < MAX_SEARCH_LIMIT) {
          // didn't reach the limit, so we're done.
          break;
        }
      }
    }
    return result;
  }

  private async embeddings_search_call({
    scope,
    limit,
    text,
    filter,
    selector,
    offset,
  }: EmbeddingsQuery) {
    text = text?.trim();
    const resp = await this.client.async_call({
      message: message.openai_embeddings_search({
        scope,
        text,
        filter,
        limit,
        selector,
        offset,
      }),
    });
    return resp.matches;
  }

  public async embeddings_save({
    project_id,
    path,
    data: data0,
  }: {
    project_id: string;
    path: string;
    data: EmbeddingData[];
  }): Promise<string[]> {
    this.assertHasNeuralSearch();
    const { truncateMessage } = await import("@cocalc/frontend/misc/llm");

    // Make data be data0, but without mutating data0,
    // and with any text truncated to fit within the
    // embeddings limit.
    const data: EmbeddingData[] = [];
    for (const x of data0) {
      const { text } = x;
      if (typeof text != "string") {
        throw Error("text must be a string");
      }
      const text1 = truncateMessage(text, MAX_EMBEDDINGS_TOKENS);
      if (text1.length != text.length) {
        data.push({ ...x, text: text1 });
      } else {
        data.push(x);
      }
    }

    const ids: string[] = [];
    let v = data;
    while (v.length > 0) {
      const resp = await this.client.async_call({
        message: message.openai_embeddings_save({
          project_id,
          path,
          data: v.slice(0, MAX_SAVE_LIMIT),
        }),
      });
      ids.push(...resp.ids);
      v = v.slice(MAX_SAVE_LIMIT);
    }

    return ids;
  }

  public async embeddings_remove({
    project_id,
    path,
    data,
  }: {
    project_id: string;
    path: string;
    data: EmbeddingData[];
  }): Promise<string[]> {
    this.assertHasNeuralSearch();

    const ids: string[] = [];
    let v = data;
    while (v.length > 0) {
      const resp = await this.client.async_call({
        message: message.openai_embeddings_remove({
          project_id,
          path,
          data: v.slice(0, MAX_REMOVE_LIMIT),
        }),
      });
      ids.push(...resp.ids);
      v = v.slice(MAX_REMOVE_LIMIT);
    }

    return ids;
  }

  neuralSearchIsEnabled(): boolean {
    return !!redux.getStore("customize").get("neural_search_enabled");
  }

  assertHasNeuralSearch() {
    if (!this.neuralSearchIsEnabled()) {
      throw Error("OpenAI support is not currently enabled on this server");
    }
  }
}

class ChatStream extends EventEmitter {
  constructor() {
    super();
  }

  process(text?: string) {
    // emits undefined text when done (or err below)
    this.emit("token", text);
  }

  error(err) {
    this.emit("error", err);
  }
}

export type { ChatStream };
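
For orientation, here is a minimal usage sketch (not part of llm.ts) of how a caller might consume the streaming API above. The function name, the model id, and the tag are placeholder assumptions; the only behavior relied on is what the code itself shows: ChatStream emits "token" events with text chunks, an undefined token once the reply is complete, and "error" on failure.

// Hypothetical usage sketch, not part of llm.ts.
// Assumes an LLMClient instance is passed in; the model id and tag below
// are placeholders – use a LanguageModel that is enabled on your server.
function streamExample(llm: LLMClient) {
  const stream = llm.queryStream({
    input: "Explain what a Jupyter kernel is.",
    model: "gpt-4", // placeholder model id
    tag: "docs-example", // placeholder tag
  });
  let output = "";
  stream.on("token", (token?: string) => {
    if (token == null) {
      // ChatStream.process emits an undefined token when the reply is done.
      console.log("done:", output);
    } else {
      output += token;
    }
  });
  stream.on("error", (err) => {
    console.error("LLM query failed:", err);
  });
}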
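
A second hypothetical sketch covers the neural-search side. The scope string and query text are made-up values for illustration; the point is the shape of EmbeddingsQuery and that, per the interface comment, limits larger than MAX_SEARCH_LIMIT are handled transparently by paging inside embeddings_search.

// Hypothetical usage sketch, not part of llm.ts.
// Assumes LLMClient is imported from this module and that neural search
// is enabled on the server; the scope value is a placeholder.
async function searchExample(llm: LLMClient) {
  const matches = await llm.embeddings_search({
    scope: "some-scope-prefix/", // placeholder scope
    text: "how do I plot a dataframe?",
    limit: 10, // limits above MAX_SEARCH_LIMIT are fetched page by page
  });
  for (const { id, payload } of matches) {
    console.log(id, payload);
  }
}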