Path: blob/master/src/packages/frontend/client/llm.ts
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

import { delay } from "awaiting";
import { EventEmitter } from "events";

import { redux } from "@cocalc/frontend/app-framework";
import type { EmbeddingData } from "@cocalc/util/db-schema/llm";
import {
  MAX_EMBEDDINGS_TOKENS,
  MAX_REMOVE_LIMIT,
  MAX_SAVE_LIMIT,
  MAX_SEARCH_LIMIT,
} from "@cocalc/util/db-schema/llm";
import {
  LanguageModel,
  LanguageServiceCore,
  getSystemPrompt,
  isFreeModel,
  model2service,
} from "@cocalc/util/db-schema/llm-utils";
import * as message from "@cocalc/util/message";
import type { WebappClient } from "./client";
import type { History } from "./types";
import {
  LOCALIZATIONS,
  OTHER_SETTINGS_LOCALE_KEY,
  OTHER_SETTINGS_REPLY_ENGLISH_KEY,
} from "@cocalc/util/i18n/const";
import { sanitizeLocale } from "@cocalc/frontend/i18n";

interface QueryLLMProps {
  input: string;
  model: LanguageModel;
  system?: string;
  history?: History;
  project_id?: string;
  path?: string;
  chatStream?: ChatStream; // if given, uses chat stream
  tag?: string;
  startStreamExplicitly?: boolean;
}

interface EmbeddingsQuery {
  scope: string | string[];
  limit: number; // client automatically deals with large limit by making multiple requests (i.e., there is no limit on the limit)
  text?: string;
  filter?: object;
  selector?: { include?: string[]; exclude?: string[] };
  offset?: number | string;
}

export class LLMClient {
  private client: WebappClient;

  constructor(client: WebappClient) {
    this.client = client;
  }

  public async query(opts: QueryLLMProps): Promise<string> {
    return await this.queryLanguageModel(opts);
  }

  // ATTN/TODO: startExplicitly seems to be broken
  public queryStream(opts, startExplicitly = false): ChatStream {
    const chatStream = new ChatStream();
    (async () => {
      try {
        await this.queryLanguageModel({ ...opts, chatStream });
        if (!startExplicitly) {
          chatStream.emit("start");
        }
      } catch (err) {
        chatStream.emit("error", err);
      }
    })();
    return chatStream;
  }

  private async queryLanguageModel({
    input,
    model,
    system, // if not set, a default system prompt is used – disable by setting to ""
    history,
    project_id,
    path,
    chatStream,
    tag = "",
  }: QueryLLMProps): Promise<string> {
    system ??= getSystemPrompt(model, path);

    // remove all date entries from all history objects
    if (history != null) {
      for (const h of history) {
        delete h.date;
      }
    }

    if (!redux.getStore("projects").hasLanguageModelEnabled(project_id, tag)) {
      throw new Error(
        `Language model support is not currently enabled ${
          project_id ? "in this project" : "on this server"
        }. [tag=${tag}]`,
      );
    }

    input = input.trim();
    if (chatStream == null) {
      if (!input || input == "test") {
        return "Great! What can I assist you with today?";
      }
      if (input == "ping") {
        await delay(1000);
        return "Pong";
      }
    }

    // append a sentence to request to translate the output to the user's language – unless disabled
    const other_settings = redux.getStore("account").get("other_settings");
    const alwaysEnglish = !!other_settings.get(
      OTHER_SETTINGS_REPLY_ENGLISH_KEY,
    );
    const locale = sanitizeLocale(
      other_settings.get(OTHER_SETTINGS_LOCALE_KEY),
    );
    if (!alwaysEnglish && locale != "en") {
      const lang = LOCALIZATIONS[locale].name; // name is always in english
      system = `${system}\n\nYour answer must be written in the language ${lang}.`;
    }

    const is_cocalc_com = redux.getStore("customize").get("is_cocalc_com");

    if (!isFreeModel(model, is_cocalc_com)) {
      // Ollama and others are treated as "free"
      const service = model2service(model) as LanguageServiceCore;
      // when client gets non-free openai model request, check if allowed. If not, show quota modal.
      const { allowed, reason } =
        await this.client.purchases_client.isPurchaseAllowed(service);

      if (!allowed) {
        await this.client.purchases_client.quotaModal({
          service,
          reason,
          allowed,
        });
      }
      // Now check again after modal dismissed...
      const x = await this.client.purchases_client.isPurchaseAllowed(service);
      if (!x.allowed) {
        throw Error(reason);
      }
    }

    // do not import until needed -- it is HUGE!
    const {
      numTokensUpperBound,
      truncateHistory,
      truncateMessage,
      getMaxTokens,
    } = await import("@cocalc/frontend/misc/llm");

    // We always leave some room for output:
    const maxTokens = getMaxTokens(model) - 1000;
    input = truncateMessage(input, maxTokens);
    const n = numTokensUpperBound(input, getMaxTokens(model));
    if (n >= maxTokens) {
      history = undefined;
    } else if (history != null) {
      history = truncateHistory(history, maxTokens - n, model);
    }
    // console.log("chatgpt", { input, system, history, project_id, path });
    const mesg = message.chatgpt({
      text: input,
      system,
      project_id,
      path,
      history,
      model,
      tag: `app:${tag}`,
      stream: chatStream != null,
    });

    if (chatStream == null) {
      return (await this.client.async_call({ message: mesg })).text;
    }

    chatStream.once("start", () => {
      // streaming version
      this.client.call({
        message: mesg,
        error_event: true,
        cb: (err, resp) => {
          if (err) {
            chatStream.error(err);
          } else {
            chatStream.process(resp.text);
          }
        },
      });
    });

    return "see stream for output";
  }

  public async embeddings_search(
    query: EmbeddingsQuery,
  ): Promise<{ id: string; payload: object }[]> {
    let limit = Math.min(MAX_SEARCH_LIMIT, query.limit);
    const result = await this.embeddings_search_call({ ...query, limit });

    if (result.length >= MAX_SEARCH_LIMIT) {
      // get additional pages
      while (true) {
        const offset =
          query.text == null ? result[result.length - 1].id : result.length;
        const page = await this.embeddings_search_call({
          ...query,
          limit,
          offset,
        });
        // Include the new elements
        result.push(...page);
        if (page.length < MAX_SEARCH_LIMIT) {
          // didn't reach the limit, so we're done.
          break;
        }
      }
    }
    return result;
  }

  private async embeddings_search_call({
    scope,
    limit,
    text,
    filter,
    selector,
    offset,
  }: EmbeddingsQuery) {
    text = text?.trim();
    const resp = await this.client.async_call({
      message: message.openai_embeddings_search({
        scope,
        text,
        filter,
        limit,
        selector,
        offset,
      }),
    });
    return resp.matches;
  }

  public async embeddings_save({
    project_id,
    path,
    data: data0,
  }: {
    project_id: string;
    path: string;
    data: EmbeddingData[];
  }): Promise<string[]> {
    this.assertHasNeuralSearch();
    const { truncateMessage } = await import("@cocalc/frontend/misc/llm");

    // Make data be data0, but without mutating data0,
    // and with any text truncated to fit within the
    // embeddings limit.
    const data: EmbeddingData[] = [];
    for (const x of data0) {
      const { text } = x;
      if (typeof text != "string") {
        throw Error("text must be a string");
      }
      const text1 = truncateMessage(text, MAX_EMBEDDINGS_TOKENS);
      if (text1.length != text.length) {
        data.push({ ...x, text: text1 });
      } else {
        data.push(x);
      }
    }

    const ids: string[] = [];
    let v = data;
    while (v.length > 0) {
      const resp = await this.client.async_call({
        message: message.openai_embeddings_save({
          project_id,
          path,
          data: v.slice(0, MAX_SAVE_LIMIT),
        }),
      });
      ids.push(...resp.ids);
      v = v.slice(MAX_SAVE_LIMIT);
    }

    return ids;
  }

  public async embeddings_remove({
    project_id,
    path,
    data,
  }: {
    project_id: string;
    path: string;
    data: EmbeddingData[];
  }): Promise<string[]> {
    this.assertHasNeuralSearch();

    const ids: string[] = [];
    let v = data;
    while (v.length > 0) {
      const resp = await this.client.async_call({
        message: message.openai_embeddings_remove({
          project_id,
          path,
          data: v.slice(0, MAX_REMOVE_LIMIT),
        }),
      });
      ids.push(...resp.ids);
      v = v.slice(MAX_REMOVE_LIMIT);
    }

    return ids;
  }

  neuralSearchIsEnabled(): boolean {
    return !!redux.getStore("customize").get("neural_search_enabled");
  }

  assertHasNeuralSearch() {
    if (!this.neuralSearchIsEnabled()) {
      throw Error("OpenAI support is not currently enabled on this server");
    }
  }
}

class ChatStream extends EventEmitter {
  constructor() {
    super();
  }

  process(text?: string) {
    // emits undefined text when done (or err below)
    this.emit("token", text);
  }

  error(err) {
    this.emit("error", err);
  }
}

export type { ChatStream };
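
For orientation, here is a minimal usage sketch (not part of llm.ts) of how a caller might consume the streaming API above. The function name, the model id, and the tag are placeholder assumptions; the only behavior relied on is what the code itself shows: ChatStream emits "token" events with text chunks, an undefined token once the reply is complete, and "error" on failure.

// Hypothetical usage sketch, not part of llm.ts.
// Assumes an LLMClient instance is passed in; the model id and tag below
// are placeholders – use a LanguageModel that is enabled on your server.
function streamExample(llm: LLMClient) {
  const stream = llm.queryStream({
    input: "Explain what a Jupyter kernel is.",
    model: "gpt-4", // placeholder model id
    tag: "docs-example", // placeholder tag
  });
  let output = "";
  stream.on("token", (token?: string) => {
    if (token == null) {
      // ChatStream.process emits an undefined token when the reply is done.
      console.log("done:", output);
    } else {
      output += token;
    }
  });
  stream.on("error", (err) => {
    console.error("LLM query failed:", err);
  });
}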
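
A second hypothetical sketch covers the neural-search side. The scope string and query text are made-up values for illustration; the point is the shape of EmbeddingsQuery and that, per the interface comment, limits larger than MAX_SEARCH_LIMIT are handled transparently by paging inside embeddings_search.

// Hypothetical usage sketch, not part of llm.ts.
// Assumes LLMClient is imported from this module and that neural search
// is enabled on the server; the scope value is a placeholder.
async function searchExample(llm: LLMClient) {
  const matches = await llm.embeddings_search({
    scope: "some-scope-prefix/", // placeholder scope
    text: "how do I plot a dataframe?",
    limit: 10, // limits above MAX_SEARCH_LIMIT are fetched page by page
  });
  for (const { id, payload } of matches) {
    console.log(id, payload);
  }
}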