GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/frontend/client/llm.ts

/*
 * This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 * License: MS-RSL – see LICENSE.md for details
 */

import { delay } from "awaiting";
import { EventEmitter } from "events";

import { redux } from "@cocalc/frontend/app-framework";
import type { EmbeddingData } from "@cocalc/util/db-schema/llm";
import {
  MAX_EMBEDDINGS_TOKENS,
  MAX_REMOVE_LIMIT,
  MAX_SAVE_LIMIT,
  MAX_SEARCH_LIMIT,
} from "@cocalc/util/db-schema/llm";
import {
  LanguageModel,
  LanguageServiceCore,
  getSystemPrompt,
  isFreeModel,
  model2service,
} from "@cocalc/util/db-schema/llm-utils";
import * as message from "@cocalc/util/message";
import type { WebappClient } from "./client";
import type { History } from "./types";
import {
  LOCALIZATIONS,
  OTHER_SETTINGS_LOCALE_KEY,
  OTHER_SETTINGS_REPLY_ENGLISH_KEY,
} from "@cocalc/util/i18n/const";
import { sanitizeLocale } from "@cocalc/frontend/i18n";
interface QueryLLMProps {
  input: string;
  model: LanguageModel;
  system?: string;
  history?: History;
  project_id?: string;
  path?: string;
  chatStream?: ChatStream; // if given, uses chat stream
  tag?: string;
  startStreamExplicitly?: boolean;
}

interface EmbeddingsQuery {
  scope: string | string[];
  limit: number; // client automatically deals with a large limit by making multiple requests (i.e., there is no limit on the limit)
  text?: string;
  filter?: object;
  selector?: { include?: string[]; exclude?: string[] };
  offset?: number | string;
}

export class LLMClient {
  private client: WebappClient;

  constructor(client: WebappClient) {
    this.client = client;
  }

  public async query(opts: QueryLLMProps): Promise<string> {
    return await this.queryLanguageModel(opts);
  }

  // ATTN/TODO: startExplicitly seems to be broken
  public queryStream(opts, startExplicitly = false): ChatStream {
    const chatStream = new ChatStream();
    (async () => {
      try {
        await this.queryLanguageModel({ ...opts, chatStream });
        if (!startExplicitly) {
          chatStream.emit("start");
        }
      } catch (err) {
        chatStream.emit("error", err);
      }
    })();
    return chatStream;
  }
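
  // A minimal sketch (for illustration; the variable `llm` and the call site are
  // hypothetical) of how a caller typically consumes the ChatStream returned by
  // queryStream above:
  //
  //   const stream = llm.queryStream({ input, model, tag: "example" });
  //   stream.on("token", (token?: string) => {
  //     if (token == null) {
  //       // undefined signals the end of the stream (see ChatStream.process)
  //     } else {
  //       // append `token` to the rendered output
  //     }
  //   });
  //   stream.on("error", (err) => {
  //     // surface the error to the user
  //   });
  //
  // With the default startExplicitly = false, "start" is emitted automatically
  // once queryLanguageModel has attached its listener, so streaming begins
  // without any further action by the caller.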

  private async queryLanguageModel({
    input,
    model,
    system, // if not set, a default system prompt is used – disable by setting to ""
    history,
    project_id,
    path,
    chatStream,
    tag = "",
  }: QueryLLMProps): Promise<string> {
    system ??= getSystemPrompt(model, path);

    // remove all date entries from all history objects
    if (history != null) {
      for (const h of history) {
        delete h.date;
      }
    }

    if (!redux.getStore("projects").hasLanguageModelEnabled(project_id, tag)) {
      throw new Error(
        `Language model support is not currently enabled ${
          project_id ? "in this project" : "on this server"
        }. [tag=${tag}]`,
      );
    }

    input = input.trim();
    if (chatStream == null) {
      if (!input || input == "test") {
        return "Great! What can I assist you with today?";
      }
      if (input == "ping") {
        await delay(1000);
        return "Pong";
      }
    }

    // append a sentence to the request to translate the output to the user's language – unless disabled
    const other_settings = redux.getStore("account").get("other_settings");
    const alwaysEnglish = !!other_settings.get(
      OTHER_SETTINGS_REPLY_ENGLISH_KEY,
    );
    const locale = sanitizeLocale(
      other_settings.get(OTHER_SETTINGS_LOCALE_KEY),
    );
    if (!alwaysEnglish && locale != "en") {
      const lang = LOCALIZATIONS[locale].name; // name is always in English
      system = `${system}\n\nYour answer must be written in the language ${lang}.`;
    }

    const is_cocalc_com = redux.getStore("customize").get("is_cocalc_com");

    if (!isFreeModel(model, is_cocalc_com)) {
      // Ollama and others are treated as "free"
      const service = model2service(model) as LanguageServiceCore;
      // when the client gets a non-free openai model request, check if it is allowed. If not, show the quota modal.
      const { allowed, reason } =
        await this.client.purchases_client.isPurchaseAllowed(service);

      if (!allowed) {
        await this.client.purchases_client.quotaModal({
          service,
          reason,
          allowed,
        });
      }
      // Now check again after the modal is dismissed...
      const x = await this.client.purchases_client.isPurchaseAllowed(service);
      if (!x.allowed) {
        throw Error(reason);
      }
    }

    // do not import until needed -- it is HUGE!
    const {
      numTokensUpperBound,
      truncateHistory,
      truncateMessage,
      getMaxTokens,
    } = await import("@cocalc/frontend/misc/llm");

    // We always leave some room for output:
    const maxTokens = getMaxTokens(model) - 1000;
    input = truncateMessage(input, maxTokens);
    const n = numTokensUpperBound(input, getMaxTokens(model));
    if (n >= maxTokens) {
      history = undefined;
    } else if (history != null) {
      history = truncateHistory(history, maxTokens - n, model);
    }
    // console.log("chatgpt", { input, system, history, project_id, path });
    const mesg = message.chatgpt({
      text: input,
      system,
      project_id,
      path,
      history,
      model,
      tag: `app:${tag}`,
      stream: chatStream != null,
    });

    if (chatStream == null) {
      return (await this.client.async_call({ message: mesg })).text;
    }

    chatStream.once("start", () => {
      // streaming version
      this.client.call({
        message: mesg,
        error_event: true,
        cb: (err, resp) => {
          if (err) {
            chatStream.error(err);
          } else {
            chatStream.process(resp.text);
          }
        },
      });
    });

    return "see stream for output";
  }

  public async embeddings_search(
    query: EmbeddingsQuery,
  ): Promise<{ id: string; payload: object }[]> {
    let limit = Math.min(MAX_SEARCH_LIMIT, query.limit);
    const result = await this.embeddings_search_call({ ...query, limit });

    if (result.length >= MAX_SEARCH_LIMIT) {
      // get additional pages
      while (true) {
        const offset =
          query.text == null ? result[result.length - 1].id : result.length;
        const page = await this.embeddings_search_call({
          ...query,
          limit,
          offset,
        });
        // Include the new elements
        result.push(...page);
        if (page.length < MAX_SEARCH_LIMIT) {
          // didn't reach the limit, so we're done.
          break;
        }
      }
    }
    return result;
  }
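
  // Note on the pagination above: the offset passed to embeddings_search_call has
  // two meanings. Without a search text it is the id of the last match seen so
  // far; with a text query it is the number of matches already retrieved.
  // A hypothetical call (the scope string and limit are made up for illustration):
  //
  //   const matches = await llm.embeddings_search({
  //     scope: "some-scope/",
  //     limit: 1000, // may exceed MAX_SEARCH_LIMIT; extra pages are fetched here
  //     text: "gradient descent",
  //   });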

  private async embeddings_search_call({
    scope,
    limit,
    text,
    filter,
    selector,
    offset,
  }: EmbeddingsQuery) {
    text = text?.trim();
    const resp = await this.client.async_call({
      message: message.openai_embeddings_search({
        scope,
        text,
        filter,
        limit,
        selector,
        offset,
      }),
    });
    return resp.matches;
  }

  public async embeddings_save({
    project_id,
    path,
    data: data0,
  }: {
    project_id: string;
    path: string;
    data: EmbeddingData[];
  }): Promise<string[]> {
    this.assertHasNeuralSearch();
    const { truncateMessage } = await import("@cocalc/frontend/misc/llm");

    // Make data be data0, but without mutating data0,
    // and with any text truncated to fit within the
    // embeddings limit.
    const data: EmbeddingData[] = [];
    for (const x of data0) {
      const { text } = x;
      if (typeof text != "string") {
        throw Error("text must be a string");
      }
      const text1 = truncateMessage(text, MAX_EMBEDDINGS_TOKENS);
      if (text1.length != text.length) {
        data.push({ ...x, text: text1 });
      } else {
        data.push(x);
      }
    }

    const ids: string[] = [];
    let v = data;
    while (v.length > 0) {
      const resp = await this.client.async_call({
        message: message.openai_embeddings_save({
          project_id,
          path,
          data: v.slice(0, MAX_SAVE_LIMIT),
        }),
      });
      ids.push(...resp.ids);
      v = v.slice(MAX_SAVE_LIMIT);
    }

    return ids;
  }

  public async embeddings_remove({
    project_id,
    path,
    data,
  }: {
    project_id: string;
    path: string;
    data: EmbeddingData[];
  }): Promise<string[]> {
    this.assertHasNeuralSearch();

    const ids: string[] = [];
    let v = data;
    while (v.length > 0) {
      const resp = await this.client.async_call({
        message: message.openai_embeddings_remove({
          project_id,
          path,
          data: v.slice(0, MAX_REMOVE_LIMIT),
        }),
      });
      ids.push(...resp.ids);
      v = v.slice(MAX_REMOVE_LIMIT);
    }

    return ids;
  }

  neuralSearchIsEnabled(): boolean {
    return !!redux.getStore("customize").get("neural_search_enabled");
  }

  assertHasNeuralSearch() {
    if (!this.neuralSearchIsEnabled()) {
      throw Error("OpenAI support is not currently enabled on this server");
    }
  }
}

class ChatStream extends EventEmitter {
  constructor() {
    super();
  }

  process(text?: string) {
    // emits undefined text when done (or err below)
    this.emit("token", text);
  }

  error(err) {
    this.emit("error", err);
  }
}

export type { ChatStream };
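
// A minimal illustrative helper, not part of the original module (the name
// `collectStream` is hypothetical): it shows one way to turn the ChatStream
// event contract above into a Promise that resolves with the full response text.
function collectStream(stream: ChatStream): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    let output = "";
    stream.on("token", (token?: string) => {
      if (token == null) {
        // ChatStream.process emits undefined exactly once when the stream is done.
        resolve(output);
      } else {
        output += token;
      }
    });
    stream.on("error", reject);
  });
}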