Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/util/db-schema/llm.ts
// NOTE: this is not just OpenAI, but also includes other models that we use
// Mentally, just ignore "openai" and instead focus on "gpt-*" or "codey" or whatever they are called.
// TODO: refactor this, the names of the tables, etc. to be more generic.

import { History } from "@cocalc/util/types/llm";
import { CREATED_BY, ID } from "./crm";
import { SCHEMA as schema } from "./index";
import { LanguageModel } from "./llm-utils";
import { Table } from "./types";

export interface LLMLogEntry {
  id: number;
  account_id?: string;
  analytics_cookie?: string; // at least one of analytics_cookie or account_id will be set
  expire?: Date;
  history?: History;
  input: string;
  model?: LanguageModel;
  output: string;
  path?: string;
  project_id?: string;
  prompt_tokens: number;
  system?: string;
  tag?: string; // useful for keeping track of where queries come from when doing analytics later
  time: Date;
  total_time_s: number; // how long the request took in seconds
  total_tokens: number;
}

Table({
  name: "openai_chatgpt_log", // historically a wrong name, don't change it
  fields: {
    id: ID,
    time: { type: "timestamp", desc: "When this particular chat happened." },
    analytics_cookie: {
      title: "Analytics Cookie",
      type: "string",
      desc: "The analytics cookie for the user that asked this question.",
    },
    account_id: CREATED_BY,
    system: {
      title: "System Context",
      type: "string",
      desc: "System context prompt.",
      render: {
        type: "markdown",
      },
    },
    input: {
      title: "Input",
      type: "string",
      desc: "Input text that was sent to chatgpt",
      render: {
        type: "markdown",
      },
    },
    output: {
      title: "Output",
      type: "string",
      desc: "Output text that was returned from chatgpt",
      render: {
        type: "markdown",
      },
    },
    history: {
      title: "History",
      type: "array",
      pg_type: "JSONB[]",
      desc: "Historical context for this thread of discussion",
      render: {
        type: "json",
      },
    },
    total_tokens: {
      type: "integer",
      desc: "The total number of tokens involved in this API call.",
    },
    prompt_tokens: {
      type: "integer",
      desc: "The number of tokens in the prompt.",
    },
    total_time_s: {
      type: "number",
      desc: "Total amount of time the API call took in seconds.",
    },
    project_id: {
      type: "uuid",
      render: { type: "project_link" },
    },
    path: {
      type: "string",
    },
    expire: {
      type: "timestamp",
      desc: "optional future date, when the entry will be deleted",
    },
    model: {
      type: "string",
    },
    tag: {
      type: "string",
      desc: "A string that the client can include that is useful for analytics later",
    },
  },
  rules: {
    desc: "Language Model Log",
    primary_key: "id",
    pg_indexes: ["account_id", "analytics_cookie", "time"],
    user_query: {
      get: {
        pg_where: [{ "account_id = $::UUID": "account_id" }],
        fields: {
          id: null,
          time: null,
          account_id: null,
          input: null,
          system: null,
          output: null,
          total_tokens: null,
          prompt_tokens: null,
          total_time_s: null,
          project_id: null,
          path: null,
          history: null,
          expire: null,
          model: null,
          tag: null,
        },
      },
      set: {
        // this is so that a user can expire any chats they want to have
        // expunged from the system completely.
        fields: {
          account_id: "account_id",
          id: true,
          expire: true,
        },
      },
    },
  },
});

Table({
  name: "crm_openai_chatgpt_log",
  rules: {
    virtual: "openai_chatgpt_log",
    primary_key: "id",
    user_query: {
      get: {
        pg_where: [],
        admin: true,
        fields: {
          id: null,
          time: null,
          account_id: null,
          analytics_cookie: null,
          input: null,
          system: null,
          output: null,
          total_tokens: null,
          prompt_tokens: null,
          total_time_s: null,
          project_id: null,
          path: null,
          history: null,
          model: null,
          tag: null,
        },
      },
    },
  },
  fields: schema.openai_chatgpt_log.fields,
});

export interface EmbeddingData {
  id: string; // fragment id, i.e., exactly what is after the # in the url
  text?: string; // text that is embedded using a model
  meta?: object; // extra metadata
  hash?: string; // hash that is used to know when we need to update the point; e.g., hash of text and meta.
}

// The *technical* limit is 8K tokens, but there's no good reason for a search
// to be really long, and it could be costly.
export const MAX_SEARCH_TEXT = 4000;
// Limit on the number of outputs when doing a search. This should stay under 10MB total,
// to avoid message size limits. Use paging for more, which the app client does automatically.
export const MAX_SEARCH_LIMIT = 200;

// Maximum number of distinct embeddings that a single client can save at once.
// The app client itself will automatically chunk the saves at this size.
export const MAX_SAVE_LIMIT = 50;
// Similar limit on removing items; can be larger since there is no vector embedding computation, etc.
export const MAX_REMOVE_LIMIT = 100;
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
export const MAX_EMBEDDINGS_TOKENS = 8191;

Table({
  name: "openai_embedding_log",
  fields: {
    id: ID,
    time: { type: "timestamp", desc: "When this particular chat happened." },
    account_id: CREATED_BY,
    tokens: {
      type: "integer",
      desc: "The total number of tokens of the input.",
    },
    model: {
      type: "string",
      desc: "The model that was used; if left blank it is assumed to be text-embedding-ada-002.",
    },
  },
  rules: {
    desc: "OpenAI Vector Embedding Log. This logs who is responsible for calls to openai. It is used to avoid abuse, have good analytics, and may eventually be used for pay-as-you-go, etc.",
    primary_key: "id",
    pg_indexes: ["((tokens IS NOT NULL))"],
  },
});

Table({
  name: "openai_embedding_cache",
  fields: {
    input_sha1: {
      title: "Sha1 hash of input",
      type: "string",
      pg_type: "char(40)",
    },
    vector: {
      type: "array",
      pg_type: "double precision[]",
      desc: "The vector obtained from openai.",
    },
    model: {
      type: "string",
      desc: "The model that was used; if left blank it is assumed to be text-embedding-ada-002.",
    },
    expire: {
      type: "timestamp",
      desc: "Date when the cache entry will be deleted. Some entries correspond to queries users type, so may be very frequent, or content in shared notebooks (e.g., students in class), so caching is very valuable when it is actively happening. Others don't get accessed, so we free up the space.",
    },
  },
  rules: {
    desc: "OpenAI Vector Embedding Cache. This is a cache of embeddings that we computed using openai. It helps us avoid having to recompute embeddings, which costs money and takes time. It is only used as a CACHE by our system. This entire table could be deleted at any time, and the only impact is that some things may be slower and we may have to pay to recompute embeddings, but nothing should *break*.",
    primary_key: "input_sha1",
    pg_indexes: ["((vector IS NOT NULL))"],
  },
});
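A note on the set rule in openai_chatgpt_log above: since only id and expire are writable through user_query, the sole mutation available to a user is scheduling one of their own chat log entries for deletion. A minimal sketch of the payload such a query would carry, assuming the standard user_query set shape; the helper function is illustrative, not part of this file:

// Hypothetical helper: build the user_query set payload that schedules
// chat log entry `id` for deletion in 24 hours. Per the set rule, only
// id and expire are writable; account_id is filled in by the server.
function expungeChatQuery(id: number): { openai_chatgpt_log: Partial<LLMLogEntry> } {
  return {
    openai_chatgpt_log: {
      id,
      expire: new Date(Date.now() + 24 * 60 * 60 * 1000), // 24 hours from now
    },
  };
}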
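The comments above state that the app client automatically chunks embedding saves at MAX_SAVE_LIMIT. A generic sketch of that chunking, where `save` is a hypothetical stand-in for the real client call:

// Split a large batch into chunks of at most MAX_SAVE_LIMIT and save each
// chunk in turn; `save` stands in for the actual network call.
async function saveAllEmbeddings(
  data: EmbeddingData[],
  save: (chunk: EmbeddingData[]) => Promise<void>,
): Promise<void> {
  for (let i = 0; i < data.length; i += MAX_SAVE_LIMIT) {
    await save(data.slice(i, i + MAX_SAVE_LIMIT));
  }
}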
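The input_sha1 field's name and char(40) pg_type suggest the cache key of openai_embedding_cache is the hex SHA-1 digest of the embedded input (a hex SHA-1 digest is exactly 40 characters). The derivation isn't shown in this file, so the helper below is an assumption, sketched with Node's built-in crypto module:

import { createHash } from "crypto";

// Hypothetical helper: derive the openai_embedding_cache primary key as the
// hex SHA-1 digest of the input text (assumed convention, not confirmed here).
function inputSha1(input: string): string {
  return createHash("sha1").update(input).digest("hex");
}

// For example, inputSha1("hello world") returns
// "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", i.e., 40 hex characters, matching char(40).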