CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/util/db-schema/projects.ts
Views: 687
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import { State } from "@cocalc/util/compute-states";
7
import { PurchaseInfo } from "@cocalc/util/licenses/purchase/types";
8
import { deep_copy } from "@cocalc/util/misc";
9
import {
10
ExecuteCodeOptions,
11
ExecuteCodeOptionsAsyncGet,
12
ExecuteCodeOutput,
13
} from "@cocalc/util/types/execute-code";
14
import { DEFAULT_QUOTAS } from "@cocalc/util/upgrade-spec";
15
16
import { NOTES } from "./crm";
17
import { FALLBACK_COMPUTE_IMAGE } from "./defaults";
18
import { SCHEMA as schema } from "./index";
19
import { Table } from "./types";
20
21
export const MAX_FILENAME_SEARCH_RESULTS = 100;
22
23
Table({
24
name: "projects",
25
rules: {
26
primary_key: "project_id",
27
//# A lot depends on this being right at all times, e.g., restart state,
28
//# so do not use db_standby yet.
29
//# It is simply not robust enough.
30
//# db_standby : 'safer'
31
32
pg_indexes: [
33
"last_edited",
34
"created", // TODO: this could have a fillfactor of 100
35
"USING GIN (users)", // so get_collaborator_ids is fast
36
"lti_id",
37
"USING GIN (state)", // so getting all running projects is fast (e.g. for site_license_usage_log... but also manage-state)
38
"((state #>> '{state}'))", // projecting the "state" (running, etc.) for its own index – the GIN index above still causes a scan, which we want to avoid.
39
"((state ->> 'state'))", // same reason as above. both syntaxes appear and we have to index both.
40
"((state IS NULL))", // not covered by the above
41
"((settings ->> 'always_running'))", // to quickly know which projects have this setting
42
"((run_quota ->> 'always_running'))", // same reason as above
43
"deleted", // in various queries we quickly fiter deleted projects
44
"site_license", // for queries across projects related to site_license#>>{license_id}
45
],
46
47
crm_indexes: ["last_edited"],
48
49
user_query: {
50
get: {
51
pg_where: ["last_edited >= NOW() - interval '21 days'", "projects"],
52
pg_where_load: ["last_edited >= NOW() - interval '2 days'", "projects"],
53
options: [{ limit: 100, order_by: "-last_edited" }],
54
options_load: [{ limit: 15, order_by: "-last_edited" }],
55
pg_changefeed: "projects",
56
throttle_changes: 2000,
57
fields: {
58
project_id: null,
59
name: null,
60
title: "",
61
description: "",
62
users: {},
63
invite: null, // who has been invited to this project via email
64
invite_requests: null, // who has requested to be invited
65
deleted: null,
66
host: null,
67
settings: DEFAULT_QUOTAS,
68
run_quota: null,
69
site_license: null,
70
status: null,
71
state: null,
72
last_edited: null,
73
last_active: null,
74
action_request: null, // last requested action -- {action:?, time:?, started:?, finished:?, err:?}
75
course: null,
76
// if the value is not set, we have to use the old default prior to summer 2020 (Ubuntu 18.04, not 20.04!)
77
compute_image: FALLBACK_COMPUTE_IMAGE,
78
created: null,
79
env: null,
80
sandbox: null,
81
avatar_image_tiny: null,
82
// do NOT add avatar_image_full here or it will get included in changefeeds, which we don't want.
83
// instead it gets its own virtual table.
84
pay_as_you_go_quotas: null,
85
},
86
},
87
set: {
88
// NOTE: for security reasons users CANNOT set the course field via a user query;
89
// instead use the api/v2/projects/course/set-course-field api endpoint.
90
fields: {
91
project_id: "project_write",
92
title: true,
93
name: true,
94
description: true,
95
deleted: true,
96
invite_requests: true, // project collabs can modify this (e.g., to remove from it once user added or rejected)
97
users(obj, db, account_id) {
98
return db._user_set_query_project_users(obj, account_id);
99
},
100
action_request: true, // used to request that an action be performed, e.g., "save"; handled by before_change
101
compute_image: true,
102
site_license: true,
103
env: true,
104
sandbox: true,
105
avatar_image_tiny: true,
106
avatar_image_full: true,
107
},
108
required_fields: {
109
project_id: true,
110
},
111
before_change(database, old_val, new_val, account_id, cb) {
112
database._user_set_query_project_change_before(
113
old_val,
114
new_val,
115
account_id,
116
cb,
117
);
118
},
119
120
on_change(database, old_val, new_val, account_id, cb) {
121
database._user_set_query_project_change_after(
122
old_val,
123
new_val,
124
account_id,
125
cb,
126
);
127
},
128
},
129
},
130
131
project_query: {
132
get: {
133
pg_where: [{ "project_id = $::UUID": "project_id" }],
134
fields: {
135
project_id: null,
136
title: null,
137
description: null,
138
status: null,
139
},
140
},
141
set: {
142
fields: {
143
project_id: "project_id",
144
title: true,
145
description: true,
146
status: true,
147
},
148
},
149
},
150
},
151
fields: {
152
project_id: {
153
type: "uuid",
154
desc: "The project id, which is the primary key that determines the project.",
155
},
156
name: {
157
type: "string",
158
pg_type: "VARCHAR(100)",
159
desc: "The optional name of this project. Must be globally unique (up to case) across all projects with a given *owner*. It can be between 1 and 100 characters from a-z A-Z 0-9 period and dash.",
160
render: { type: "text", maxLen: 100, editable: true },
161
},
162
title: {
163
type: "string",
164
desc: "The short title of the project. Should use no special formatting, except hashtags.",
165
render: { type: "project_link", project_id: "project_id" },
166
},
167
description: {
168
type: "string",
169
desc: "A longer textual description of the project. This can include hashtags and should be formatted using markdown.",
170
render: {
171
type: "markdown",
172
maxLen: 1024,
173
editable: true,
174
},
175
}, // markdown rendering possibly not implemented
176
users: {
177
title: "Collaborators",
178
type: "map",
179
desc: "This is a map from account_id's to {hide:bool, group:'owner'|'collaborator', upgrades:{memory:1000, ...}, ssh:{...}}.",
180
render: { type: "usersmap", editable: true },
181
},
182
invite: {
183
type: "map",
184
desc: "Map from email addresses to {time:when invite sent, error:error message if there was one}",
185
date: ["time"],
186
},
187
invite_requests: {
188
type: "map",
189
desc: "This is a map from account_id's to {timestamp:?, message:'i want to join because...'}.",
190
date: ["timestamp"],
191
},
192
deleted: {
193
type: "boolean",
194
desc: "Whether or not this project is deleted.",
195
render: { type: "boolean", editable: true },
196
},
197
host: {
198
type: "map",
199
desc: "This is a map {host:'hostname_of_server', assigned:timestamp of when assigned to that server}.",
200
date: ["assigned"],
201
},
202
settings: {
203
type: "map",
204
desc: 'This is a map that defines the free base quotas that a project has. It is of the form {cores: 1.5, cpu_shares: 768, disk_quota: 1000, memory: 2000, mintime: 36000000, network: 0, ephemeral_state:0, ephemeral_disk:0, always_running:0}. WARNING: some of the values are strings not numbers in the database right now, e.g., disk_quota:"1000".',
205
},
206
site_license: {
207
type: "map",
208
desc: "This is a map that defines upgrades (just when running the project) that come from a site license, and also the licenses that are applied to this project. The format is {license_id:{memory:?, mintime:?, ...}} where the target of the license_id is the same as for the settings field. The license_id is the uuid of the license that contributed these upgrades. To tell cocalc to use a license for a project, a user sets site_license to {license_id:{}}, and when it is requested to start the project, the backend decides what allocation license_id provides and changes the field accordingly, i.e., changes {license_id:{},...} to {license_id:{memory:?,...},...}",
209
},
210
status: {
211
type: "map",
212
desc: "This is a map computed by the status command run inside a project, and slightly enhanced by the compute server, which gives extensive status information about a project. See the exported ProjectStatus interface defined in the code here.",
213
},
214
state: {
215
type: "map",
216
desc: 'Info about the state of this project of the form {error: "", state: "running" (etc), time: timestamp, ip?:"ip address where project is"}, where time is when the state was last computed. See COMPUTE_STATES in the compute-states file for state.state and the ProjectState interface defined below in code.',
217
date: ["time"],
218
},
219
last_edited: {
220
type: "timestamp",
221
desc: "The last time some file was edited in this project. This is the last time that the file_use table was updated for this project.",
222
},
223
last_started: {
224
type: "timestamp",
225
desc: "The last time the project started running.",
226
},
227
last_active: {
228
type: "map",
229
desc: "Map from account_id's to the timestamp of when the user with that account_id touched this project.",
230
date: "all",
231
},
232
created: {
233
type: "timestamp",
234
desc: "When the project was created.",
235
},
236
action_request: {
237
type: "map",
238
desc: "Request state change action for project: {action:['start', 'stop'], started:timestamp, err:?, finished:timestamp}",
239
date: ["started", "finished"],
240
},
241
storage: {
242
type: "map",
243
desc: "(DEPRECATED) This is a map {host:'hostname_of_server', assigned:when first saved here, saved:when last saved here}.",
244
date: ["assigned", "saved"],
245
},
246
last_backup: {
247
type: "timestamp",
248
desc: "(DEPRECATED) Timestamp of last off-disk successful backup using bup to Google cloud storage",
249
},
250
storage_request: {
251
type: "map",
252
desc: "(DEPRECATED) {action:['save', 'close', 'move', 'open'], requested:timestap, pid:?, target:?, started:timestamp, finished:timestamp, err:?}",
253
date: ["started", "finished", "requested"],
254
},
255
course: {
256
type: "map",
257
desc: "{project_id:[id of project that contains .course file], path:[path to .course file], pay:?, payInfo:?, email_address:[optional email address of student -- used if account_id not known], account_id:[account id of student]}, where pay is either not set (or equals falseish) or is a timestamp by which the students must pay. If payInfo is set, it specifies the parameters of the license the students should purchase.",
258
date: ["pay"],
259
},
260
storage_server: {
261
type: "integer",
262
desc: "(DEPRECATED) Number of the Kubernetes storage server with the data for this project: one of 0, 1, 2, ...",
263
},
264
storage_ready: {
265
type: "boolean",
266
desc: "(DEPRECATED) Whether storage is ready to be used on the storage server. Do NOT try to start project until true; this gets set by storage daemon when it notices that run is true.",
267
},
268
disk_size: {
269
type: "integer",
270
desc: "Size in megabytes of the project disk.",
271
},
272
resources: {
273
type: "map",
274
desc: 'Object of the form {requests:{memory:"30Mi",cpu:"5m"}, limits:{memory:"100Mi",cpu:"300m"}} which is passed to the k8s resources section for this pod.',
275
},
276
preemptible: {
277
type: "boolean",
278
desc: "If true, allow to run on preemptible nodes.",
279
},
280
idle_timeout: {
281
type: "integer",
282
desc: "If given and nonzero, project will be killed if it is idle for this many **minutes**, where idle *means* that last_edited has not been updated.",
283
},
284
run_quota: {
285
type: "map",
286
desc: "If project is running, this is the quota that it is running with.",
287
},
288
compute_image: {
289
type: "string",
290
desc: "Specify the name of the underlying (kucalc) compute image.",
291
},
292
addons: {
293
type: "map",
294
desc: "Configure (kucalc specific) addons for projects. (e.g. academic software, license keys, ...)",
295
},
296
lti_id: {
297
type: "array",
298
pg_type: "TEXT[]",
299
desc: "This is a specific ID derived from an LTI context",
300
},
301
lti_data: {
302
type: "map",
303
desc: "extra information related to LTI",
304
},
305
env: {
306
type: "map",
307
desc: "Additional environment variables (TS: {[key:string]:string})",
308
render: { type: "json", editable: true },
309
},
310
sandbox: {
311
type: "boolean",
312
desc: "If set to true, then any user who attempts to access this project is automatically added as a collaborator to it. Only the project owner can change this setting.",
313
render: { type: "boolean", editable: true },
314
},
315
avatar_image_tiny: {
316
title: "Image",
317
type: "string",
318
desc: "tiny (32x32) visual image associated with the project. Suitable to include as part of changefeed, since about 3kb.",
319
render: { type: "image" },
320
},
321
avatar_image_full: {
322
title: "Image",
323
type: "string",
324
desc: "A visual image associated with the project. Could be 150kb. NOT include as part of changefeed of projects, since potentially big (e.g., 200kb x 1000 projects = 200MB!).",
325
render: { type: "image" },
326
},
327
pay_as_you_go_quotas: {
328
type: "map",
329
desc: "Pay as you go quotas that users set so that when they run this project, it gets upgraded to at least what is specified here, and user gets billed later for what is used. Any changes to this table could result in money being spent, so should only be done via the api. This is a map from the account_id of the user that set the quota to the value of the quota spec (which is purchase-quotas.ProjectQuota).",
330
render: { type: "json", editable: false },
331
},
332
notes: NOTES,
333
},
334
});
335
336
export interface ApiKeyInfo {
337
name: string;
338
trunc: string;
339
hash?: string;
340
used?: number;
341
}
342
343
// Same query above, but without the last_edited time constraint.
344
schema.projects_all = deep_copy(schema.projects);
345
if (
346
schema.projects_all.user_query?.get == null ||
347
schema.projects.user_query?.get == null
348
) {
349
throw Error("make typescript happy");
350
}
351
schema.projects_all.user_query.get.options = [];
352
schema.projects_all.virtual = "projects";
353
schema.projects_all.user_query.get.pg_where = ["projects"];
354
355
// Table that provides extended read info about a single project
356
// but *ONLY* for admin.
357
Table({
358
name: "projects_admin",
359
fields: schema.projects.fields,
360
rules: {
361
primary_key: schema.projects.primary_key,
362
virtual: "projects",
363
user_query: {
364
get: {
365
admin: true, // only admins can do get queries on this table
366
// (without this, users who have read access could read)
367
pg_where: [{ "project_id = $::UUID": "project_id" }],
368
fields: schema.projects.user_query.get.fields,
369
},
370
},
371
},
372
});
373
374
/*
375
Table that enables set queries to the course field of a project. Only
376
project owners are allowed to use this table. The point is that this makes
377
it possible for the owner of the project to set things, but not for the
378
collaborators to set those things.
379
**wARNING:** right now we're not using this since when multiple people add
380
students to a course and the 'course' field doesn't get properly set,
381
much confusion and misery arises.... and it is very hard to fix.
382
In theory a malicous student could not pay via this. But if they could
383
mess with their client, they could easily not pay anyways.
384
*/
385
Table({
386
name: "projects_owner",
387
rules: {
388
virtual: "projects",
389
user_query: {
390
set: {
391
fields: {
392
project_id: "project_owner",
393
course: true,
394
},
395
},
396
},
397
},
398
fields: {
399
project_id: true,
400
course: true,
401
},
402
});
403
404
/*
405
406
Table that enables any signed-in user to set an invite request.
407
Later: we can make an index so that users can see all outstanding requests they have made easily.
408
How to test this from the browser console:
409
project_id = '4e0f5bfd-3f1b-4d7b-9dff-456dcf8725b8' // id of a project you have
410
invite_requests = {}; invite_requests[smc.client.account_id] = {timestamp:new Date(), message:'please invite me'}
411
smc.client.query({cb:console.log, query:{project_invite_requests:{project_id:project_id, invite_requests:invite_requests}}}) // set it
412
smc.redux.getStore('projects').get_project(project_id).invite_requests // see requests for this project
413
414
CURRENTLY NOT USED, but probably will be...
415
416
database._user_set_query_project_invite_requests(old_val, new_val, account_id, cb)
417
For now don't check anything -- this is how we will make it secure later.
418
This will:
419
- that user setting this is signed in
420
- ensure user only modifies their own entry (for their own id).
421
- enforce some hard limit on number of outstanding invites (say 30).
422
- enforce limit on size of invite message.
423
- sanity check on timestamp
424
- with an index as mentioned above we could limit the number of projects
425
to which a single user has requested to be invited.
426
427
*/
428
Table({
429
name: "project_invite_requests",
430
rules: {
431
virtual: "projects",
432
primary_key: "project_id",
433
user_query: {
434
set: {
435
fields: {
436
project_id: true,
437
invite_requests: true,
438
},
439
before_change(_database, _old_val, _new_val, _account_id, cb) {
440
cb();
441
},
442
},
443
},
444
}, // actual function will be database._user...
445
fields: {
446
project_id: true,
447
invite_requests: true,
448
}, // {account_id:{timestamp:?, message:?}, ...}
449
});
450
451
/*
452
Virtual table to get project avatar_images.
453
We don't put this in the main projects table,
454
since we don't want the avatar_image_full to be
455
the projects queries or changefeeds, since it
456
is big, and by default all get fields appear there.
457
*/
458
459
Table({
460
name: "project_avatar_images",
461
rules: {
462
virtual: "projects",
463
primary_key: "project_id",
464
user_query: {
465
get: {
466
pg_where: ["projects"],
467
fields: {
468
project_id: null,
469
avatar_image_full: null,
470
},
471
},
472
},
473
},
474
fields: {
475
project_id: true,
476
avatar_image_full: true,
477
},
478
});
479
480
/*
481
Table to get/set the datastore config in addons.
482
483
The main idea is to set/update/delete entries in the dict addons.datastore.[key] = {...}
484
*/
485
Table({
486
name: "project_datastore",
487
rules: {
488
virtual: "projects",
489
primary_key: "project_id",
490
user_query: {
491
set: {
492
// this also deals with delete requests
493
fields: {
494
project_id: true,
495
addons: true,
496
},
497
async instead_of_change(
498
db,
499
_old_value,
500
new_val,
501
account_id,
502
cb,
503
): Promise<void> {
504
try {
505
// to delete an entry, pretend to set the datastore = {delete: [name]}
506
if (typeof new_val.addons.datastore.delete === "string") {
507
await db.project_datastore_del(
508
account_id,
509
new_val.project_id,
510
new_val.addons.datastore.delete,
511
);
512
cb(undefined);
513
} else {
514
// query should set addons.datastore.[new key] = config, such that we see here
515
// new_val = {"project_id":"...","addons":{"datastore":{"key3":{"type":"xxx", ...}}}}
516
// which will be merged into the existing addons.datastore dict
517
const res = await db.project_datastore_set(
518
account_id,
519
new_val.project_id,
520
new_val.addons.datastore,
521
);
522
cb(undefined, res);
523
}
524
} catch (err) {
525
cb(`${err}`);
526
}
527
},
528
},
529
get: {
530
fields: {
531
project_id: true,
532
addons: true,
533
},
534
async instead_of_query(db, opts, cb): Promise<void> {
535
if (opts.multi) {
536
throw Error("'multi' is not implemented");
537
}
538
try {
539
// important: the config dicts for each key must not expose secret credentials!
540
// check if opts.query.addons === null ?!
541
const data = await db.project_datastore_get(
542
opts.account_id,
543
opts.query.project_id,
544
);
545
cb(undefined, data);
546
} catch (err) {
547
cb(`${err}`);
548
}
549
},
550
},
551
},
552
},
553
fields: {
554
project_id: true,
555
addons: true,
556
},
557
});
558
559
export interface ProjectStatus {
560
"project.pid"?: number; // pid of project server process
561
"hub-server.port"?: number; // port of tcp server that is listening for conn from hub
562
"browser-server.port"?: number; // port listening for http/websocket conn from browser client
563
"sage_server.port"?: number; // port where sage server is listening.
564
"sage_server.pid"?: number; // pid of sage server process
565
start_ts?: number; // timestamp, when project server started
566
session_id?: string; // unique identifyer
567
secret_token?: string; // long random secret token that is needed to communicate with local_hub
568
version?: number; // version number of project code
569
disk_MB?: number; // MB of used disk
570
installed?: boolean; // whether code is installed
571
memory?: {
572
count?: number;
573
pss?: number;
574
rss?: number;
575
swap?: number;
576
uss?: number;
577
}; // output by smem
578
}
579
580
export interface ProjectState {
581
ip?: string; // where the project is running
582
error?: string;
583
state?: State; // running, stopped, etc.
584
time?: Date;
585
}
586
587
Table({
588
name: "crm_projects",
589
fields: schema.projects.fields,
590
rules: {
591
primary_key: schema.projects.primary_key,
592
virtual: "projects",
593
user_query: {
594
get: {
595
admin: true, // only admins can do get queries on this table
596
// (without this, users who have read access could read)
597
pg_where: [],
598
fields: {
599
...schema.projects.user_query?.get?.fields,
600
notes: null,
601
},
602
},
603
set: {
604
admin: true,
605
fields: {
606
project_id: true,
607
name: true,
608
title: true,
609
description: true,
610
deleted: true,
611
notes: true,
612
},
613
},
614
},
615
},
616
});
617
618
export type Datastore = boolean | string[] | undefined;
619
620
// in the future, we might want to extend this to include custom environmment variables
621
export interface EnvVarsRecord {
622
inherit?: boolean;
623
}
624
export type EnvVars = EnvVarsRecord | undefined;
625
626
export interface StudentProjectFunctionality {
627
disableActions?: boolean;
628
disableJupyterToggleReadonly?: boolean;
629
disableJupyterClassicServer?: boolean;
630
disableJupyterClassicMode?: boolean;
631
disableJupyterLabServer?: boolean;
632
disableRServer?: boolean;
633
disableVSCodeServer?: boolean;
634
disableLibrary?: boolean;
635
disableNetworkWarningBanner?: boolean;
636
disablePlutoServer?: boolean;
637
disableTerminals?: boolean;
638
disableUploads?: boolean;
639
disableNetwork?: boolean;
640
disableSSH?: boolean;
641
disableCollaborators?: boolean;
642
disableChatGPT?: boolean;
643
disableSharing?: boolean;
644
}
645
646
export interface CourseInfo {
647
type: "student" | "shared" | "nbgrader";
648
account_id?: string; // account_id of the student that this project is for.
649
project_id: string; // the course project, i.e., project with the .course file
650
path: string; // path to the .course file in project_id
651
pay?: string; // iso timestamp or ""
652
paid?: string; // iso timestamp with *when* they paid.
653
payInfo?: PurchaseInfo;
654
email_address?: string;
655
datastore: Datastore;
656
student_project_functionality?: StudentProjectFunctionality;
657
envvars?: EnvVars;
658
}
659
660
type ExecOptsCommon = {
661
project_id: string;
662
cb?: Function; // if given use a callback interface *instead* of async.
663
};
664
665
export type ExecOptsBlocking = ExecOptsCommon & {
666
compute_server_id?: number; // if true, run on the compute server (if available)
667
filesystem?: boolean; // run in fileserver container on compute server; otherwise, runs on main compute container.
668
path?: string;
669
command: string;
670
args?: string[];
671
timeout?: number;
672
max_output?: number;
673
bash?: boolean;
674
aggregate?: string | number | { value: string | number };
675
err_on_exit?: boolean;
676
env?: { [key: string]: string }; // custom environment variables.
677
async_call?: ExecuteCodeOptions["async_call"];
678
};
679
680
export type ExecOptsAsync = ExecOptsCommon & {
681
async_get?: ExecuteCodeOptionsAsyncGet["async_get"];
682
async_stats?: ExecuteCodeOptionsAsyncGet["async_stats"];
683
async_await?: ExecuteCodeOptionsAsyncGet["async_await"];
684
};
685
686
export type ExecOpts = ExecOptsBlocking | ExecOptsAsync;
687
688
export function isExecOptsBlocking(opts: unknown): opts is ExecOptsBlocking {
689
return (
690
typeof opts === "object" &&
691
typeof (opts as any).project_id === "string" &&
692
typeof (opts as any).command === "string"
693
);
694
}
695
696
export type ExecOutput = ExecuteCodeOutput & {
697
time: number; // time in ms, from user point of view.
698
};
699
700