CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/util/db-schema/blobs.ts
Views: 923
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import { Table } from "./types";
7
8
/**
 * Maximum size of a single blob.
 *
 * For comparison, GitHub has a 10MB limit on attached files:
 * https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/attaching-files
 *
 * All code in cocalc (frontend, etc.) should import this constant
 * rather than copying the value or defining its own.
 */
export const MAX_BLOB_SIZE = 10_000_000;
13
14
/**
 * Throttling limits, expressed as multiples of MAX_BLOB_SIZE.
 *
 * After a while most blobs end up in long-term cloud storage and are
 * never accessed again, so this is mainly a limit to prevent abuse.
 */
export const MAX_BLOB_SIZE_PER_PROJECT_PER_DAY = {
  licensed: MAX_BLOB_SIZE * 100,
  unlicensed: MAX_BLOB_SIZE * 10,
};
21
22
/*
 * Schema for the "blobs" table.
 *
 * User "get" queries are served via database.get_blob and user "set"
 * queries via database.save_blob (see the instead_of_* handlers below),
 * rather than by the generic query machinery.
 */
Table({
  name: "blobs",
  fields: {
    id: {
      type: "uuid",
      desc: "The uuid of this blob, which is a uuid derived from the Sha1 hash of the blob content.",
    },
    blob: {
      type: "Buffer",
      desc: "The actual blob content",
    },
    expire: {
      type: "timestamp",
      desc: "When to expire this blob (when delete_expired is called on the database).",
    },
    created: {
      type: "timestamp",
      desc: "When the blob was created.",
    },
    project_id: {
      // It is not entirely clear why a project (rather than something else,
      // e.g., an account_id) was originally recorded with each blob --
      // update: account_id is now recorded as well. The project is still
      // useful for dealing with abuse: if abuse happened with a project, we
      // could easily delete all corresponding blobs. It is also a good tag
      // for throttling.
      type: "string",
      desc: "The uuid of the project that created the blob, if it is associated to a project.",
    },
    account_id: {
      type: "uuid",
      desc: "The uuid of the account that created the blob. (Only started recording in late 2024. Will make it so a user can optionally delete any blobs associated to their account when deleting their account.)",
    },
    last_active: {
      type: "timestamp",
      desc: "When the blob was last pulled from the database.",
    },
    count: {
      type: "number",
      desc: "How many times the blob has been pulled from the database.",
    },
    size: {
      type: "number",
      desc: "The size in bytes of the blob.",
    },
    gcloud: {
      type: "string",
      desc: "name of a bucket that contains the actual blob, if available.",
    },
    backup: {
      type: "boolean",
      desc: "if true, then this blob was saved to an offsite backup",
    },
    compress: {
      type: "string",
      desc: "optional compression used: 'gzip' or 'zlib'",
    },
  },
  rules: {
    desc: "Table that stores blobs mainly generated as output of Sage worksheets.",
    primary_key: "id",
    // These indices speed up the search being done in 'copy_all_blobs_to_gcloud'.
    // It is less important to make this query fast, but we want to avoid
    // thrashing the cache.
    pg_indexes: ["((expire IS NULL))", "((gcloud IS NULL))", "last_active"],
    user_query: {
      get: {
        // Fetch a single blob by id via database.get_blob; the result (or
        // error) is reported through cb.
        async instead_of_query(database, opts, cb): Promise<void> {
          const query: any = Object.assign({}, opts.query);
          const { id } = query;
          if (id == null) {
            cb("id must be specified");
            return;
          }
          database.get_blob({
            uuid: id,
            cb: (err, blob) => {
              if (err) {
                cb(err);
                return;
              }
              cb(undefined, { id, blob });
            },
          });
        },
        fields: {
          id: null,
          blob: null,
        },
      },
      set: {
        // NOTE: "as any" is used for fields below because ttl is not an actual
        // field of the table, but it is allowed in set queries and determines
        // the expire field. Doing this (which *is* supported by the backend)
        // is preferable to not restricting the field keys for other schema
        // entries. Alternatively, there could be a special kind of "virtual"
        // field above, but that requires writing more code in the backend.
        // We'll do that if necessary.
        fields: {
          id: true,
          blob: true,
          project_id: "project_write",
          account_id: "account_id",
          ttl: 0,
        } as any,
        required_fields: {
          id: true,
          blob: true,
          project_id: true,
        },
        // Save the blob via database.save_blob. The account_id passed on is
        // the handler's own account_id argument, not a value read from new_val.
        async instead_of_change(
          database,
          _old_value,
          new_val,
          account_id,
          cb,
        ): Promise<void> {
          database.save_blob({
            uuid: new_val.id,
            blob: new_val.blob,
            ttl: new_val.ttl,
            project_id: new_val.project_id,
            account_id,
            check: true, // can't trust the user!
            cb,
          });
        },
      },
    },
  },
});
148
149