CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/blobs/sqlite.ts
Views: 687
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
/*
7
Jupyter's blob store (based on sqlite), which hooks into the raw http server.
8
*/
9
10
import Database from "better-sqlite3";
11
import * as fs from "node:fs";
12
13
import Logger from "@cocalc/backend/logger";
14
import { sha1 as misc_node_sha1 } from "@cocalc/backend/misc_node";
15
import type { BlobStoreInterface } from "@cocalc/jupyter/types/project-interface";
16
import { months_ago } from "@cocalc/util/misc";
17
import { readFile } from "fs/promises";
18
import { BASE64_TYPES } from "./get";
19
20
// Debug logger for this module, created under the name "jupyter-blobs:sqlite".
const winston = Logger("jupyter-blobs:sqlite");
21
22
// Location of the sqlite file that backs the blob store.
// An explicit JUPYTER_BLOBS_DB_FILE environment variable takes precedence;
// otherwise the file is placed in SMC_LOCAL_HUB_HOME, falling back to HOME.
const blobsDbDirectory = process.env.SMC_LOCAL_HUB_HOME ?? process.env.HOME;
const JUPYTER_BLOBS_DB_FILE: string =
  process.env.JUPYTER_BLOBS_DB_FILE ??
  `${blobsDbDirectory}/.jupyter-blobs-v0.db`;
25
26
/**
 * Sqlite-backed store for Jupyter blobs (e.g. images), keyed by the sha1 hash
 * of their binary content.
 *
 * The database is only a cache: if the file cannot be opened it is deleted and
 * recreated, and on startup old rows are purged (see clean()).
 */
export class BlobStoreSqlite implements BlobStoreInterface {
  private db: Database.Database;
  // Prepared statements, created once in init_statements().
  private stmt_insert: Database.Statement;
  private stmt_update: Database.Statement;
  private stmt_get: Database.Statement;
  private stmt_data: Database.Statement;
  private stmt_ipynb: Database.Statement;
  private stmt_keys: Database.Statement;

  /**
   * Opens (or creates) the database. If that fails, the database file is
   * removed and initialization is attempted once more; a failure of the
   * second attempt propagates to the caller.
   */
  constructor() {
    winston.debug("jupyter BlobStore: constructor");
    try {
      this.init();
      winston.debug(`jupyter BlobStore: ${JUPYTER_BLOBS_DB_FILE} opened fine`);
    } catch (err) {
      winston.debug(
        `jupyter BlobStore: ${JUPYTER_BLOBS_DB_FILE} open error - ${err}`
      );
      // File may be corrupt/broken/etc. -- in this case, remove and try again.
      // This database is only an image *cache*, so this is fine.
      // See https://github.com/sagemathinc/cocalc/issues/2766
      // Using sync is also fine, since this only happens once
      // during initialization.
      winston.debug("jupyter BlobStore: resetting database cache");
      // init() may have opened a connection before it failed (e.g. while
      // creating the table or cleaning). Release it so the handle does not
      // leak and the file is not still held open when we delete it.
      try {
        this.db?.close();
      } catch (closeErr) {
        winston.debug(
          `jupyter BlobStore: error closing broken database... ignoring: ${closeErr}`
        );
      }
      try {
        fs.unlinkSync(JUPYTER_BLOBS_DB_FILE);
      } catch (unlinkErr) {
        winston.debug(
          `Error trying to delete ${JUPYTER_BLOBS_DB_FILE}... ignoring: `,
          unlinkErr
        );
      }
      this.init();
    }
  }

  /**
   * Opens the database, makes sure the table and the prepared statements
   * exist and, for an on-disk database, purges old rows and compacts the file.
   * Throws whatever the sqlite layer throws if any of these steps fails.
   */
  init(): void {
    if (JUPYTER_BLOBS_DB_FILE == "memory") {
      // as any, because @types/better-sqlite3 is not yet updated to support this
      // doc about the constructor: https://wchargin.com/better-sqlite3/api.html#new-databasepath-options
      // NOTE(review): current better-sqlite3 documentation describes passing
      // ":memory:" as the path for an in-memory database instead of a `memory`
      // option -- confirm which form the installed version expects.
      this.db = new Database(".db", { memory: true } as any);
    } else {
      this.db = new Database(JUPYTER_BLOBS_DB_FILE);
    }

    this.init_table();
    this.init_statements(); // table must exist!

    if (JUPYTER_BLOBS_DB_FILE !== "memory") {
      this.clean(); // do this once on start
      this.db.exec("VACUUM");
    }
  }

  // Create the single "blobs" table if it is not there yet.
  private init_table() {
    this.db
      .prepare(
        "CREATE TABLE IF NOT EXISTS blobs (sha1 TEXT, data BLOB, type TEXT, ipynb TEXT, time INTEGER)"
      )
      .run();
  }

  // Prepare all statements that are used repeatedly. Requires the table to exist.
  private init_statements() {
    this.stmt_insert = this.db.prepare(
      "INSERT INTO blobs VALUES(?, ?, ?, ?, ?)"
    );
    this.stmt_update = this.db.prepare("UPDATE blobs SET time=? WHERE sha1=?");
    this.stmt_get = this.db.prepare("SELECT * FROM blobs WHERE sha1=?");
    this.stmt_data = this.db.prepare("SELECT data FROM blobs where sha1=?");
    this.stmt_keys = this.db.prepare("SELECT sha1 FROM blobs");
    this.stmt_ipynb = this.db.prepare(
      "SELECT ipynb, type, data FROM blobs where sha1=?"
    );
  }

  // Purge by age first, then by file size.
  private clean(): void {
    this.clean_old();
    this.clean_filesize();
  }

  private clean_old() {
    // Delete anything old...
    // The main point of this blob store being in the db is to ensure that when the
    // project restarts, then user saves an ipynb,
    // that they do not lose any work. So a few weeks should be way more than enough.
    // Note that TimeTravel may rely on these old blobs, so images in TimeTravel may
    // stop working after this long. That's a tradeoff.
    this.db
      .prepare("DELETE FROM blobs WHERE time <= ?")
      .run(months_ago(1).getTime());
  }

  // If the database file is larger than 128 MiB, delete roughly the older half
  // of the rows. Any error is only logged, since this is best-effort cleanup.
  private clean_filesize() {
    try {
      const size_mb = fs.statSync(JUPYTER_BLOBS_DB_FILE).size / (1024 * 1024);
      if (size_mb <= 128) {
        return;
      }
      const count = this.db
        .prepare("SELECT COUNT(*) as cnt FROM blobs")
        .get() as { cnt: number } | undefined;
      if (count?.cnt == null) return;
      const n = Math.floor(count.cnt / 2);
      winston.debug(
        `jupyter BlobStore: large file of ${size_mb}MiB detected – deleting ${n} old rows.`
      );
      if (n == 0) return;
      // The timestamp of the n-th oldest row (0-based) is the cutoff; every row
      // at or before that time is removed.
      const cutoff = this.db
        .prepare("SELECT time FROM blobs ORDER BY time ASC LIMIT 1 OFFSET ?")
        .get(n) as { time?: number } | undefined;
      if (cutoff?.time == null) return;
      winston.debug(`jupyter BlobStore: delete starting from ${cutoff.time}`);
      this.db.prepare("DELETE FROM blobs WHERE time <= ?").run(cutoff.time);
    } catch (err) {
      winston.debug(`jupyter BlobStore: clean_filesize error: ${err}`);
    }
  }

  // used in testing
  async delete_all_blobs(): Promise<void> {
    this.db.prepare("DELETE FROM blobs").run();
  }

  /**
   * Store `data` and return the sha1 hash of its binary content.
   *
   * @param data  the content as a string; for the types listed in BASE64_TYPES
   *              it is decoded from base64, otherwise the string's bytes are stored.
   * @param type  type of the content, stored alongside it.
   * @param ipynb (optional) text that is also stored and will be returned
   *              when get_ipynb is called. This is used for some iframe support code.
   * @returns the sha1 hash under which the blob can be retrieved.
   */
  save(data: string, type: string, ipynb?: string): string {
    const buf: Buffer = BASE64_TYPES.includes(type as any)
      ? Buffer.from(data, "base64")
      : Buffer.from(data);
    return this.save_buffer(buf, type, ipynb);
  }

  // Insert the given bytes under their sha1 hash together with the current
  // time, or -- if a row with that hash already exists -- only refresh its
  // time so that it is not purged by clean_old(). Returns the sha1 hash.
  private save_buffer(buf: Buffer, type: string, ipynb?: string): string {
    const sha1: string = misc_node_sha1(buf);
    if (this.stmt_get.get(sha1) == null) {
      this.stmt_insert.run([sha1, buf, type, ipynb, Date.now()]);
    } else {
      this.stmt_update.run([Date.now(), sha1]);
    }
    return sha1;
  }

  /**
   * Read a file from disk and save its content in the database.
   *
   * readFile without an encoding resolves to a Buffer holding the raw bytes
   * of the file. These bytes are stored as they are: converting them to a
   * string first (and then base64-decoding that string for BASE64_TYPES, as
   * save() does for string input) would corrupt binary files such as images.
   *
   * @returns the sha1 hash of the file's content.
   * @throws if the file cannot be read.
   */
  async readFile(path: string, type: string): Promise<string> {
    const content: Buffer = await readFile(path);
    return this.save_buffer(content, type);
  }

  /*
  free(sha1: string): void {
     // instead, stuff gets freed 1 month after last save.
  }
  */

  // Return data with given sha1, or undefined if no such data.
  get(sha1: string): undefined | Buffer {
    const row = this.stmt_data.get(sha1) as { data: Buffer } | undefined;
    return row?.data;
  }

  /**
   * Return the text to embed in an ipynb file for the blob with the given
   * sha1: the ipynb text stored with it if there is one, otherwise the blob's
   * data encoded as base64 (for BASE64_TYPES) or as a plain string.
   * Returns undefined if there is no such blob.
   */
  get_ipynb(sha1: string): string | undefined {
    const row = this.stmt_ipynb.get(sha1) as
      | { ipynb: string | null; type: string; data: Buffer }
      | undefined;
    if (row == null) {
      return;
    }
    if (row.ipynb != null) {
      return row.ipynb;
    }
    return BASE64_TYPES.includes(row.type as any)
      ? row.data.toString("base64")
      : row.data.toString();
  }

  // Return the sha1 hashes of all blobs currently in the store.
  async keys(): Promise<string[]> {
    const rows = this.stmt_keys.all() as { sha1: string }[];
    return rows.map((row) => row.sha1);
  }
}
217
218