Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/jupyter/blobs/sqlite.ts
Views: 687
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

/*
Jupyter's blob store (based on sqlite), which hooks into the raw http server.
*/

import Database from "better-sqlite3";
import * as fs from "node:fs";

import Logger from "@cocalc/backend/logger";
import { sha1 as misc_node_sha1 } from "@cocalc/backend/misc_node";
import type { BlobStoreInterface } from "@cocalc/jupyter/types/project-interface";
import { months_ago } from "@cocalc/util/misc";
import { readFile } from "fs/promises";
import { BASE64_TYPES } from "./get";

const winston = Logger("jupyter-blobs:sqlite");

// Location of the sqlite file. The special value "memory" selects an
// in-memory database instead (see init() below).
const JUPYTER_BLOBS_DB_FILE: string =
  process.env.JUPYTER_BLOBS_DB_FILE ??
  `${process.env.SMC_LOCAL_HUB_HOME ?? process.env.HOME}/.jupyter-blobs-v0.db`;

/**
 * Blob store backed by a single sqlite table `blobs` with the columns
 * (sha1, data, type, ipynb, time).  Blobs are addressed by the sha1 hash of
 * their binary content; `time` is refreshed whenever the same content is
 * saved again and is what the cleanup routines use to expire rows.
 */
export class BlobStoreSqlite implements BlobStoreInterface {
  private db: Database.Database;
  private stmt_insert;
  private stmt_update;
  private stmt_get;
  private stmt_data;
  private stmt_ipynb;
  private stmt_keys;

  /**
   * Opens (or creates) the database.  If initialization fails, the database
   * file is deleted and initialization is attempted exactly once more; an
   * error in that second attempt propagates to the caller.
   */
  constructor() {
    winston.debug("jupyter BlobStore: constructor");
    try {
      this.init();
      winston.debug(`jupyter BlobStore: ${JUPYTER_BLOBS_DB_FILE} opened fine`);
    } catch (err) {
      winston.debug(
        `jupyter BlobStore: ${JUPYTER_BLOBS_DB_FILE} open error - ${err}`
      );
      // File may be corrupt/broken/etc. -- in this case, remove and try again.
      // This database is only an image *cache*, so this is fine.
      // See https://github.com/sagemathinc/cocalc/issues/2766
      // Using sync is also fine, since this only happens once
      // during initialization.
      winston.debug("jupyter BlobStore: resetting database cache");
      try {
        fs.unlinkSync(JUPYTER_BLOBS_DB_FILE);
      } catch (error) {
        // Deliberately best-effort: e.g. the file may not exist at all.
        winston.debug(
          `Error trying to delete ${JUPYTER_BLOBS_DB_FILE}... ignoring: `,
          error
        );
      }
      this.init();
    }
  }

  /**
   * Opens the database, ensures the table exists, prepares all statements
   * and -- for on-disk databases only -- expires old rows and runs VACUUM.
   */
  init(): void {
    if (JUPYTER_BLOBS_DB_FILE == "memory") {
      // as any, because @types/better-sqlite3 is not yet updated to support this
      // doc about the constructor: https://wchargin.com/better-sqlite3/api.html#new-databasepath-options
      this.db = new Database(".db", { memory: true } as any);
    } else {
      this.db = new Database(JUPYTER_BLOBS_DB_FILE);
    }

    this.init_table();
    this.init_statements(); // table must exist!

    if (JUPYTER_BLOBS_DB_FILE !== "memory") {
      this.clean(); // do this once on start
      this.db.exec("VACUUM");
    }
  }

  /** Creates the `blobs` table if it does not exist yet. */
  private init_table() {
    this.db
      .prepare(
        "CREATE TABLE IF NOT EXISTS blobs (sha1 TEXT, data BLOB, type TEXT, ipynb TEXT, time INTEGER)"
      )
      .run();
  }

  /** Prepares the statements used by the public methods. */
  private init_statements() {
    this.stmt_insert = this.db.prepare(
      "INSERT INTO blobs VALUES(?, ?, ?, ?, ?)"
    );
    this.stmt_update = this.db.prepare("UPDATE blobs SET time=? WHERE sha1=?");
    this.stmt_get = this.db.prepare("SELECT * FROM blobs WHERE sha1=?");
    this.stmt_data = this.db.prepare("SELECT data FROM blobs where sha1=?");
    this.stmt_keys = this.db.prepare("SELECT sha1 FROM blobs");
    this.stmt_ipynb = this.db.prepare(
      "SELECT ipynb, type, data FROM blobs where sha1=?"
    );
  }

  /** Runs both cleanup passes: by age, then by database file size. */
  private clean(): void {
    this.clean_old();
    this.clean_filesize();
  }

  private clean_old() {
    // Delete anything old...
    // The main point of this blob store being in the db is to ensure that when the
    // project restarts, then user saves an ipynb,
    // that they do not lose any work. So a few weeks should be way more than enough.
    // Note that TimeTravel may rely on these old blobs, so images in TimeTravel may
    // stop working after this long. That's a tradeoff.
    this.db
      .prepare("DELETE FROM blobs WHERE time <= ?")
      .run(months_ago(1).getTime());
  }

  /**
   * If the database file is larger than 128 MiB, deletes every row whose
   * `time` is at most that of the row at offset floor(count / 2) in ascending
   * `time` order.  Any error is logged and otherwise ignored.
   */
  private clean_filesize() {
    // we also check for the actual filesize and in case, get rid of half of the old blobs
    try {
      const stats = fs.statSync(JUPYTER_BLOBS_DB_FILE);
      const size_mb = stats.size / (1024 * 1024);
      if (size_mb > 128) {
        const cnt = this.db
          .prepare("SELECT COUNT(*) as cnt FROM blobs")
          .get() as { cnt: number } | undefined;
        if (cnt?.cnt == null) return;
        const n = Math.floor(cnt.cnt / 2);
        winston.debug(
          `jupyter BlobStore: large file of ${size_mb}MiB detected – deleting ${n} old rows.`
        );
        if (n == 0) return;
        const when = this.db
          .prepare("SELECT time FROM blobs ORDER BY time ASC LIMIT 1 OFFSET ?")
          .get(n) as { time?: number } | undefined;
        if (when?.time == null) return;
        winston.debug(`jupyter BlobStore: delete starting from ${when.time}`);
        this.db.prepare("DELETE FROM blobs WHERE time <= ?").run(when.time);
      }
    } catch (err) {
      winston.debug(`jupyter BlobStore: clean_filesize error: ${err}`);
    }
  }

  // used in testing
  async delete_all_blobs(): Promise<void> {
    this.db.prepare("DELETE FROM blobs").run();
  }

  /**
   * Stores `data` and returns the sha1 hash of its binary content.
   *
   * @param data  the payload; decoded as base64 when `type` is one of
   *              BASE64_TYPES (e.g. an encoded image), otherwise converted
   *              to bytes with Buffer.from's default string encoding.
   * @param type  type tag stored alongside the data.
   * @param ipynb (optional) text that is also stored and will be returned
   *              when get_ipynb is called.  This is used for some iframe
   *              support code.
   * @returns the sha1 hash under which the blob can be retrieved.
   */
  save(data: string, type: string, ipynb?: string): string {
    const buf: Buffer = BASE64_TYPES.includes(type as any)
      ? Buffer.from(data, "base64")
      : Buffer.from(data);
    return this.save_buffer(buf, type, ipynb);
  }

  /**
   * Inserts the given bytes keyed by their sha1 hash, or -- if a row with
   * that hash already exists -- only refreshes its `time` column, which
   * postpones its expiry.  Returns the sha1 hash.
   */
  private save_buffer(buf: Buffer, type: string, ipynb?: string): string {
    const sha1: string = misc_node_sha1(buf);
    if (this.stmt_get.get(sha1) == null) {
      this.stmt_insert.run([sha1, buf, type, ipynb, Date.now()]);
    } else {
      this.stmt_update.run([Date.now(), sha1]);
    }
    return sha1;
  }

  /**
   * Reads a file from disk and saves its bytes in the database.
   *
   * readFile without an encoding yields a Buffer.  The bytes are stored
   * as-is: converting them to a string first would corrupt binary content,
   * and passing that string through save() would additionally base64-decode
   * it for BASE64_TYPES.
   *
   * @returns the sha1 hash of the file's content.
   */
  async readFile(path: string, type: string): Promise<string> {
    const content = await readFile(path);
    return this.save_buffer(content, type);
  }

  /*
  free(sha1: string): void {
    // instead, stuff gets freed 1 month after last save.
  }
  */

  // Return data with given sha1, or undefined if no such data.
  get(sha1: string): undefined | Buffer {
    const row = this.stmt_data.get(sha1);
    if (row == null) {
      return undefined;
    }
    return row.data;
  }

  /**
   * Returns the `ipynb` text stored with the blob if there is one; otherwise
   * the blob's data as a string (base64-encoded when the stored type is one
   * of BASE64_TYPES).  Returns undefined if there is no blob with this sha1.
   */
  get_ipynb(sha1: string): string | undefined {
    const row = this.stmt_ipynb.get(sha1);
    if (row == null) {
      return;
    }
    if (row.ipynb != null) {
      return row.ipynb;
    }
    if (BASE64_TYPES.includes(row.type)) {
      return row.data.toString("base64");
    } else {
      return row.data.toString();
    }
  }

  /** Returns the sha1 hashes of all stored blobs. */
  async keys(): Promise<string[]> {
    return this.stmt_keys.all().map((x) => x.sha1);
  }
}