Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/util/db-schema/blobs.ts
Views: 687
/*1* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2* License: MS-RSL – see LICENSE.md for details3*/45import { Table } from "./types";67// Note that github has a 10MB limit --8// https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/attaching-files9// All code in cocalc (frontend, etc.) should use this,10// rather than copying or defining their own!11export const MAX_BLOB_SIZE = 10000000;1213// some throttling -- note that after a bit, most blobs end up longterm14// cloud storage and are never accessed. This is mainly a limit to15// prevent abuse.16export const MAX_BLOB_SIZE_PER_PROJECT_PER_DAY = {17licensed: 100 * MAX_BLOB_SIZE,18unlicensed: 10 * MAX_BLOB_SIZE,19};2021Table({22name: "blobs",23fields: {24id: {25type: "uuid",26desc: "The uuid of this blob, which is a uuid derived from the Sha1 hash of the blob content.",27},28blob: {29type: "Buffer",30desc: "The actual blob content",31},32expire: {33type: "timestamp",34desc: "When to expire this blob (when delete_expired is called on the database).",35},36created: {37type: "timestamp",38desc: "When the blob was created.",39},40project_id: {41// I'm not really sure why we record a project associated to the blob, rather42// than something else (e.g., account_id). 
However, it's useful for abuse, since43// if abuse happened with a project, we could easily delete all corresponding blobs,44// and also it's a good tag for throttling.45type: "string",46desc: "The uuid of the project that created the blob.",47},48last_active: {49type: "timestamp",50desc: "When the blob was last pulled from the database.",51},52count: {53type: "number",54desc: "How many times the blob has been pulled from the database.",55},56size: {57type: "number",58desc: "The size in bytes of the blob.",59},60gcloud: {61type: "string",62desc: "name of a bucket that contains the actual blob, if available.",63},64backup: {65type: "boolean",66desc: "if true, then this blob was saved to an offsite backup",67},68compress: {69type: "string",70desc: "optional compression used: 'gzip' or 'zlib'",71},72},73rules: {74desc: "Table that stores blobs mainly generated as output of Sage worksheets.",75primary_key: "id",76// these indices speed up the search been done in 'copy_all_blobs_to_gcloud'77// less important to make this query fast, but we want to avoid thrashing cache78pg_indexes: ["((expire IS NULL))", "((gcloud IS NULL))", "last_active"],79user_query: {80get: {81async instead_of_query(database, opts, cb): Promise<void> {82const obj: any = Object.assign({}, opts.query);83if (obj == null || obj.id == null) {84cb("id must be specified");85return;86}87database.get_blob({88uuid: obj.id,89cb(err, blob) {90if (err) {91cb(err);92} else {93cb(undefined, { id: obj.id, blob });94}95},96});97},98fields: {99id: null,100blob: null,101},102},103set: {104// NOTE: we put "as any" for fields below because ttl is not an actual field but105// it is allowed for set queries and determine the expire field. I would rather106// do this (which *is* supported by the backend) then not restrict the fields keys107// for other schema entries. Alternatively, we could have a special kind of field108// above that is "virtual", but that requires writing more code in the backend. 
We'll109// do that if necessary.110fields: {111id: true,112blob: true,113project_id: "project_write",114ttl: 0,115} as any,116required_fields: {117id: true,118blob: true,119project_id: true,120},121async instead_of_change(122database,123_old_value,124new_val,125_account_id,126cb,127): Promise<void> {128database.save_blob({129uuid: new_val.id,130blob: new_val.blob,131ttl: new_val.ttl,132project_id: new_val.project_id,133check: true, // can't trust the user!134cb,135});136},137},138},139},140});141142143