Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/util/db-schema/cloud-filesystems.ts
Views: 687
/*
Configuration of network mounted shared POSIX filesystems associated
to projects for use initially by the compute servers.

Initially these will get mounted by all compute servers uniformly (mostly),
and later the project will also mount these via a sidecar.

This is 100% built on juicefs/keydb instead of gcs/s3, etc., since:

- there are so many gotchas with directly using fuse mounted gcs/s3,
- people can just use those directly or mount them directly easily
  anyways (since they are root)
*/

import { Table } from "./types";
import { ID, NOTES } from "./crm";
import { SCHEMA as schema } from "./index";

/**
 * We do NOT charge to make a cloud file system. However, we require that
 * the user have enough money to make a CREATE_CLOUD_FILESYSTEM_AMOUNT purchase.
 * One reason to require credit is because billing is delayed by several days,
 * and a user could spend substantially during that time (e.g., over $1000
 * seems possible, e.g., bandwidth egress to China is $0.23/GB, and you can
 * probably download 100MB/s or over 300GB/hour, or over $3000 in 2 days).
 */
export const CREATE_CLOUD_FILESYSTEM_AMOUNT = 10;

/** Default value of the `lock` string of a cloud file system. */
export const DEFAULT_LOCK = "DELETE";

/**
 * Since all storage gets mounted on all compute servers, and basically
 * you only need one shared storage volume in most cases, we do put a global
 * limit to avoid abuse and efficiency issues for now.
 */
export const MAX_CLOUD_FILESYSTEMS_PER_PROJECT = 100;

// We use a random port on the VPN between MIN_PORT and MAX_PORT.
export const MIN_PORT = 40000;
export const MAX_PORT = 48000;

export const MIN_BLOCK_SIZE = 1;
// requires my fork of juicefs to get above 16 (supports 64)!
// do not use non-fork on a file system with a block size bigger
// than 16, as it may corrupt it...
// Just in case -- for now we will restrict to 16 anyways.
export const MAX_BLOCK_SIZE = 16;
export const RECOMMENDED_BLOCK_SIZE = 16;

/**
 * Shape of a Google Cloud service account key, as stored in the
 * `secret_key` field of a cloud file system.
 */
export interface GoogleCloudServiceAccountKey {
  type: "service_account";
  project_id: string;
  private_key_id: string;
  private_key: string;
  client_email: string;
  client_id: string;
  auth_uri: string;
  token_uri: string;
  auth_provider_x509_cert_url: string;
  client_x509_cert_url: string;
  universe_domain: "googleapis.com";
}

/** Compression modes accepted for a cloud file system. */
export type Compression = "lz4" | "zstd" | "none";

/** Names of the supported default storage classes for the backing bucket. */
export const GOOGLE_CLOUD_BUCKET_STORAGE_CLASSES = [
  "standard",
  "nearline",
  "coldline",
  "archive",
  "autoclass-nearline",
  "autoclass-archive",
];

/**
 * Human readable description of each storage class.  The non-autoclass
 * entries also carry `minStorageDays`.
 */
export const GOOGLE_CLOUD_BUCKET_STORAGE_CLASSES_DESC = {
  "autoclass-nearline": {
    desc: "Autoclass - transitions objects between Standard or Nearline based on activity",
  },
  "autoclass-archive": {
    desc: "Autoclass - transitions objects between Standard, Nearline, Coldline, and Archive based on activity",
  },
  standard: {
    desc: "Standard - short-term storage and frequently accessed data",
    minStorageDays: 0,
  },
  nearline: {
    desc: "Nearline - backups and data accessed less than once a month",
    minStorageDays: 30,
  },
  coldline: {
    desc: "Coldline - disaster recovery and data accessed less than once a quarter",
    minStorageDays: 90,
  },
  archive: {
    desc: "Archive - long-term digital preservation of data accessed less than once a year",
    minStorageDays: 365,
  },
};

// NOTE: the array above is not declared `as const`, so this type is `string`.
export type GoogleCloudBucketStorageClass =
  (typeof GOOGLE_CLOUD_BUCKET_STORAGE_CLASSES)[number];

// We implement the three multiregions: asia, eu, and us.
// We also support *all* single regions.
// Dual regions are
// complicated to specify and have subtle restrictions and
// probably aren't that critical for our users, so we don't
// support them.
export const GOOGLE_CLOUD_MULTIREGIONS = ["us", "eu", "asia"];

// We will have to update the zone list when google adds more zones, since I didn't
// want to have a dependency on my package @cocalc/gcloud-pricing-calculator.
// However it's easy using that package:
//   a =require('@cocalc/gcloud-pricing-calculator')
//   z = new Set(Object.keys((await a.getData()).zones).map((x)=>{i=x.lastIndexOf('-');return x.slice(0,i)}))
export const GOOGLE_CLOUD_REGIONS = [
  "us-central1",
  "us-east1",
  "us-east4",
  "us-east5",
  "us-west1",
  "us-west2",
  "us-west3",
  "us-west4",
  "us-south1",
  "northamerica-northeast1",
  "northamerica-northeast2",
  "europe-north1",
  "europe-central2",
  "europe-southwest1",
  "europe-west1",
  "europe-west2",
  "europe-west3",
  "europe-west4",
  "europe-west6",
  "europe-west8",
  "europe-west9",
  "europe-west10",
  "europe-west12",
  "southamerica-east1",
  "southamerica-west1",
  "africa-south1",
  "asia-east1",
  "asia-east2",
  "asia-northeast1",
  "asia-northeast2",
  "asia-northeast3",
  "asia-south1",
  "asia-south2",
  "asia-southeast1",
  "asia-southeast2",
  "australia-southeast1",
  "australia-southeast2",
  "me-central1",
  "me-central2",
  "me-west1",
];

/**
 * Maps the prefix of a region name (the part before the first "-") or a
 * multiregion name to a display name for the geographic area.
 */
export const GOOGLE_REGION_PREFIX_TO_LOCATION = {
  us: "North America",
  northamerica: "North America",
  europe: "Europe",
  southamerica: "South America",
  africa: "South Africa",
  asia: "APAC",
  australia: "APAC",
  me: "Middle East",
  eu: "Europe",
};

export type GoogleCloudBucketLocation =
  | (typeof GOOGLE_CLOUD_MULTIREGIONS)[number]
  | (typeof GOOGLE_CLOUD_REGIONS)[number];

/** One row of the cloud_filesystems table, as seen by clients. */
export interface CloudFilesystem {
  id: number;
  project_specific_id: number;
  project_id: string;
  account_id: string;
  created: Date;
  bucket?: string;
  mountpoint: string;
  mount?: boolean; // whether it should get mounted right now
  secret_key?: GoogleCloudServiceAccountKey;
  port: number;
  compression: Compression;
  block_size: number;
  trash_days: number;
  bucket_location: GoogleCloudBucketLocation;
  bucket_storage_class: GoogleCloudBucketStorageClass;
  mount_options?: string;
  keydb_options?: string;
  title?: string;
  color?: string;
  deleting?: boolean;
  error?: string;
  notes?: string;
  lock?: string;
  position?: number;
  last_edited?: Date;
  purchase_id?: number;
  bytes_used?: number;
}
// See https://juicefs.com/docs/community/command_reference#mount

/** The subset of CloudFilesystem fields supplied when creating one. */
export type CreateCloudFilesystem = Pick<
  CloudFilesystem,
  | "project_id"
  | "mountpoint"
  | "mount"
  | "compression"
  | "block_size"
  | "trash_days"
  | "title"
  | "color"
  | "notes"
  | "position"
  | "mount_options"
  | "keydb_options"
  | "bucket_location"
  | "bucket_storage_class"
>;

/** Default values for the configurable fields of a new cloud file system. */
export const DEFAULT_CONFIGURATION = {
  mountpoint: "cloud",
  mount: true,
  compression: "lz4",
  block_size: RECOMMENDED_BLOCK_SIZE,
  trash_days: 0,
  title: "Untitled",
  // Same literal "DELETE" as before; referenced via the shared constant so
  // the two cannot drift apart.
  lock: DEFAULT_LOCK,
  //
  // Without writeback things are quite slow (with GCS), so it's enabled.
  // "-o allow_other" is because:
  //  - makes 'juicefs rmr /home/user/cloudfs/.trash' to empty the trash *possible*;
  //    as non-root there is no way to empty trash!
  //  - makes it possible to use ZFS on top of this, which may be interesting later.
  //  - --open-cache=(something) is needed since otherwise juicefs tries to use redis for network
  //    locks, which just don't work with async replication.
  mount_options:
    "--writeback -o allow_other --open-cache=1 --backup-meta=7200 --backup-skip-trash",
  keydb_options: "",
  bucket_location: "us-east1", // where cocalc.com is
  bucket_storage_class: "autoclass-archive",
} as const;

/** Fields that may be supplied when editing an existing cloud file system. */
export interface EditCloudFilesystem
  extends Pick<
    CloudFilesystem,
    | "id"
    | "mount"
    | "title"
    | "color"
    | "notes"
    | "position"
    | "mount_options"
    | "keydb_options"
    | "lock"
  > {
  // making these optional
  project_id?: string;
  mountpoint?: string;
  trash_days?: number;
  bucket_storage_class?: GoogleCloudBucketStorageClass;
}

// NOTE(review): presumably the fields that may be changed while the file
// system is mounted -- confirm against the code that consumes this set.
export const CHANGE_MOUNTED = new Set([
  "title",
  "color",
  "notes",
  "lock",
  "mount",
  "position",
  "bucket_storage_class",
  "trash_days",
]);

// NOTE(review): presumably the fields that may only be changed while the
// file system is NOT mounted -- confirm against the code that consumes this set.
export const CHANGE_UNMOUNTED = new Set([
  "project_id",
  "mountpoint",
  "mount_options",
  "keydb_options",
  "port",
]);

Table({
  name: "cloud_filesystems",
  rules: {
    primary_key: "id",
    // unique mountpoint *within* a given project; also unique port in case the
    // storage service requires a port to sync (e.g., keydb).
    pg_unique_indexes: [
      "(project_id, mountpoint)",
      "(project_id, port)",
      "(project_id, project_specific_id)",
      "bucket",
    ],
    user_query: {
      get: {
        pg_where: [{ "project_id = $::UUID": "project_id" }],
        throttle_changes: 0,
        // secret_key is deliberately absent from this list of readable fields.
        fields: {
          id: null,
          project_specific_id: null,
          project_id: null,
          account_id: null,
          bucket: null,
          mountpoint: null,
          mount: null,
          port: null,
          compression: null,
          block_size: null,
          trash_days: null,
          bucket_location: null,
          bucket_storage_class: null,
          title: null,
          color: null,
          error: null,
          notes: null,
          lock: null,
          position: null,
          last_edited: null,
          purchase_id: null,
          deleting: null,
          mount_options: null,
          keydb_options: null,
          bytes_used: null,
        },
      },
      set: {
        fields: {
          project_id: "project_write",
          id: true,
          mount: true,
          error: true,
          notes: true,
          title: true,
          color: true,
          position: true,
          lock: true,
        },
      },
    },
  },
  fields: {
    id: ID,
    project_specific_id: {
      not_null: true,
      type: "integer",
      desc: "A unique project-specific id assigned to this cloud file system. This is a positive integer that is guaranteed to be unique for cloud filesystems *in a given project* and minimal when assigned (so it is as small as possible). For now at least, I'm not using this in any way except as something to display to users. Internally we always use the global id.",
    },
    project_id: {
      not_null: true,
      type: "uuid",
      desc: "The project id that this cloud file system belongs to.",
      render: { type: "project_link" },
    },
    account_id: {
      not_null: true,
      type: "uuid",
      desc: "User that owns this cloud file system (they pay)",
      render: { type: "account" },
    },
    created: {
      not_null: true,
      type: "timestamp",
      desc: "When the cloud file system was created.",
    },
    bucket: {
      type: "string",
      pg_type: "VARCHAR(63)",
      desc: "Google cloud storage bucket backing this filesystem",
      render: { type: "text", maxLength: 63, editable: false },
    },
    bucket_storage_class: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(64)",
      desc: "Default storage class of the google cloud storage bucket",
      render: { type: "text", maxLength: 64, editable: false },
    },
    bucket_location: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(64)",
      desc: "Where the google cloud storage bucket is stored.",
      render: { type: "text", maxLength: 64, editable: false },
    },
    mountpoint: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(4096)",
      desc: "Where the cloud file system is mounted in the file system. If a relative path, then relative to home directory. Target path does not have to be empty. For sanity we restrict this string more than an arbitrary linux path.",
      render: { type: "text", maxLength: 4096, editable: true },
    },
    mount: {
      type: "boolean",
      desc: "If true, then this cloud file system will be mounted on all compute servers associated to the project.",
    },
    secret_key: {
      type: "map",
      pg_type: "jsonb",
      desc: "Secret key needed to use the bucket. It's a structured jsonb object. For google cloud storage, it's exactly the service account. This will only be not set if something went wrong initializing this storage.",
    },
    port: {
      type: "integer",
      desc: "Numerical port where local service runs on each client for the file system. E.g., this is keydb for juicefs.",
    },
    compression: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(64)",
      desc: "Compression for the file system: lz4, zstd or none. Cannot be changed.",
      render: { type: "text", maxLength: 64, editable: false },
    },
    // NOTE(review): the desc mentions 64, while MAX_BLOCK_SIZE in this file
    // is currently 16 -- confirm which bound should be documented.
    block_size: {
      type: "integer",
      not_null: true,
      desc: "Block size of file system in MB: between 1 and 64, inclusive. Cannot be changed.",
    },
    trash_days: {
      type: "integer",
      not_null: true,
      desc: "Number of days to store deleted files. Use 0 to disable.",
    },
    mount_options: {
      type: "string",
      pg_type: "VARCHAR(4096)",
      desc: "Options passed to the command line when running juicefs mount. See https://juicefs.com/docs/community/command_reference#mount This exact string is literally put on the command line after 'juicefs mount', and obviously getting it mangled can break mounting the file system.",
      render: { type: "text", maxLength: 4096, editable: true },
    },
    keydb_options: {
      type: "string",
      pg_type: "VARCHAR(16384)",
      desc: "Keydb (/Redis) configuration. This is placed at the end of keydb.conf and can be used to override or add to the keydb configuration used on each client.",
      render: { type: "text", maxLength: 16384, editable: true },
    },
    title: {
      type: "string",
      pg_type: "VARCHAR(254)",
      desc: "Title of this cloud file system. Used purely to make it easier for the user to keep track of it.",
      render: { type: "text", maxLength: 254, editable: true },
    },
    color: {
      type: "string",
      desc: "A user configurable color, which is used for tags and UI to indicate where a tab is running.",
      pg_type: "VARCHAR(30)",
      render: { type: "color", editable: true },
    },
    deleting: {
      type: "boolean",
      desc: "True if this filesystem is in the process of being deleted.",
    },
    error: {
      type: "string",
      desc: "In case something went wrong, e.g., in starting this compute server, this field will get set with a string error message to show the user. It's also cleared right when we try to start server.",
    },
    notes: NOTES,
    lock: {
      type: "string",
      pg_type: "VARCHAR(128)",
      desc: "String that you must provide as part of any API call to delete this object. Use this as a personal reminder of conditions under which it is OK to delete this.",
      render: { type: "text", maxLength: 128, editable: true },
    },
    position: {
      type: "number",
      desc: "Used for sorting a list of cloud file systems in the UI.",
    },
    last_edited: {
      type: "timestamp",
      desc: "Last time some field was changed. Also, this gets updated when the volume is actively mounted by some compute server, since the files are likely edited.",
    },
    purchase_id: {
      type: "number",
      desc: "if there is a current active purchase related to this cloud file system, this is the id of that purchase in the purchases table",
    },
    bytes_used: {
      not_null: true,
      type: "integer",
      pg_type: "bigint",
      desc: "The total number of bytes of data stored in the file system -- it's the output of df. It is not impacted by compression, i.e., it's not the bucket size itself.",
    },
  },
});

// Admin-only virtual view of cloud_filesystems.
// NOTE: this table used to be registered twice in this file with an
// identical definition; a single registration is kept.
Table({
  name: "crm_cloud_filesystems",
  fields: schema.cloud_filesystems.fields,
  rules: {
    primary_key: schema.cloud_filesystems.primary_key,
    virtual: "cloud_filesystems",
    user_query: {
      get: {
        admin: true,
        pg_where: [],
        fields: {
          ...schema.cloud_filesystems.user_query?.get?.fields,
          // NOTE(review): no `template` field is declared in the
          // cloud_filesystems fields above -- confirm this entry is intended.
          template: null,
        },
      },
      set: {
        admin: true,
        fields: {
          id: true,
          title: true,
          color: true,
          notes: true,
          mount_options: true,
          keydb_options: true,
        },
      },
    },
  },
});

// some sanity checks

/**
 * Runtime check that `compression` is one of the supported modes.
 *
 * @throws Error if the value is not the string 'lz4', 'zstd' or 'none'.
 */
export function assertValidCompression(compression: Compression): void {
  if (
    typeof compression === "string" &&
    ["lz4", "zstd", "none"].includes(compression)
  ) {
    return;
  }
  throw Error(`compression must be 'lz4', 'zstd', or 'none'`);
}

/**
 * Runtime check that `path` is acceptable as a mountpoint path.
 *
 * Rejects non-strings, paths containing a null character, newline, "~" or
 * backslash, paths longer than 4096 characters, and paths containing any
 * control code (0x00-0x1f or 0x7f).
 *
 * @throws Error describing the first violated rule.
 */
export function assertValidPath(path: string): void {
  if (typeof path !== "string") {
    throw Error("path must be a string");
  }
  if (
    path.includes("\0") ||
    path.includes("\n") ||
    path.includes("~") ||
    path.includes("\\")
  ) {
    throw Error(
      `invalid path '${path}' -- must not include newlines or null characters or ~ or \\`,
    );
  }
  if (path.length > 4096) {
    throw Error(`invalid path '${path}' -- must be at most 4096 characters`);
  }
  for (let i = 0; i < path.length; i++) {
    const charCode = path.charCodeAt(i);
    if ((charCode >= 0x00 && charCode <= 0x1f) || charCode === 0x7f) {
      throw Error(`invalid path '${path}' -- must not include control codes`);
    }
  }
}

/** One metrics sample for a cloud file system, as returned by the api. */
export interface CloudFilesystemMetric {
  timestamp: number; // what we get back from api since it's json -- ms since epoch
  compute_server_id: number;
  bytes_used: number;
  process_uptime: number;
  bytes_put?: number | null;
  bytes_get?: number | null;
  objects_put?: number | null;
  objects_get?: number | null;
  objects_delete?: number | null;
  bucket_location: string;
  bucket_storage_class: GoogleCloudBucketStorageClass;
  compute_server_location: GoogleCloudBucketLocation;
  cost?: number | null;
}

Table({
  name: "cloud_filesystem_metrics",
  rules: {
    primary_key: ["timestamp", "cloud_filesystem_id", "compute_server_id"],
  },
  fields: {
    timestamp: {
      type: "timestamp",
      desc: "When the metric was submitted. This is assigned by the database when data is inserted, so should be assumed correct and non-decreasing.",
    },
    cloud_filesystem_id: {
      type: "integer",
      desc: "The id of the cloud file system that this is a metric for.",
    },
    compute_server_id: {
      type: "integer",
      desc: "The id of the compute server that is submitting this metric.",
    },
    bytes_used: {
      not_null: true,
      type: "integer",
      pg_type: "bigint",
      desc: "The total number of bytes of data stored in the file system -- it's the output of df. It is not impacted by compression, i.e., it's not the bucket size itself.",
    },
    process_uptime: {
      not_null: true,
      type: "number",
      desc: "Seconds since the process started collecting these metrics.",
    },
    bytes_put: {
      type: "integer",
      pg_type: "bigint",
      desc: "The number of bytes of data that was written to cloud storage: juicefs_object_request_data_bytes_PUT in .stats",
    },
    bytes_get: {
      type: "integer",
      pg_type: "bigint",
      desc: "The number of bytes of data that were read from cloud storage: juicefs_object_request_data_bytes_GET in .stats",
    },
    objects_put: {
      type: "integer",
      pg_type: "bigint",
      desc: "Class A Operation: The number of distinct objects that were written to cloud storage: juicefs_object_request_durations_histogram_seconds_PUT_total in .stats",
    },
    objects_get: {
      type: "integer",
      pg_type: "bigint",
      desc: "Class B Operation: The number of distinct objects that were read from cloud storage: juicefs_object_request_durations_histogram_seconds_GET_total in .stats",
    },
    objects_delete: {
      type: "integer",
      pg_type: "bigint",
      desc: "Free Operation: The number of distinct objects that were deleted from cloud storage: juicefs_object_request_durations_histogram_seconds_DELETE_total in .stats",
    },
    bucket_location: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(64)",
      desc: "Where the google cloud storage bucket is stored. A GCP region or 'us','eu','asia' for multiregion buckets.",
      render: { type: "text", maxLength: 64, editable: false },
    },
    bucket_storage_class: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(64)",
      desc: "Default storage class of the google cloud storage bucket at this point in time: 'standard', 'nearline', 'coldline', 'archive', 'autoclass-nearline' or 'autoclass-archive'",
      render: { type: "text", maxLength: 64, editable: false },
    },
    compute_server_location: {
      not_null: true,
      type: "string",
      pg_type: "VARCHAR(64)",
      desc: "A GCP region or 'world', 'china', 'australia', 'unknown'. Here 'world' means something other than 'china' or 'australia'. Also HK doesn't count as 'china'.",
      render: { type: "text", maxLength: 64, editable: false },
    },
    cost: {
      type: "number",
      pg_type: "double precision",
      desc: "The estimated accumulated total cost from when the bucket was created until this point in time. This could be recomputed, but is nice to have easily available, and means we can delete old data.",
    },
    //     cost_state: {
    //       type: "object",
    //       desc: "Extra data at this point in time that can be used somehow in our cost estimation heuristic. E.g., {'bytes_used_standard':20000} would mean that we should assume going forward that 20000 bytes of data is of the standard storage class, irregardless of the current storage class because of a change of class. Obviously, some of this data could be deleted, but we don't know.",
    //     },
  },
});