Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/database/postgres-ops.coffee
#########################################################################
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: MS-RSL – see LICENSE.md for details
#########################################################################

###
PostgreSQL -- operations code, e.g., backups, maintenance, etc.

COPYRIGHT : (c) 2017 SageMath, Inc.
LICENSE   : MS-RSL
###

fs    = require('fs')
async = require('async')

misc_node = require('@cocalc/backend/misc_node')

{defaults} = misc = require('@cocalc/util/misc')
required = defaults.required

{SCHEMA} = require('@cocalc/util/schema')

exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
    # Backs up the indicated tables.
    # WARNING: This is NOT guaranteed to give a point-in-time backup
    # of the entire database across tables!  The backup of each table
    # is only consistent within that table.  For CoCalc, this tends to
    # be fine, due to our design.  The advantage of this is that we can
    # back up huge tables only once a week, and other smaller tables
    # much more frequently.

    # For tables:
    #    - a list of tables
    #    - 'all' (the string) -- backs up everything in the SMC schema (not the database!)
    #    - 'critical' -- backs up only smaller critical tables, which we would
    #      desperately need for disaster recovery
    backup_tables: (opts) =>
        opts = defaults opts,
            tables : required    # list of tables, 'all' or 'critical'
            path   : 'backup'
            limit  : 3           # number of tables to back up in parallel
            bup    : true        # creates/updates a bup archive in backup/.bup,
                                 # so we have snapshots of all past backups!
            cb     : required
        tables = @_get_backup_tables(opts.tables)
        dbg = @_dbg("backup_tables()")
        dbg("backing up tables: #{misc.to_json(tables)}")
        async.series([
            (cb) =>
                backup = (table, cb) =>
                    dbg("backup '#{table}'")
                    @_backup_table
                        table : table
                        path  : opts.path
                        cb    : cb
                async.mapLimit(tables, opts.limit, backup, cb)
            (cb) =>
                # Honor the bup option: only snapshot the backup
                # directory when bup archiving is enabled.
                if not opts.bup
                    cb()
                    return
                @_backup_bup
                    path : opts.path
                    cb   : cb
        ], (err) => opts.cb(err))

    _backup_table: (opts) =>
        opts = defaults opts,
            table : required
            path  : 'backup'
            cb    : required
        dbg = @_dbg("_backup_table(table='#{opts.table}')")
        cmd = "mkdir -p #{opts.path}; time pg_dump -Fc --table #{opts.table} #{@_database} > #{opts.path}/#{opts.table}.bak"
        dbg(cmd)
        misc_node.execute_code
            command     : cmd
            timeout     : 0
            home        : '.'
            env         :
                PGPASSWORD : @_password
                PGUSER     : 'smc'
                PGHOST     : @_host
            err_on_exit : true
            cb          : opts.cb

    _backup_bup: (opts) =>
        opts = defaults opts,
            path : 'backup'
            cb   : required
        dbg = @_dbg("_backup_bup(path='#{opts.path}')")
        # We use no compression because the backup files are already all highly
        # compressed.  ('export' with no arguments prints the environment, so it
        # ends up in the debug log.)
        cmd = "mkdir -p '#{opts.path}' && export && bup init && bup index '#{opts.path}' && bup save --strip --compress=0 '#{opts.path}' -n master"
        dbg(cmd)
        misc_node.execute_code
            command     : cmd
            timeout     : 0
            home        : '.'
            env         :
                BUP_DIR : "#{opts.path}/.bup"
            err_on_exit : true
            cb          : opts.cb

    _get_backup_tables: (tables) =>
        if misc.is_array(tables)
            return tables
        all = (t for t,s of SCHEMA when not s.virtual)
        if tables == 'all'
            return all
        else if tables == 'critical'
            # TODO: critical for backup or not should probably be in the schema
            # itself, not here.
            v = []
            non_critical = ['stats','syncstrings','file_use','eval_outputs','blobs','eval_inputs','patches','cursors']
            for x in all
                if x.indexOf('log') == -1 and x not in non_critical
                    v.push(x)
            return v
        else
            return [tables]

    # Restore the given tables from the backup in the given directory.
    restore_tables: (opts) =>
        opts = defaults opts,
            tables : undefined   # same as for backup_tables, or undefined to use whatever we have in the path
            path   : '/backup/postgres'
            limit  : 5
            cb     : required
        backed_up_tables = (filename[...-4] for filename in fs.readdirSync(opts.path) when filename[-4..] == '.bak')
        if not opts.tables?
            tables = backed_up_tables
        else
            tables = @_get_backup_tables(opts.tables)
        for table in tables
            if table not in backed_up_tables
                opts.cb("there is no backup of '#{table}'")
                return
        dbg = @_dbg("restore_tables()")
        dbg("restoring tables: #{misc.to_json(tables)}")
        restore = (table, cb) =>
            dbg("restore '#{table}'")
            @_restore_table
                table : table
                path  : opts.path
                cb    : cb
        async.mapLimit(tables, opts.limit, restore, (err) => opts.cb(err))

    _restore_table: (opts) =>
        opts = defaults opts,
            table : required
            path  : 'backup'
            cb    : required
        dbg = @_dbg("_restore_table(table='#{opts.table}')")
        async.series([
            (cb) =>
                dbg("dropping existing table if it exists")
                @_query
                    query : "DROP TABLE IF EXISTS #{opts.table}"
                    cb    : cb
            (cb) =>
                cmd = "time pg_restore -C -d #{@_database} #{opts.path}/#{opts.table}.bak"
                dbg(cmd)
                misc_node.execute_code
                    command     : cmd
                    timeout     : 0
                    home        : '.'
                    env         :
                        PGPASSWORD : @_password
                        PGUSER     : @_user
                        PGHOST     : @_host
                    err_on_exit : true
                    cb          : cb
        ], (err) => opts.cb(err))
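
For orientation, here is a minimal usage sketch (not part of the file above). It assumes a database object `db` whose class has been extended via `extend_PostgreSQL`; the `require` path and `db()` constructor shown are illustrative assumptions, not a confirmed CoCalc API.

# Hypothetical usage -- the require path and db() constructor are assumptions;
# only backup_tables/restore_tables come from the file above.
db = require('./postgres').db()

# Back up the smaller "critical" tables, three at a time, into backup/:
db.backup_tables
    tables : 'critical'
    path   : 'backup'
    limit  : 3
    cb     : (err) ->
        if err
            console.log("backup failed: #{err}")
        else
            console.log("backup complete")

# Later, restore whatever .bak files exist in that directory:
db.restore_tables
    path : 'backup'
    cb   : (err) -> console.log(err ? 'restore complete')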