GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/database/postgres-ops.coffee
#########################################################################
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: MS-RSL – see LICENSE.md for details
#########################################################################

###
PostgreSQL -- operations code, e.g., backups, maintenance, etc.

COPYRIGHT : (c) 2017 SageMath, Inc.
LICENSE   : MS-RSL
###

fs        = require('fs')
async     = require('async')

misc_node = require('@cocalc/backend/misc_node')

{defaults} = misc = require('@cocalc/util/misc')
required = defaults.required

{SCHEMA} = require('@cocalc/util/schema')

exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
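    # extend_PostgreSQL is a mixin: the operations methods below get layered
    # onto a base PostgreSQL class by the caller, e.g. (hypothetical
    # composition -- the base class name is illustrative):
    #     {extend_PostgreSQL} = require('./postgres-ops')
    #     PostgreSQL = extend_PostgreSQL(PostgreSQLBase)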

    # Backs up the indicated tables.
    # WARNING: This is NOT guaranteed to give a point-in-time backup of the
    # entire database across tables!  The backup of each table is only
    # consistent within that table.  For CoCalc, this tends to be fine, due
    # to our design.  The advantage is that we can back up huge tables only
    # once a week, and other smaller tables much more frequently.

    # The tables option may be:
    #    - a list of tables
    #    - 'all' (the string) -- backs up everything in the SMC schema (not the database!)
    #    - 'critical' -- backs up only the smaller critical tables, which we
    #      would desperately need for disaster recovery
    backup_tables: (opts) =>
        opts = defaults opts,
            tables : required   # list of tables, 'all' or 'critical'
            path   : 'backup'
            limit  : 3          # number of tables to back up in parallel
            bup    : true       # if true, creates/updates a bup archive in backup/.bup,
                                # so we have snapshots of all past backups!
            cb     : required
        tables = @_get_backup_tables(opts.tables)
        dbg = @_dbg("backup_tables()")
        dbg("backing up tables: #{misc.to_json(tables)}")
        async.series([
            (cb) =>
                backup = (table, cb) =>
                    dbg("backup '#{table}'")
                    @_backup_table
                        table : table
                        path  : opts.path
                        cb    : cb
                async.mapLimit(tables, opts.limit, backup, cb)
            (cb) =>
                if not opts.bup   # skip the bup archive step when disabled
                    cb()
                    return
                @_backup_bup
                    path : opts.path
                    cb   : cb
        ], (err) => opts.cb(err))
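
    # Usage sketch (hypothetical call site -- `db` stands for a connected
    # instance of the extended class; the names here are illustrative):
    #     db.backup_tables
    #         tables : 'critical'
    #         path   : 'backup'
    #         cb     : (err) -> console.log('backup finished', err)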

    _backup_table: (opts) =>
        opts = defaults opts,
            table : required
            path  : 'backup'
            cb    : required
        dbg = @_dbg("_backup_table(table='#{opts.table}')")
        cmd = "mkdir -p #{opts.path}; time pg_dump -Fc --table #{opts.table} #{@_database} > #{opts.path}/#{opts.table}.bak"
        dbg(cmd)
        misc_node.execute_code
            command     : cmd
            timeout     : 0
            home        : '.'
            env         :
                PGPASSWORD : @_password
                PGUSER     : 'smc'
                PGHOST     : @_host
            err_on_exit : true
            cb          : opts.cb
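
    # For example, with table 'projects' and the default path, and assuming
    # @_database is 'smc', the generated command is:
    #     mkdir -p backup; time pg_dump -Fc --table projects smc > backup/projects.bak
    # Note that PGUSER is hard-coded to 'smc' here, whereas _restore_table
    # below uses @_user.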

    _backup_bup: (opts) =>
        opts = defaults opts,
            path : 'backup'
            cb   : required
        dbg = @_dbg("_backup_bup(path='#{opts.path}')")
        # We use no compression because the backup files are already all highly compressed.
        cmd = "mkdir -p '#{opts.path}' && export && bup init && bup index '#{opts.path}' && bup save --strip --compress=0 '#{opts.path}' -n master"
        dbg(cmd)
        misc_node.execute_code
            command     : cmd
            timeout     : 0
            home        : '.'
            env         :
                BUP_DIR : "#{opts.path}/.bup"
            err_on_exit : true
            cb          : opts.cb
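
    # Notes on the command above: `export` with no arguments just prints the
    # environment to the log (useful for checking BUP_DIR); `--strip` removes
    # the leading path components from saved filenames; `--compress=0` avoids
    # recompressing the already-compressed .bak files.  A restore sketch
    # (exact snapshot paths depend on your bup version):
    #     BUP_DIR=backup/.bup bup restore -C /tmp/restored /master/latest/.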

    _get_backup_tables: (tables) =>
        if misc.is_array(tables)
            return tables
        all = (t for t,s of SCHEMA when not s.virtual)
        if tables == 'all'
            return all
        else if tables == 'critical'
            # TODO: whether a table is critical for backup should probably be
            # declared in the schema itself, not here.
            v = []
            non_critical = ['stats','syncstrings','file_use','eval_outputs','blobs','eval_inputs','patches','cursors']
            for x in all
                if x.indexOf('log') == -1 and x not in non_critical
                    v.push(x)
            return v
        else
            return [tables]
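
    # Examples of the mapping above:
    #     _get_backup_tables(['accounts', 'projects'])   # --> ['accounts', 'projects']
    #     _get_backup_tables('projects')                 # --> ['projects']
    #     _get_backup_tables('critical')                 # --> every non-virtual table whose
    #                                                    #     name does not contain 'log' and
    #                                                    #     is not in non_critical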

    # Restore the given tables from the backup in the given directory.
    restore_tables: (opts) =>
        opts = defaults opts,
            tables : undefined   # same as for backup_tables, or undefined to restore every table backed up in path
            path   : '/backup/postgres'
            limit  : 5
            cb     : required
        backed_up_tables = (filename[...-4] for filename in fs.readdirSync(opts.path) when filename[-4..] == '.bak')
        if not opts.tables?
            tables = backed_up_tables
        else
            tables = @_get_backup_tables(opts.tables)
        for table in tables
            if table not in backed_up_tables
                opts.cb("there is no backup of '#{table}'")
                return
        dbg = @_dbg("restore_tables()")
        dbg("restoring tables: #{misc.to_json(tables)}")
        restore = (table, cb) =>
            dbg("restore '#{table}'")
            @_restore_table
                table : table
                path  : opts.path
                cb    : cb
        async.mapLimit(tables, opts.limit, restore, (err) => opts.cb(err))
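
    # Usage sketch (hypothetical; assumes .bak files written by backup_tables
    # exist under /backup/postgres):
    #     db.restore_tables
    #         tables : ['projects']
    #         cb     : (err) -> console.log('restore finished', err)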

    _restore_table: (opts) =>
        opts = defaults opts,
            table : required
            path  : 'backup'
            cb    : required
        dbg = @_dbg("_restore_table(table='#{opts.table}')")
        async.series([
            (cb) =>
                dbg("dropping existing table if it exists")
                @_query
                    query : "DROP TABLE IF EXISTS #{opts.table}"
                    cb    : cb
            (cb) =>
                cmd = "time pg_restore -C -d #{@_database} #{opts.path}/#{opts.table}.bak"
                dbg(cmd)
                misc_node.execute_code
                    command     : cmd
                    timeout     : 0
                    home        : '.'
                    env         :
                        PGPASSWORD : @_password
                        PGUSER     : @_user
                        PGHOST     : @_host
                    err_on_exit : true
                    cb          : cb
        ], (err) => opts.cb(err))
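
    # For example, restoring table 'projects' from path 'backup', and assuming
    # @_database is 'smc', runs:
    #     time pg_restore -C -d smc backup/projects.bak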