CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/hub/hub.ts
Views: 687
1
//########################################################################
2
// This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
// License: MS-RSL – see LICENSE.md for details
4
//########################################################################
5
6
// This is the CoCalc Global HUB. It runs as a daemon, sitting in the
7
// middle of the action, connected to potentially thousands of clients,
8
// many Sage sessions, and a PostgreSQL database.
9
10
import TTLCache from "@isaacs/ttlcache";
11
import { callback } from "awaiting";
12
import blocked from "blocked";
13
import { spawn } from "child_process";
14
import { program as commander, Option } from "commander";
15
import basePath from "@cocalc/backend/base-path";
16
import {
17
pghost as DEFAULT_DB_HOST,
18
pgdatabase as DEFAULT_DB_NAME,
19
pguser as DEFAULT_DB_USER,
20
} from "@cocalc/backend/data";
21
import { trimLogFileSize } from "@cocalc/backend/logger";
22
import port from "@cocalc/backend/port";
23
import { init_start_always_running_projects } from "@cocalc/database/postgres/always-running";
24
import { load_server_settings_from_env } from "@cocalc/database/settings/server-settings";
25
import { init_passport } from "@cocalc/server/hub/auth";
26
import { initialOnPremSetup } from "@cocalc/server/initial-onprem-setup";
27
import initHandleMentions from "@cocalc/server/mentions/handle";
28
import initProjectControl, {
29
COCALC_MODES,
30
} from "@cocalc/server/projects/control";
31
import initIdleTimeout from "@cocalc/server/projects/control/stop-idle-projects";
32
import initNewProjectPoolMaintenanceLoop from "@cocalc/server/projects/pool/maintain";
33
import initPurchasesMaintenanceLoop from "@cocalc/server/purchases/maintenance";
34
import initSalesloftMaintenance from "@cocalc/server/salesloft/init";
35
import { stripe_sync } from "@cocalc/server/stripe/sync";
36
import { callback2, retry_until_success } from "@cocalc/util/async-utils";
37
import { getClients } from "./clients";
38
import { set_agent_endpoint } from "./health-checks";
39
import { start as startHubRegister } from "./hub_register";
40
import { getLogger } from "./logger";
41
import initDatabase, { database } from "./servers/database";
42
import initExpressApp from "./servers/express-app";
43
import initHttpRedirect from "./servers/http-redirect";
44
import initPrimus from "./servers/primus";
45
import initVersionServer from "./servers/version";
46
47
const MetricsRecorder = require("./metrics-recorder"); // import * as MetricsRecorder from "./metrics-recorder";
48
49
// Logger tagged with 'hub' for this file.
50
const winston = getLogger("hub");
51
52
// program gets populated with the command line options below.
53
// Holder for the parsed command line options.  main() copies the values from
// commander.opts() into this object; the binding itself is never reassigned,
// so it is declared const (only its properties are mutated).
const program: { [option: string]: any } = {};
export { program };
55
56
// How frequently to register with the database that this hub is up and running,
57
// and also report number of connected clients.
58
// Value is in seconds; it is passed as `interval_s` to startHubRegister in startServer.
const REGISTER_INTERVAL_S = 20;
59
60
// the jsmap of connected clients
61
// Obtained once from ./clients and handed to both the primus websocket server
// and the periodic hub registration in startServer.
const clients = getClients();
62
63
// Reset the password of the account with the given email address by calling
// database.reset_password.
//
// This never throws: success is logged at info level, and any failure is
// caught and logged at error level (so it is not lost among routine messages).
async function reset_password(email_address: string): Promise<void> {
  try {
    await callback2(database.reset_password, { email_address });
    winston.info(`Password changed for ${email_address}`);
  } catch (err) {
    winston.error(`Error resetting password -- ${err}`);
  }
}
71
72
// This calculates and updates the statistics for the /stats endpoint.
73
// It's important that we call this periodically, because otherwise the /stats data is outdated.
74
// Computes the stats once right away (errors from this first run propagate to
// the caller) and then schedules a refresh every minute.
//
// A failure of a periodic refresh is caught and logged here.  Without that,
// the rejected promise created inside setInterval would be left unhandled.
async function init_update_stats(): Promise<void> {
  winston.info("init updating stats periodically");
  const update = () => callback2(database.get_stats);
  // Do it every minute:
  setInterval(async () => {
    try {
      await update();
    } catch (err) {
      winston.error(`error updating stats -- ${err}`);
    }
  }, 60000);
  // Also do it once now:
  await update();
}
82
83
// This calculates and updates the site_license_usage_log.
84
// It's important that we call this periodically, if we want
85
// to be able to monitor site license usage. This is enabled
86
// by default only for dev mode (so for development).
87
// Updates the site license usage log once right away (errors from this first
// run propagate to the caller) and then again every 31 seconds.
//
// A failure of a periodic update is caught and logged here, so that it does
// not turn into an unhandled promise rejection inside the timer callback.
async function init_update_site_license_usage_log(): Promise<void> {
  winston.info("init updating site license usage log periodically");
  const update = async () => await database.update_site_license_usage_log();
  setInterval(async () => {
    try {
      await update();
    } catch (err) {
      winston.error(`error updating site license usage log -- ${err}`);
    }
  }, 31000);
  await update();
}
93
94
// Initializes the metrics recorder (passing it this file's logger) and then
// creates the two counters used by the hub:
//   - metric_blocked: accumulated "blocked" time, in ms
//   - uncaught_exception_total: number of reported "BUG"s
// Returns both counters in one object.
async function initMetrics() {
  winston.info("Initializing Metrics Recorder...");
  await callback(MetricsRecorder.init, winston);

  const metric_blocked = MetricsRecorder.new_counter(
    "blocked_ms_total",
    'accumulates the "blocked" time in the hub [ms]',
  );
  const uncaught_exception_total = MetricsRecorder.new_counter(
    "uncaught_exception_total",
    'counts "BUG"s',
  );

  return { metric_blocked, uncaught_exception_total };
}
108
109
// Spawn a command and wait until the child process has exited.
//
// The exit code is deliberately ignored (e.g. pkill exits non-zero when no
// process matched).  The promise only rejects when the 'error' event fires,
// i.e. when the process could not be spawned at all.
function runAndWaitForExit(command: string, args: string[]): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn(command, args);
    child.once("error", reject);
    child.once("exit", () => resolve());
  });
}

// Starts everything that the command line options in `program` ask for.
//
// In order: metrics and the event-loop "blocked" monitor, the database
// connection (retried until it works), an optional schema update, server
// settings from the environment, the optional agent endpoint and mentions
// handler, project control, the websocket-related services, the express app
// plus passport, the http server (and the optional port 80 redirect), primus,
// the periodic hub registration and finally the maintenance loops.
//
// The uncaughtException/unhandledRejection listeners are installed only at the
// very end, so any error during startup still propagates to the caller.
async function startServer(): Promise<void> {
  winston.info("start_server");

  winston.info(`basePath='${basePath}'`);
  winston.info(
    `database: name="${program.databaseName}" nodes="${program.databaseNodes}" user="${program.databaseUser}"`,
  );

  const { metric_blocked, uncaught_exception_total } = await initMetrics();

  // Record and log anything that blocks the CPU for more than ~100ms -- see https://github.com/tj/node-blocked
  blocked((ms: number) => {
    if (ms > 100) {
      metric_blocked.inc(ms);
      winston.debug(`BLOCKED for ${ms}ms`);
    }
  });

  // Wait for database connection to work.  Everything requires this.
  await retry_until_success({
    f: async () => await callback2(database.connect),
    start_delay: 1000,
    max_delay: 10000,
  });
  winston.info("connected to database.");

  if (program.updateDatabaseSchema) {
    winston.info("Update database schema");
    await callback2(database.update_schema);

    // in those cases where we initialize the database upon startup
    // (essentially only relevant for kucalc's hub-websocket)
    if (program.mode === "kucalc") {
      // and for on-prem setups, also initialize the admin account, set a registration token, etc.
      await initialOnPremSetup(database);
    }
  }

  // set server settings based on environment variables
  await load_server_settings_from_env(database);

  if (program.agentPort) {
    winston.info("Configure agent port");
    set_agent_endpoint(program.agentPort, program.hostname);
  }

  // Mentions
  if (program.mentions) {
    winston.info("enabling handling of mentions...");
    initHandleMentions();
  }

  // Project control
  winston.info("initializing project control...");
  const projectControl = initProjectControl(program.mode);
  // used for nextjs hot module reloading dev server
  process.env["COCALC_MODE"] = program.mode;

  if (program.mode !== "kucalc" && program.websocketServer) {
    // We handle idle timeout of projects.
    // This can be disabled via COCALC_NO_IDLE_TIMEOUT.
    // This only uses the admin-configurable settings field of projects
    // in the database and isn't aware of licenses or upgrades.
    initIdleTimeout(projectControl);
  }

  if (program.websocketServer) {
    // Initialize the version server -- must happen after updating schema
    // (for first ever run).
    await initVersionServer();

    if (program.mode === "single-user" && process.env.USER === "user") {
      // Definitely in dev mode, probably on cocalc.com in a project, so we kill
      // all the running projects when starting the hub:
      // Whenever we start the dev server, we just assume
      // all projects are stopped, since assuming they are
      // running when they are not is bad.  Something similar
      // is done in cocalc-docker.
      winston.info("killing all projects...");
      await callback2(database._query, {
        safety_check: false,
        query: 'update projects set state=\'{"state":"opened"}\'',
      });
      // spawn() returns a ChildProcess, not a promise, so awaiting it directly
      // does not wait for anything; wait for the process to exit instead.
      await runAndWaitForExit("pkill", [
        "-f",
        "node_modules/.bin/cocalc-project",
      ]);

      // Also, unrelated to killing projects, for purposes of developing
      // custom software images, we inject a couple of random nonsense entries
      // into the table in the DB:
      winston.info("inserting random nonsense compute images in database");
      await callback2(database.insert_random_compute_images);
    }

    if (program.mode !== "kucalc") {
      await init_update_stats();
      await init_update_site_license_usage_log();
      // This is async but runs forever, so don't wait for it.
      winston.info("init starting always running projects");
      init_start_always_running_projects(database);
    }
  }

  const { router, httpServer } = await initExpressApp({
    isPersonal: program.personal,
    projectControl,
    proxyServer: !!program.proxyServer,
    nextServer: !!program.nextServer,
    cert: program.httpsCert,
    key: program.httpsKey,
    listenersHack:
      program.mode === "single-user" &&
      program.proxyServer &&
      program.nextServer &&
      program.websocketServer &&
      process.env["NODE_ENV"] === "development",
  });

  // The express app created via initExpressApp above **assumes** that init_passport is done
  // or complains a lot. This is obviously not really necessary, but we leave it for now.
  await callback2(init_passport, {
    router,
    database,
    host: program.hostname,
  });

  winston.info(`starting webserver listening on ${program.hostname}:${port}`);
  await callback(httpServer.listen.bind(httpServer), port, program.hostname);

  // NOTE: loose equality is kept on purpose here, since the type of the
  // imported `port` is not visible in this file.
  if (port == 443 && program.httpsCert && program.httpsKey) {
    // also start a redirect from port 80 to port 443.
    await initHttpRedirect(program.hostname);
  }

  if (program.websocketServer) {
    winston.info("initializing primus websocket server");
    initPrimus({
      httpServer,
      router,
      projectControl,
      clients,
      host: program.hostname,
      port,
      isPersonal: program.personal,
    });
  }

  if (program.websocketServer || program.proxyServer || program.nextServer) {
    winston.info(
      "Starting registering periodically with the database and updating a health check...",
    );

    // register the hub with the database periodically, and
    // also confirms that database is working.
    await callback2(startHubRegister, {
      database,
      clients,
      host: program.hostname,
      port,
      interval_s: REGISTER_INTERVAL_S,
    });

    const protocol = program.httpsKey ? "https" : "http";
    const target = `${protocol}://${program.hostname}:${port}${basePath}`;

    // Extra hint that is only shown when a non-trivial base path is in use.
    const basePathHint =
      basePath.length <= 1
        ? ""
        : "If you are developing cocalc inside of cocalc, take the URL of the host cocalc\nand append " +
          basePath +
          " to it.";
    const msg = `Started HUB!\n\n-----------\n\n The following URL *might* work: ${target}\n\n\nPORT=${port}\nBASE_PATH=${basePath}\nPROTOCOL=${protocol}\n\n${basePathHint}\n\n-----------\n\n`;
    winston.info(msg);
    console.log(msg);

    // this is not so robust, so disabled for now.
    // if (
    //   program.websocketServer &&
    //   program.nextServer &&
    //   process.env["NODE_ENV"] != "production"
    // ) {
    //   // This is entirely to deal with conflicts between both nextjs and webpack when doing
    //   // hot module reloading.  They fight with each other, and then we -- the developers --
    //   // win only AFTER the fight is done. So we force the fight automatically, rather than
    //   // manually, which is confusing.
    //   console.log(
    //     `launch get of ${target} so that webpack and nextjs websockets can fight things out`,
    //   );
    //   const process = spawn(
    //     "chromium-browser",
    //     ["--no-sandbox", "--headless", target],
    //     { detached: true, stdio: "ignore" },
    //   );
    //   process.unref();
    // }
  }

  if (program.all || program.mentions) {
    // kucalc: for now we just have the hub-mentions servers
    // do the new project pool maintenance, since there is only
    // one hub-stats.
    // On non-cocalc it'll get done by *the* hub because of program.all.
    initNewProjectPoolMaintenanceLoop();
    // Starts periodic maintenance on pay-as-you-go purchases, e.g., quota
    // upgrades of projects.
    initPurchasesMaintenanceLoop();
    initSalesloftMaintenance();
    setInterval(trimLogFileSize, 1000 * 60 * 3);
  }

  // From here on, routine errors are logged and recorded instead of crashing.
  addErrorListeners(uncaught_exception_total);
}
322
323
// addErrorListeners: after successful startup, don't crash on routine errors.
324
// We don't do this until startup, since we do want to crash on errors on startup.
325
326
// Use cache to not save the SAME error to the database (and prometheus)
327
// more than once per minute.
328
const errorReportCache = new TTLCache({ ttl: 60 * 1000 });

// Installs process-wide listeners for "uncaughtException" and
// "unhandledRejection".  Each occurrence is logged in full; in addition it is
// passed to database.uncaught_exception and counted in
// uncaught_exception_total, unless an entry with the same key is still present
// in errorReportCache.  Returns the value of the final process.on call.
function addErrorListeners(uncaught_exception_total) {
  const EXCEPTION_BANNER =
    "BUG ****************************************************************************";
  const REJECTION_BANNER =
    "BUG UNHANDLED REJECTION *********************************************************";

  // Store the problem in the database and bump the counter, but skip both
  // when the same key was already reported and is still cached.
  const reportOnce = (key: string, problem): void => {
    if (errorReportCache.has(key)) {
      return;
    }
    errorReportCache.set(key, true);
    database?.uncaught_exception(problem);
    uncaught_exception_total.inc(1);
  };

  process.addListener("uncaughtException", (err) => {
    winston.error(EXCEPTION_BANNER);
    winston.error("Uncaught exception: " + err);
    console.error(err.stack);
    winston.error(err.stack);
    winston.error(EXCEPTION_BANNER);
    reportOnce(`${err}`, err);
  });

  return process.on("unhandledRejection", (reason, p) => {
    winston.error(REJECTION_BANNER);
    console.error(p, reason); // strangely sometimes winston.error can't actually show the traceback...
    winston.error("Unhandled Rejection at:", p, "reason:", reason);
    winston.error(REJECTION_BANNER);
    reportOnce(`${p}${reason}`, reason);
  });
}
368
369
//############################################
370
// Process command line arguments
371
//############################################
372
// Parses the command line, copies the resulting options into `program` and
// then runs exactly one action: one of the one-shot maintenance commands
// (password reset, stripe sync, delete expired, blob maintenance, update
// stats), each of which exits the process when done, or otherwise the server.
//
// Everything after option parsing runs inside a single try/catch, so any
// failure -- including a missing mode -- is logged and ends the process with
// exit code 1.
async function main(): Promise<void> {
  commander
    .name("cocalc-hub-server")
    .usage("options")
    .addOption(
      new Option(
        "--mode [string]",
        `REQUIRED mode in which to run CoCalc (${COCALC_MODES.join(
          ", ",
        )}) - or set COCALC_MODE env var`,
      ).choices(COCALC_MODES as any as string[]),
    )
    .option(
      "--all",
      "runs all of the servers: websocket, proxy, next (so you don't have to pass all those opts separately), and also mentions updator and updates db schema on startup; use this in situations where there is a single hub that serves everything (instead of a microservice situation like kucalc)",
    )
    .option("--websocket-server", "run the websocket server")
    .option("--proxy-server", "run the proxy server")
    .option(
      "--next-server",
      "run the nextjs server (landing pages, share server, etc.)",
    )
    .option(
      "--https-key [string]",
      "serve over https.  argument should be a key filename (both https-key and https-cert must be specified)",
    )
    .option(
      "--https-cert [string]",
      "serve over https.  argument should be a cert filename (both https-key and https-cert must be specified)",
    )
    .option(
      "--agent-port <n>",
      "port for HAProxy agent-check (default: 0 -- do not start)",
      (n) => parseInt(n, 10),
      0,
    )
    .option(
      "--hostname [string]",
      'host of interface to bind to (default: "127.0.0.1")',
      "127.0.0.1",
    )
    .option(
      "--database-nodes <string,string,...>",
      `database address (default: '${DEFAULT_DB_HOST}')`,
      DEFAULT_DB_HOST,
    )
    .option(
      "--database-name [string]",
      `Database name to use (default: "${DEFAULT_DB_NAME}")`,
      DEFAULT_DB_NAME,
    )
    .option(
      "--database-user [string]",
      `Database username to use (default: "${DEFAULT_DB_USER}")`,
      DEFAULT_DB_USER,
    )
    .option("--passwd [email_address]", "Reset password of given user", "")
    .option(
      "--update-database-schema",
      "If specified, updates database schema on startup (always happens when mode is not kucalc).",
    )
    // NOTE(review): the argument-less flags below (--stripe-sync, --update-stats,
    // --delete-expired, --blob-maintenance) pass "yes" as a default value.
    // Whether such a default is applied when the flag is absent appears to
    // depend on the installed commander major version.  If it were applied
    // unconditionally, program.stripeSync would always be truthy and the
    // dispatch below would never reach startServer -- TODO confirm before
    // upgrading commander.
    .option(
      "--stripe-sync",
      "Sync stripe subscriptions to database for all users with stripe id",
      "yes",
    )
    .option(
      "--update-stats",
      "Calculates the statistics for the /stats endpoint and stores them in the database",
      "yes",
    )
    .option("--delete-expired", "Delete expired data from the database", "yes")
    .option(
      "--blob-maintenance",
      "Do blob-related maintenance (dump to tarballs, offload to gcloud)",
      "yes",
    )
    .option(
      "--mentions",
      "if given, periodically handle mentions; on kucalc there is only one of these.  It also managed the new project pool.  Maybe this should be renamed --singleton!",
    )
    .option(
      "--test",
      "terminate after setting up the hub -- used to test if it starts up properly",
    )
    .option(
      "--db-concurrent-warn <n>",
      "be very unhappy if number of concurrent db requests exceeds this (default: 300)",
      (n) => parseInt(n, 10),
      300,
    )
    .option(
      "--personal",
      "run VERY UNSAFE: there is only one user and no authentication",
    )
    .parse(process.argv);

  // Everywhere else in our code, we just refer to program.[options] since we
  // wrote this code against an ancient version of commander.
  Object.assign(program, commander.opts());

  //console.log("got opts", opts);

  try {
    // The mode may come from the command line or, as a fallback, from the
    // COCALC_MODE environment variable.  If neither is set, the error thrown
    // here is handled by the catch below (logged, then exit code 1).
    if (!program.mode) {
      program.mode = process.env.COCALC_MODE;
      if (!program.mode) {
        throw new Error(
          `the --mode option must be specified or the COCALC_MODE env var set to one of ${COCALC_MODES.join(
            ", ",
          )}`,
        );
      }
    }

    // --all is shorthand for enabling every server plus the singleton tasks.
    if (program.all) {
      program.updateDatabaseSchema = true;
      program.mentions = true;
      program.nextServer = true;
      program.proxyServer = true;
      program.websocketServer = true;
    }

    // Everything we do here requires the database to be initialized. Once
    // this is called, require('@cocalc/database/postgres/database').default() is a valid db
    // instance that can be used.
    initDatabase({
      host: program.databaseNodes,
      database: program.databaseName,
      user: program.databaseUser,
      concurrent_warn: program.dbConcurrentWarn,
    });

    if (program.passwd) {
      winston.debug("Resetting password");
      await reset_password(program.passwd);
      process.exit();
    } else if (program.stripeSync) {
      winston.debug("Stripe sync");
      await stripe_sync({ database, logger: winston });
      process.exit();
    } else if (program.deleteExpired) {
      await callback2(database.delete_expired, {
        count_only: false,
      });
      process.exit();
    } else if (program.blobMaintenance) {
      await callback2(database.blob_maintenance);
      process.exit();
    } else if (program.updateStats) {
      await callback2(database.get_stats);
      process.exit();
    } else {
      await startServer();
    }
  } catch (err) {
    console.log(err);
    winston.error("Error -- ", err);
    process.exit(1);
  }
}
535
536
// Entry point: runs as soon as this module is loaded.
main();
537
538