GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/database/postgres/stats.ts
/*
 * This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 * License: MS-RSL – see LICENSE.md for details
 */

import { callback2 as cb2 } from "@cocalc/util/async-utils";
import { RECENT_TIMES, RECENT_TIMES_KEY } from "@cocalc/util/schema";
import * as misc from "@cocalc/util/misc";
const { defaults } = misc;
const required = defaults.required;
import { map, zipObject } from "lodash";

import { PostgreSQL } from "./types";
import { EXTENSIONS } from "@cocalc/util/db-schema/stats";
const { all_results } = require("../postgres-base");

// some stats queries have to crunch a lot of rows, which can take a while --
// we give each of them up to five minutes
const QUERY_TIMEOUT_S = 300;

interface Opts {
  ttl_dt: number; // secs subtracted from ttl to compensate for the computation duration when called via a cronjob (typically 15)
  ttl: number; // how long the cached version lives (in seconds)
  ttl_db: number; // how long a valid result from a db query is cached in any case
  update: boolean; // if true: recalculate if older than ttl; if false: don't recalculate, just pick it up from the DB (locally cached for ttl secs)
  cb: (err, stats) => void;
}

type Data = { [key: string]: number };

interface RunningProjects {
  free: number;
  member: number;
}

// TODO type this to fit with fields defined in db-schema/stats.ts
interface Stats {
  id: string;
  time: Date;
  accounts: number;
  projects: number;
  projects_created: Data;
  projects_edited: Data;
  accounts_created: Data;
  accounts_active: Data;
  running_projects: RunningProjects;
  hub_servers: any;
  files_opened: {
    distinct: Data;
    total: Data;
  };
}

let _stats_cached: any = null;
let _stats_cached_db_query: Date | null = null;
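
// Caching overview (descriptive note): there are two layers.  The module-level
// variables above hold the most recent stats object in memory for this hub
// process, while the "stats" table in the database acts as a shared cache
// across hubs.  check_local_cache() consults the in-memory copy first (using
// ttl, and ttl_dt when updates are allowed); check_db_cache() falls back to the
// newest row of the stats table, and _stats_cached_db_query keeps track of when
// that happened so the table isn't re-read more often than every ttl_db secs.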

async function _count_timespan(db: PostgreSQL, opts): Promise<any> {
  opts = defaults(opts, {
    table: required,
    field: undefined,
    age_m: undefined,
    upper_m: undefined, // defaults to zero minutes (i.e. "now")
  });
  const { table, field, age_m, upper_m } = opts;
  const where = {};
  if (field != null) {
    if (age_m != null) {
      where[`${field} >= $::TIMESTAMP`] = misc.minutes_ago(age_m);
    }
    if (upper_m != null) {
      where[`${field} <= $::TIMESTAMP`] = misc.minutes_ago(upper_m);
    }
  }
  const result = await cb2(db._query, {
    query: `SELECT COUNT(*) FROM ${table}`,
    where,
    timeout_s: QUERY_TIMEOUT_S,
  });
  // COUNT(*) comes back from the driver as a string (bigint) -- convert it to a number
  return parseInt(result?.rows?.[0]?.count);
}
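
// For example (illustrative, not generated code): counting projects edited in
// the last hour amounts to a query roughly like
//   SELECT COUNT(*) FROM projects WHERE last_edited >= NOW() - INTERVAL '60 minutes'
// with the timestamp bound passed as a parameter via db._query's "where" handling.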

function _count_opened_files_query(distinct: boolean): string {
  const extensions = EXTENSIONS.map((x) => `'${x}'`).join(", ");
  return `\
WITH filenames AS (
    SELECT ${distinct ? "DISTINCT" : ""} event ->> 'filename' AS fn
    FROM project_log
    WHERE time BETWEEN $1::TIMESTAMP AND NOW()
      AND event @> '{"action" : "open"}'::jsonb
), ext_count AS (
    SELECT COUNT(*) as cnt, lower(reverse(split_part(reverse(fn), '.', 1))) AS ext
    FROM filenames
    GROUP BY ext
)
SELECT ext, cnt
FROM ext_count
WHERE ext IN (${extensions}) ORDER BY ext
`;
}
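
// The query above returns one row per known file extension, e.g. (illustrative):
//   ext   | cnt
//   ------+-----
//   ipynb | 123
//   tex   |  45
// i.e. how many files with that extension were opened since the given timestamp
// (or, with DISTINCT, how many distinct filenames were opened).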

async function _count_opened_files(db: PostgreSQL, opts): Promise<void> {
  opts = defaults(opts, {
    age_m: undefined,
    key: required,
    data: required,
    distinct: required, // true or false
  });
  const { age_m, key, data, distinct } = opts;
  const q = _count_opened_files_query(distinct);

  const res = await cb2(db._query, {
    query: q,
    params: [misc.minutes_ago(age_m)],
    timeout_s: QUERY_TIMEOUT_S,
  });

  // copy the row objects (as "all_results" in postgres-base does) before reshaping them
  const rows = res.rows.map((x) => misc.copy(x));
  const values = zipObject(map(rows, "ext"), map(rows, "cnt"));
  data[key] = values;
}

function check_local_cache({ update, ttl_dt, ttl, ttl_db, dbg }): Stats | null {
  if (_stats_cached == null) return null;

  // decide if cache should be used -- tighten interval if we are allowed to update
  const offset_dt = update ? ttl_dt : 0;
  const is_cache_recent =
    _stats_cached.time > misc.seconds_ago(ttl - offset_dt);
  // in case we aren't allowed to update and the cache is outdated, do not query db too often
  const did_query_recently =
    _stats_cached_db_query != null &&
    _stats_cached_db_query > misc.seconds_ago(ttl_db);
  if (is_cache_recent || did_query_recently) {
    dbg(
      `using locally cached stats from ${
        (new Date().getTime() - _stats_cached.time) / 1000
      } secs ago.`,
    );
    return _stats_cached;
  }
  return null;
}

async function check_db_cache({
  db,
  update,
  ttl,
  ttl_dt,
  dbg,
}): Promise<Stats | null> {
  try {
    const res = await cb2(db._query, {
      query: "SELECT * FROM stats ORDER BY time DESC LIMIT 1",
    });
    if (res?.rows?.length != 1) {
      dbg("no data (1)");
      return null;
    }

    const x = misc.map_without_undefined_and_null(res.rows[0]) as any;
    if (x == null) {
      dbg("no data (2)");
      return null;
    }

    dbg(`check_db_cache x = ${misc.to_json(x)}`);

    _stats_cached_db_query = new Date();
    if (update && x.time < misc.seconds_ago(ttl - ttl_dt)) {
      dbg("cache outdated -- will update stats");
      return null;
    } else {
      dbg(
        `using db stats from ${
          (new Date().getTime() - x.time) / 1000
        } secs ago.`,
      );
      // store the still-valid result in the local cache
      _stats_cached = misc.deep_copy(x);
      return _stats_cached;
    }
  } catch (err) {
    dbg("problem with query -- no stats in db?");
    throw err;
  }
}

const running_projects_query = `\
SELECT count(*), run_quota -> 'member_host' AS member
FROM projects
WHERE state ->> 'state' in ('running', 'starting')
GROUP BY member`;
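
// The query above returns one row per value of run_quota -> 'member_host'
// among running/starting projects, e.g. (illustrative):
//   count | member
//   ------+--------
//    1234 | false
//     567 | true
// calc_running_projects() below folds these rows into a { free, member } pair.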

async function calc_running_projects(db): Promise<RunningProjects> {
  const data = { free: 0, member: 0 };
  const res = await cb2(db._query, { query: running_projects_query });
  for (const row of res.rows) {
    if (row.member) {
      data.member = parseInt(row.count);
    } else {
      data.free = parseInt(row.count);
    }
  }
  return data;
}

async function _calc_stats({ db, dbg, start_t }): Promise<Stats> {
  const stats: Stats = {
    id: misc.uuid(),
    time: new Date(),
    accounts: 0,
    projects: 0,
    projects_created: {},
    projects_edited: {},
    accounts_created: {},
    accounts_active: {},
    files_opened: { distinct: {}, total: {} },
    hub_servers: [],
    running_projects: { free: 0, member: 0 },
  };
  const R = RECENT_TIMES;
  const K = RECENT_TIMES_KEY;

  stats.accounts = await _count_timespan(db, {
    table: "accounts",
  });

  stats.projects = await _count_timespan(db, {
    table: "projects",
  });

  stats.projects_edited[K.active] = await _count_timespan(db, {
    table: "projects",
    field: "last_edited",
    age_m: R.active,
  });

  stats.accounts_active[K.active] = await _count_timespan(db, {
    table: "accounts",
    field: "last_active",
    age_m: R.active,
  });

  // wrap the callback-style db._query in a promise and keep only the
  // hub_servers entries that have not expired yet
  await new Promise<void>((done, reject) => {
    db._query({
      query: "SELECT expire, host, clients FROM hub_servers",
      cb: all_results((err, hub_servers) => {
        if (err) {
          reject(err);
        } else {
          const now = new Date();
          stats.hub_servers = [];
          for (let x of hub_servers) {
            if (x.expire > now) {
              delete x.expire;
              stats.hub_servers.push(x);
            }
          }
          done();
        }
      }),
    });
  });

  // this used to run in parallel, but there is no hurry updating the stats...
  for (const tkey of ["last_month", "last_week", "last_day", "last_hour"]) {
    await _count_opened_files(db, {
      age_m: R[tkey],
      key: K[tkey],
      data: stats.files_opened.distinct,
      distinct: true,
    });
    await _count_opened_files(db, {
      age_m: R[tkey],
      key: K[tkey],
      data: stats.files_opened.total,
      distinct: false,
    });
    stats.projects_edited[K[tkey]] = await _count_timespan(db, {
      table: "projects",
      field: "last_edited",
      age_m: R[tkey],
    });
    stats.projects_created[K[tkey]] = await _count_timespan(db, {
      table: "projects",
      field: "created",
      age_m: R[tkey],
    });
    stats.accounts_active[K[tkey]] = await _count_timespan(db, {
      table: "accounts",
      field: "last_active",
      age_m: R[tkey],
    });
    stats.accounts_created[K[tkey]] = await _count_timespan(db, {
      table: "accounts",
      field: "created",
      age_m: R[tkey],
    });
  }

  stats.running_projects = await calc_running_projects(db);

  const elapsed_t = process.hrtime(start_t);
  const duration_s = (elapsed_t[0] + elapsed_t[1] / 1e9).toFixed(4);
  dbg(
    `everything succeeded above after ${duration_s} secs -- now insert stats`,
  );
  // store the result in the local cache and in the db
  _stats_cached = misc.deep_copy(stats);
  await cb2(db._query, {
    query: "INSERT INTO stats",
    values: stats,
  });

  return stats;
}
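
// Note on cost: _calc_stats runs its queries sequentially -- four global/active
// counts, six queries per time range in the loop (four ranges), plus the
// hub_servers, running-projects and INSERT queries -- roughly 30 database round
// trips per recomputation, which is why QUERY_TIMEOUT_S is generous and results
// are cached.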

export async function calc_stats(db: PostgreSQL, opts: Opts) {
  const { ttl_dt, ttl, ttl_db, update, cb } = opts;

  const start_t = process.hrtime();
  const dbg = db._dbg("get_stats");

  let stats: Stats | null = null;
  stats = check_local_cache({ update, ttl_dt, ttl, ttl_db, dbg });
  if (stats == null) {
    dbg("checking db cache?");
    stats = await check_db_cache({ db, update, ttl, ttl_dt, dbg });
  }

  if (stats != null) {
    dbg(`stats != null → nothing to do`);
  } else if (!update) {
    dbg("warning: no recent stats but not allowed to update");
  } else {
    dbg("we're actually recomputing the stats");
    try {
      stats = await _calc_stats({ db, dbg, start_t });
    } catch (err) {
      dbg(`error calculating stats: err=${err}`);
      cb?.(err, null);
      return;
    }
  }

  dbg(`stats=${misc.to_json(stats)}`);
  // uncomment to fully debug the resulting stats object
  //console.debug(JSON.stringify(stats, null, 2));
  //process.exit();
  cb?.(undefined, stats);
  return stats;
}
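
// Example (illustrative values, not the production defaults): a hub might
// refresh the stats periodically via something like
//
//   calc_stats(db, {
//     ttl: 300,      // serve cached stats for up to 5 minutes
//     ttl_dt: 15,    // recompute a bit early to absorb computation time
//     ttl_db: 30,    // don't re-read the stats table more than every 30 secs
//     update: true,  // allowed to recompute when the cache is stale
//     cb: (err, stats) => { /* ... */ },
//   });
//
// With update: false it only serves whatever is cached locally or in the DB.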

// for testing only
if (process.env["NODE_DEV"] === "TEST") {
  exports._count_opened_files_query = _count_opened_files_query;
}