Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/database/postgres/stats.ts
Views: 687
/*1* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2* License: MS-RSL – see LICENSE.md for details3*/45import { callback2 as cb2 } from "@cocalc/util/async-utils";6import { RECENT_TIMES, RECENT_TIMES_KEY } from "@cocalc/util/schema";7import * as misc from "@cocalc/util/misc";8const { defaults } = misc;9const required = defaults.required;10import { map, zipObject } from "lodash";1112import { PostgreSQL } from "./types";13import { EXTENSIONS } from "@cocalc/util/db-schema/stats";14const { all_results } = require("../postgres-base");1516// some stats queries have to crunch a lot of rows, which could take a bit17// we give them a couple of minutes each…18const QUERY_TIMEOUT_S = 300;1920interface Opts {21ttl_dt: number; // 15 secs subtracted from ttl to compensate for computation duration when called via a cronjob22ttl: number; // how long cached version lives (in seconds)23ttl_db: number; // how long a valid result from a db query is cached in any case24update: boolean; // if true: recalculate if older than ttl; false: don't recalculate and pick it from the DB (locally cached for ttl secs)25cb: (err, stats) => void;26}2728type Data = { [key: string]: number };2930interface RunningProjects {31free: number;32member: number;33}3435// TODO type this to fit with fields defined in db-schema/stats.ts36interface Stats {37id: string;38time: Date;39accounts: number;40projects: number;41projects_created: Data;42projects_edited: Data;43accounts_created: Data;44accounts_active: Data;45running_projects: RunningProjects;46hub_servers: any;47files_opened: {48distinct: Data;49total: Data;50};51}5253let _stats_cached: any = null;54let _stats_cached_db_query: Date | null = null;5556async function _count_timespan(db: PostgreSQL, opts): Promise<any> {57opts = defaults(opts, {58table: required,59field: undefined,60age_m: undefined,61upper_m: undefined, // defaults to zero minutes (i.e. 
"now")62});63const { table, field, age_m, upper_m } = opts;64const where = {};65if (field != null) {66if (age_m != null) {67where[`${field} >= $::TIMESTAMP`] = misc.minutes_ago(age_m);68}69if (upper_m != null) {70where[`${field} <= $::TIMESTAMP`] = misc.minutes_ago(upper_m);71}72}73const result = await cb2(db._query, {74query: `SELECT COUNT(*) FROM ${table}`,75where,76timeout_s: QUERY_TIMEOUT_S,77});78// count_result79return parseInt(result?.rows?.[0]?.count);80}8182function _count_opened_files_query(distinct: boolean): string {83const extensions = EXTENSIONS.map((x) => `'${x}'`).join(", ");84return `\85WITH filenames AS (86SELECT ${distinct ? "DISTINCT" : ""} event ->> 'filename' AS fn87FROM project_log88WHERE time BETWEEN $1::TIMESTAMP AND NOW()89AND event @> '{"action" : "open"}'::jsonb90), ext_count AS (91SELECT COUNT(*) as cnt, lower(reverse(split_part(reverse(fn), '.', 1))) AS ext92FROM filenames93GROUP BY ext94)95SELECT ext, cnt96FROM ext_count97WHERE ext IN (${extensions}) ORDER BY ext98`;99}100101async function _count_opened_files(db: PostgreSQL, opts): Promise<void> {102opts = defaults(opts, {103age_m: undefined,104key: required,105data: required,106distinct: required, // true or false107});108const { age_m, key, data, distinct } = opts;109const q = _count_opened_files_query(distinct);110111const res = await cb2(db._query, {112query: q,113params: [misc.minutes_ago(age_m)],114timeout_s: QUERY_TIMEOUT_S,115});116117// misc.copy? see "all_results"118const rows = res.rows.map((x) => misc.copy(x));119const values = zipObject(map(rows, "ext"), map(rows, "cnt"));120data[key] = values;121}122123function check_local_cache({ update, ttl_dt, ttl, ttl_db, dbg }): Stats | null {124if (_stats_cached == null) return null;125126// decide if cache should be used -- tighten interval if we are allowed to update127const offset_dt = update ? 
ttl_dt : 0;128const is_cache_recent =129_stats_cached.time > misc.seconds_ago(ttl - offset_dt);130// in case we aren't allowed to update and the cache is outdated, do not query db too often131const did_query_recently =132_stats_cached_db_query != null &&133_stats_cached_db_query > misc.seconds_ago(ttl_db);134if (is_cache_recent || did_query_recently) {135dbg(136`using locally cached stats from ${137(new Date().getTime() - _stats_cached.time) / 1000138} secs ago.`,139);140return _stats_cached;141}142return null;143}144145async function check_db_cache({146db,147update,148ttl,149ttl_dt,150dbg,151}): Promise<Stats | null> {152try {153const res = await cb2(db._query, {154query: "SELECT * FROM stats ORDER BY time DESC LIMIT 1",155});156if (res?.rows?.length != 1) {157dbg("no data (1)");158return null;159}160161const x = misc.map_without_undefined_and_null(res.rows[0]) as any;162if (x == null) {163dbg("no data (2)");164return null;165}166167dbg(`check_db_cache x = ${misc.to_json(x)}`);168169_stats_cached_db_query = new Date();170if (update && x.time < misc.seconds_ago(ttl - ttl_dt)) {171dbg("cache outdated -- will update stats");172return null;173} else {174dbg(175`using db stats from ${176(new Date().getTime() - x.time) / 1000177} secs ago.`,178);179// storing still valid result in local cache180_stats_cached = misc.deep_copy(x);181return _stats_cached;182}183} catch (err) {184dbg("problem with query -- no stats in db?");185throw err;186}187}188189const running_projects_query = `\190SELECT count(*), run_quota -> 'member_host' AS member191FROM projects192WHERE state ->> 'state' in ('running', 'starting')193GROUP BY member`;194195async function calc_running_projects(db): Promise<RunningProjects> {196const data = { free: 0, member: 0 };197const res = await cb2(db._query, { query: running_projects_query });198for (const row of res.rows) {199if (row.member) {200data.member = parseInt(row.count);201} else {202data.free = parseInt(row.count);203}204}205return 
data;206}207208async function _calc_stats({ db, dbg, start_t }): Promise<Stats> {209const stats: Stats = {210id: misc.uuid(),211time: new Date(),212accounts: 0,213projects: 0,214projects_created: {},215projects_edited: {},216accounts_created: {},217accounts_active: {},218files_opened: { distinct: {}, total: {} },219hub_servers: [],220running_projects: { free: 0, member: 0 },221};222const R = RECENT_TIMES;223const K = RECENT_TIMES_KEY;224225stats.accounts = await _count_timespan(db, {226table: "accounts",227});228229stats.projects = await _count_timespan(db, {230table: "projects",231});232233stats.projects_edited[K.active] = await _count_timespan(db, {234table: "projects",235field: "last_edited",236age_m: R.active,237});238239stats.accounts_active[K.active] = await _count_timespan(db, {240table: "accounts",241field: "last_active",242age_m: R.active,243});244245await new Promise<void>((done, reject) => {246db._query({247query: "SELECT expire, host, clients FROM hub_servers",248cb: all_results((err, hub_servers) => {249if (err) {250reject(err);251} else {252const now = new Date();253stats.hub_servers = [];254for (let x of hub_servers) {255if (x.expire > now) {256delete x.expire;257stats.hub_servers.push(x);258}259}260done();261}262}),263});264});265266// this was running in parallel, but there is no hurry updating the stats...267for (const tkey of ["last_month", "last_week", "last_day", "last_hour"]) {268await _count_opened_files(db, {269age_m: R[tkey],270key: K[tkey],271data: stats.files_opened.distinct,272distinct: true,273});274await _count_opened_files(db, {275age_m: R[tkey],276key: K[tkey],277data: stats.files_opened.total,278distinct: false,279});280stats.projects_edited[K[tkey]] = await _count_timespan(db, {281table: "projects",282field: "last_edited",283age_m: R[tkey],284});285stats.projects_created[K[tkey]] = await _count_timespan(db, {286table: "projects",287field: "created",288age_m: R[tkey],289});290stats.accounts_active[K[tkey]] = await 
_count_timespan(db, {291table: "accounts",292field: "last_active",293age_m: R[tkey],294});295stats.accounts_created[K[tkey]] = await _count_timespan(db, {296table: "accounts",297field: "created",298age_m: R[tkey],299});300}301302stats.running_projects = await calc_running_projects(db);303304const elapsed_t = process.hrtime(start_t);305const duration_s = (elapsed_t[0] + elapsed_t[1] / 1e9).toFixed(4);306dbg(307`everything succeeded above after ${duration_s} secs -- now insert stats`,308);309// storing in local and db cache310_stats_cached = misc.deep_copy(stats);311await cb2(db._query, {312query: "INSERT INTO stats",313values: stats,314});315316return stats;317}318319export async function calc_stats(db: PostgreSQL, opts: Opts) {320const { ttl_dt, ttl, ttl_db, update, cb } = opts;321322const start_t = process.hrtime();323const dbg = db._dbg("get_stats");324325let stats: Stats | null = null;326stats = check_local_cache({ update, ttl_dt, ttl, ttl_db, dbg });327if (stats == null) {328dbg("checking db cache?");329stats = await check_db_cache({ db, update, ttl, ttl_dt, dbg });330}331332if (stats != null) {333dbg(`stats != null → nothing to do`);334} else if (!update) {335dbg("warning: no recent stats but not allowed to update");336} else {337dbg("we're actually recomputing the stats");338try {339stats = await _calc_stats({ db, dbg, start_t });340} catch (err) {341dbg(`error calculating stats: err=${err}`);342cb?.(err, null);343return;344}345}346347dbg(`stats=${misc.to_json(stats)})`);348// uncomment to fully debug the resulting stats object349//console.debug(JSON.stringify(stats, null, 2));350//process.exit();351cb?.(undefined, stats);352return stats;353}354355// for testing only356if (process.env["NODE_DEV"] === "TEST") {357exports._count_opened_files_query = _count_opened_files_query;358}359360361