Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/database/postgres/retention/retained-users.ts
Views: 687
import getLogger from "@cocalc/backend/logger";1const log = getLogger("database:retention:retained-users");23export default async function retainedUsers({4model,5table,6last_start_time,7pool,8start,9stop,10period,11}) {12const query = `WITH13cohort AS (SELECT account_id FROM accounts WHERE created >= $1::timestamp AND created < $2::timestamp),14periods0 AS (15SELECT $1::timestamp + (n * $3::interval) AS period_start,16$1::timestamp + ((n + 1) * $3::interval) AS period_end17FROM generate_series(0, floor(EXTRACT(EPOCH FROM (now() - $1::timestamp - '1 second'::interval)) / EXTRACT(EPOCH FROM $3::interval))::integer) AS n18),19periods AS (SELECT * FROM periods0 ${20last_start_time == null ? "" : "WHERE period_start > $4"21}),22period_counts AS (23SELECT periods.period_start, COUNT(DISTINCT ${table}.account_id) AS count24FROM periods25LEFT JOIN ${table} ON ${table}.time >= periods.period_start AND ${table}.time < periods.period_end26JOIN cohort ON ${table}.account_id = cohort.account_id27GROUP BY periods.period_start28)29SELECT periods.period_start, periods.period_end, COALESCE(period_counts.count, 0) AS count30FROM periods31LEFT JOIN period_counts ON periods.period_start = period_counts.period_start32WHERE periods.period_end <= NOW()33ORDER BY periods.period_start`;34if (last_start_time == null) {35log.debug("just compute all the data");36const { rows } = await pool.query(query, [start, stop, period]);37if (rows.length == 0) {38// shouldn't happen because should get excluded above...39return;40}41const active = rows.map((x) => parseInt(x.count));42const last_start_time = rows[rows.length - 1].period_start;43const size = (44await pool.query(45"SELECT count(*) as size FROM accounts WHERE created >= $1::timestamp AND created < $2::timestamp",46[start, stop]47)48).rows[0].size;49await pool.query(50"INSERT INTO crm_retention(start,stop,model,period,size,active,last_start_time) VALUES($1,$2,$3,$4,$5,$6,$7)",51[start, stop, model, period, size, active, last_start_time]52);53} else {54log.debug("compute the missing data and put it into the database");55const { rows } = await pool.query(query, [56start,57stop,58period,59last_start_time,60]);61if (rows.length == 0) {62// shouldn't happen because should get excluded above...63return;64}65const active = rows.map((x) => parseInt(x.count));66const new_last_start_time = rows[rows.length - 1].period_start;67await pool.query(68"UPDATE crm_retention SET last_start_time=$5::timestamp, active = array_cat(active, $6::integer[]) WHERE start=$1 AND stop=$2 AND model=$3 AND period=$4",69[start, stop, model, period, new_last_start_time, active]70);71}72}7374/*75NOTES on this query:7677This is particularly complicated and we use the period_counts CTE because we want to include 0's even when78there are no matches on a given day, so we can just take the counts exactly and put them in the database.79The actual query below that we really use is even more complicated because it also has to deal with80both doing the original query and updating it as time progresses.8182WITH83cohort AS (SELECT account_id FROM accounts WHERE created >= '2023-04-03'::timestamp AND created < '2023-04-03'::timestamp + interval '1 day'),84periods AS (85SELECT '2023-04-03'::timestamp + (n * '1 day'::interval) AS period_start,86'2023-04-03'::timestamp + ((n + 1) * '1 day'::interval) AS period_end87FROM generate_series(0, floor(EXTRACT(EPOCH FROM (now() - '2023-04-03'::timestamp - '1 second'::interval)) / EXTRACT(EPOCH FROM '1 day'::interval))::integer) AS n88),89period_counts AS (90SELECT periods.period_start, COUNT(DISTINCT file_access_log.account_id) AS count91FROM periods92LEFT JOIN file_access_log ON file_access_log.time >= periods.period_start AND file_access_log.time < periods.period_end93JOIN cohort ON file_access_log.account_id = cohort.account_id94GROUP BY periods.period_start95)96SELECT periods.period_start, periods.period_end, COALESCE(period_counts.count, 0) AS count97FROM periods98LEFT JOIN period_counts ON periods.period_start = period_counts.period_start99WHERE periods.period_end <= NOW()100ORDER BY periods.period_start;101102*/103104105