CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/database/postgres/retention/retained-users.ts
Views: 687
1
import getLogger from "@cocalc/backend/logger";
2
const log = getLogger("database:retention:retained-users");
3
4
export default async function retainedUsers({
5
model,
6
table,
7
last_start_time,
8
pool,
9
start,
10
stop,
11
period,
12
}) {
13
const query = `WITH
14
cohort AS (SELECT account_id FROM accounts WHERE created >= $1::timestamp AND created < $2::timestamp),
15
periods0 AS (
16
SELECT $1::timestamp + (n * $3::interval) AS period_start,
17
$1::timestamp + ((n + 1) * $3::interval) AS period_end
18
FROM generate_series(0, floor(EXTRACT(EPOCH FROM (now() - $1::timestamp - '1 second'::interval)) / EXTRACT(EPOCH FROM $3::interval))::integer) AS n
19
),
20
periods AS (SELECT * FROM periods0 ${
21
last_start_time == null ? "" : "WHERE period_start > $4"
22
}),
23
period_counts AS (
24
SELECT periods.period_start, COUNT(DISTINCT ${table}.account_id) AS count
25
FROM periods
26
LEFT JOIN ${table} ON ${table}.time >= periods.period_start AND ${table}.time < periods.period_end
27
JOIN cohort ON ${table}.account_id = cohort.account_id
28
GROUP BY periods.period_start
29
)
30
SELECT periods.period_start, periods.period_end, COALESCE(period_counts.count, 0) AS count
31
FROM periods
32
LEFT JOIN period_counts ON periods.period_start = period_counts.period_start
33
WHERE periods.period_end <= NOW()
34
ORDER BY periods.period_start`;
35
if (last_start_time == null) {
36
log.debug("just compute all the data");
37
const { rows } = await pool.query(query, [start, stop, period]);
38
if (rows.length == 0) {
39
// shouldn't happen because should get excluded above...
40
return;
41
}
42
const active = rows.map((x) => parseInt(x.count));
43
const last_start_time = rows[rows.length - 1].period_start;
44
const size = (
45
await pool.query(
46
"SELECT count(*) as size FROM accounts WHERE created >= $1::timestamp AND created < $2::timestamp",
47
[start, stop]
48
)
49
).rows[0].size;
50
await pool.query(
51
"INSERT INTO crm_retention(start,stop,model,period,size,active,last_start_time) VALUES($1,$2,$3,$4,$5,$6,$7)",
52
[start, stop, model, period, size, active, last_start_time]
53
);
54
} else {
55
log.debug("compute the missing data and put it into the database");
56
const { rows } = await pool.query(query, [
57
start,
58
stop,
59
period,
60
last_start_time,
61
]);
62
if (rows.length == 0) {
63
// shouldn't happen because should get excluded above...
64
return;
65
}
66
const active = rows.map((x) => parseInt(x.count));
67
const new_last_start_time = rows[rows.length - 1].period_start;
68
await pool.query(
69
"UPDATE crm_retention SET last_start_time=$5::timestamp, active = array_cat(active, $6::integer[]) WHERE start=$1 AND stop=$2 AND model=$3 AND period=$4",
70
[start, stop, model, period, new_last_start_time, active]
71
);
72
}
73
}
74
75
/*
76
NOTES on this query:
77
78
This is particularly complicated and we use the period_counts CTE because we want to include 0's even when
79
there are no matches on a given day, so we can just take the counts exactly and put them in the database.
80
The actual query below that we really use is even more complicated because it also has to deal with
81
both doing the original query and updating it as time progresses.
82
83
WITH
84
cohort AS (SELECT account_id FROM accounts WHERE created >= '2023-04-03'::timestamp AND created < '2023-04-03'::timestamp + interval '1 day'),
85
periods AS (
86
SELECT '2023-04-03'::timestamp + (n * '1 day'::interval) AS period_start,
87
'2023-04-03'::timestamp + ((n + 1) * '1 day'::interval) AS period_end
88
FROM generate_series(0, floor(EXTRACT(EPOCH FROM (now() - '2023-04-03'::timestamp - '1 second'::interval)) / EXTRACT(EPOCH FROM '1 day'::interval))::integer) AS n
89
),
90
period_counts AS (
91
SELECT periods.period_start, COUNT(DISTINCT file_access_log.account_id) AS count
92
FROM periods
93
LEFT JOIN file_access_log ON file_access_log.time >= periods.period_start AND file_access_log.time < periods.period_end
94
JOIN cohort ON file_access_log.account_id = cohort.account_id
95
GROUP BY periods.period_start
96
)
97
SELECT periods.period_start, periods.period_end, COALESCE(period_counts.count, 0) AS count
98
FROM periods
99
LEFT JOIN period_counts ON periods.period_start = period_counts.period_start
100
WHERE periods.period_end <= NOW()
101
ORDER BY periods.period_start;
102
103
*/
104
105