Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/backend/process-stats.ts
5767 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020–2026 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import { exec as cp_exec } from "node:child_process";
7
import { readFile, readdir, readlink } from "node:fs/promises";
8
import { join } from "node:path";
9
import { promisify } from "node:util";
10
11
import { mapParallelLimit } from "@cocalc/util/async-utils";
12
import { reuseInFlight } from "@cocalc/util/reuse-in-flight";
13
import {
14
Cpu,
15
Process,
16
Processes,
17
Stat,
18
State,
19
} from "@cocalc/util/types/project-info/types";
20
import { getLogger } from "./logger";
21
import { envToInt } from "./misc/env-to-number";
22
23
// Debug-level log function of this module's logger.
const { debug: dbg } = getLogger("process-stats");

// Promise-returning wrapper around child_process.exec.
const exec = promisify(cp_exec);

/**
 * Return information about all processes (up to a limit or filter) in the
 * environment where this node.js process runs.
 * This has been refactored out of project/project-info/server.ts.
 * It is also used by the backend itself in "execute-code.ts" – to gather
 * info about a spawned async process.
 */

// Hard cap on the number of processes we gather per sample, to stay on the
// safe side and avoid processing too much data. It is read via envToInt from
// COCALC_PROJECT_INFO_PROC_LIMIT; 1024 is the second argument (presumably the
// fallback when the variable is not set).
const LIMIT = envToInt("COCALC_PROJECT_INFO_PROC_LIMIT", 1024);
36
37
/**
 * Singleton that samples per-process statistics from the Linux /proc
 * filesystem (cmdline, exe, stat) and derives CPU usage percentages by
 * comparing against the previous sample taken under the same sample key.
 *
 * Obtain the instance via `ProcessStats.getInstance()`; the constructor is
 * private.
 */
export class ProcessStats {
  private static instance: ProcessStats;

  // maximum number of processes inspected per call of processes()
  private readonly procLimit: number;

  // when true, processes that cannot be read are reported via the debug logger
  private testing = false;
  // clock ticks per second, from `getconf CLK_TCK` (set by init())
  private ticks: number;
  // memory page size in bytes, from `getconf PAGESIZE` (set by init())
  private pagesize: number;
  // previous sample per sample key, used to compute CPU percentages
  private lastByKey = new Map<
    string,
    { timestamp: number; processes: Processes }
  >();

  private constructor() {
    this.procLimit = LIMIT;
    // Start fetching the kernel configuration right away. The promise is not
    // awaited here (constructors are synchronous), so a failure is logged
    // instead of becoming an unhandled rejection; processes() awaits init()
    // again and reports the error to its caller.
    this.init().catch((err) => dbg(`initialization failed – ${err}`));
  }

  /** Return the shared instance, creating it on first use. */
  public static getInstance(): ProcessStats {
    if (!ProcessStats.instance) {
      ProcessStats.instance = new ProcessStats();
    }
    return ProcessStats.instance;
  }

  /** Enable or disable debug logging of processes that could not be read. */
  public setTesting(testing: boolean): void {
    this.testing = testing;
  }

  /**
   * Grab the kernel configuration values we need (clock ticks per second and
   * page size). They won't change, hence they are only queried as long as
   * `ticks` is not set. Wrapped in reuseInFlight (presumably so that
   * concurrent calls share a single in-flight request).
   */
  public init = reuseInFlight(async () => {
    if (this.ticks == null) {
      const [p_ticks, p_pagesize] = await Promise.all([
        exec("getconf CLK_TCK"),
        exec("getconf PAGESIZE"),
      ]);
      // should be 100, usually
      this.ticks = parseInt(p_ticks.stdout.trim(), 10);
      // 4096?
      this.pagesize = parseInt(p_pagesize.stdout.trim(), 10);
    }
  });

  /**
   * Parse a /proc/[pid]/stat file.
   * The "stat" file contains all the information; this page explains what
   * is what: https://man7.org/linux/man-pages/man5/proc.5.html
   *
   * @param path path of the stat file to read
   * @returns the parsed values; all time values are in seconds and the
   *          resident set size is in MiB
   */
  private async stat(path: string): Promise<Stat> {
    const raw = await readFile(path, "utf8");
    // the "comm" field could contain additional spaces or parentheses, hence
    // we cut at the first "(" and the last ")"
    const [i, j] = [raw.indexOf("("), raw.lastIndexOf(")")];
    const start = raw.slice(0, i - 1).trim();
    const end = raw.slice(j + 1).trim();
    // "comm" is now a placeholder to keep indices as they are.
    const data = `${start} comm ${end}`.split(" ");
    const get = (idx: number): number => parseInt(data[idx], 10);
    // don't forget to account for 0 vs. 1 based indexing:
    // the field numbers (#N) of the man page are one larger than the index.
    const ret = {
      ppid: get(3),
      state: data[2] as State,
      utime: get(13) / this.ticks, // CPU time spent in user code, measured in clock ticks (#14)
      stime: get(14) / this.ticks, // CPU time spent in kernel code, measured in clock ticks (#15)
      cutime: get(15) / this.ticks, // Waited-for children's CPU time spent in user code (in clock ticks) (#16)
      cstime: get(16) / this.ticks, // Waited-for children's CPU time spent in kernel code (in clock ticks) (#17)
      starttime: get(21) / this.ticks, // Time when the process started, measured in clock ticks (#22)
      nice: get(18),
      num_threads: get(19),
      mem: { rss: (get(23) * this.pagesize) / (1024 * 1024) }, // pages -> MiB
    };
    return ret;
  }

  /**
   * Delta-time in seconds between this and the previous sample. Both
   * timestamps are in milliseconds; a missing previous timestamp counts as 0.
   */
  private dt(timestamp: number, lastTimestamp?: number): number {
    return (timestamp - (lastTimestamp ?? 0)) / 1000;
  }

  /**
   * Calculate the cpu times of one process.
   *
   * @returns `secs` is the total CPU time (user + system) and `pct` the usage
   *          in percent since the previous sample (0 without historic data)
   */
  private cpu({
    pid,
    stat,
    timestamp,
    lastProcesses,
    lastTimestamp,
  }: {
    pid: number;
    stat: Stat;
    timestamp: number;
    lastProcesses?: Processes;
    lastTimestamp?: number;
  }): Cpu {
    // we are interested in that process's total usage: user + system
    const total_cpu = stat.utime + stat.stime;
    // the fallback is chosen in such a way, that it says 0% if we do not have historic data
    const prev_cpu = lastProcesses?.[pid]?.cpu.secs ?? total_cpu;
    const dt = this.dt(timestamp, lastTimestamp);
    // how much cpu time was used since last time we checked this process…
    const pct = dt > 0 ? 100 * ((total_cpu - prev_cpu) / dt) : 0;
    return { pct: pct, secs: total_cpu };
  }

  /** Read a /proc/[pid]/cmdline file and return its arguments. */
  private async cmdline(path: string): Promise<string[]> {
    // we split at the null-delimiter and filter all empty elements
    return (await readFile(path, "utf8"))
      .split("\0")
      .filter((c) => c.length > 0);
  }

  /**
   * Gather all the information for a specific process with the given pid.
   * Rejects if any of the files below /proc/[pid] cannot be read, e.g.
   * because the process is gone.
   *
   * @param pid name of the directory below /proc, i.e. the pid as a string
   * @param uptime uptime of the machine in seconds
   * @param timestamp time of this sample in milliseconds
   * @param lastProcesses processes of the previous sample, if any
   * @param lastTimestamp time of the previous sample in milliseconds, if any
   */
  private async process({
    pid: pid_str,
    uptime,
    timestamp,
    lastProcesses,
    lastTimestamp,
  }: {
    pid: string;
    uptime: number;
    timestamp: number;
    lastProcesses?: Processes;
    lastTimestamp?: number;
  }): Promise<Process> {
    const base = join("/proc", pid_str);
    const pid = parseInt(pid_str, 10);
    const fn = (name: string): string => join(base, name);
    const [cmdline, exe, stat] = await Promise.all([
      this.cmdline(fn("cmdline")),
      readlink(fn("exe")),
      this.stat(fn("stat")),
    ]);
    return {
      pid,
      ppid: stat.ppid,
      cmdline,
      exe,
      stat,
      cpu: this.cpu({ pid, timestamp, stat, lastProcesses, lastTimestamp }),
      // starttime is relative to the boot of the machine
      uptime: uptime - stat.starttime,
    };
  }

  /**
   * This is how long the underlying machine is running. We need this
   * information, because the processes' start time is measured in "ticks"
   * since the machine started.
   *
   * @returns the uptime in seconds and the derived boot time
   */
  private async uptime(): Promise<[number, Date]> {
    const out = await readFile("/proc/uptime", "utf8");
    const uptime = parseFloat(out.split(" ")[0]);
    const boottime = new Date(new Date().getTime() - 1000 * uptime);
    return [uptime, boottime];
  }

  /**
   * This is where we gather information about all running processes.
   *
   * @param timestamp time of this sample in milliseconds, defaults to now
   * @param sampleKey selects which previous sample the CPU percentages are
   *                  compared against; the new sample is stored under it
   * @returns the processes indexed by pid, the uptime of the machine in
   *          seconds and its boot time
   */
  public async processes(
    timestamp?: number,
    sampleKey = "default",
  ): Promise<{ procs: Processes; uptime: number; boottime: Date }> {
    // The constructor only kicks off init() without waiting for it. Make
    // sure ticks and pagesize are known before parsing any stat file,
    // otherwise all derived values would be NaN.
    await this.init();

    timestamp ??= new Date().getTime();
    const [uptime, boottime] = await this.uptime();
    const last = this.lastByKey.get(sampleKey);

    const procs: Processes = {};
    // only the numeric entries of /proc are process directories
    const isPid = /^[0-9]+$/;
    let pids = (await readdir("/proc")).filter((pid) => isPid.test(pid));

    if (pids.length > this.procLimit) {
      dbg(`too many processes – limit of ${this.procLimit} reached!`);
      // we avoid processing and sending too much data
      pids = pids.slice(0, this.procLimit);
    }

    await mapParallelLimit(
      pids,
      async (pid) => {
        try {
          const proc = await this.process({
            pid,
            uptime,
            timestamp,
            lastProcesses: last?.processes,
            lastTimestamp: last?.timestamp,
          });
          procs[proc.pid] = proc;
        } catch (err) {
          // best effort: a process that cannot be read is skipped
          if (this.testing)
            dbg(`process ${pid} likely vanished – could happen – ${err}`);
        }
      },
      20,
    );

    // remember this sample as the reference for the next call with this key
    this.lastByKey.set(sampleKey, { timestamp, processes: procs });
    return { procs, uptime, boottime };
  }
}
231
232
/** Aggregated statistics of a process together with all its descendants. */
export interface ProcessTreeStats {
  /** sum of the resident set sizes (`stat.mem.rss`) of all processes in the tree */
  rss: number;
  /** sum of the CPU seconds (`cpu.secs`) of all processes in the tree */
  cpu_secs: number;
  /** sum of the CPU percentages (`cpu.pct`) of all processes in the tree */
  cpu_pct: number;
}
237
238
/**
 * Add up the statistics of a whole process tree: the process with the given
 * PID plus, recursively, everything listed for it in `children`.
 *
 * @param procs all known processes, indexed by pid
 * @param children maps a pid to the pids of its direct children
 * @param pid root of the tree to aggregate
 * @returns the summed memory usage, CPU time and CPU percentage, or `null`
 *          if the root or any of its descendants is not contained in `procs`
 */
export function sumChildren(
  procs: Processes,
  children: { [pid: number]: number[] },
  pid: number,
): ProcessTreeStats | null {
  const root = procs[`${pid}`];
  if (root == null) {
    return null;
  }

  // start with the values of the root itself …
  const total: ProcessTreeStats = {
    rss: root.stat.mem.rss,
    cpu_secs: root.cpu.secs,
    cpu_pct: root.cpu.pct,
  };

  // … and add the totals of each child's subtree
  const kids = children[pid] ?? [];
  for (let k = 0; k < kids.length; k++) {
    const sub = sumChildren(procs, children, kids[k]);
    // a single unknown descendant invalidates the whole sum
    if (sub == null) {
      return null;
    }
    total.rss += sub.rss;
    total.cpu_secs += sub.cpu_secs;
    total.cpu_pct += sub.cpu_pct;
  }

  return total;
}
267
268