Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/backend/process-stats.ts
Views: 687
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

import { exec as cp_exec } from "node:child_process";
import { readFile, readdir, readlink } from "node:fs/promises";
import { join } from "node:path";
import { promisify } from "node:util";

import { reuseInFlight } from "@cocalc/util/reuse-in-flight";
import {
  Cpu,
  Process,
  Processes,
  Stat,
  State,
} from "@cocalc/util/types/project-info/types";
import { getLogger } from "./logger";
import { envToInt } from "./misc/env-to-number";

const exec = promisify(cp_exec);

/**
 * Return information about all processes (up to a limit or filter) in the environment, where this node.js process runs.
 * This has been refactored out of project/project-info/server.ts.
 * It is also used by the backend itself in "execute-code.ts" – to gather info about a spawned async process.
 */

// this is a hard limit on the number of processes we gather, just to
// be on the safe side to avoid processing too much data.
const LIMIT = envToInt("COCALC_PROJECT_INFO_PROC_LIMIT", 256);

// entries in /proc whose name consists only of digits are process directories
const PID_DIR = /^[0-9]+$/;

interface ProcessStatsOpts {
  procLimit?: number;
  testing?: boolean;
  dbg?: Function;
}

/**
 * Collects per-process information (state, cpu usage, memory, command line)
 * by reading the Linux /proc filesystem.
 *
 * CPU percentages are computed relative to the previous call of
 * `processes()`, hence one instance should be reused across samples.
 */
export class ProcessStats {
  private readonly testing: boolean;
  private readonly procLimit: number;
  private readonly dbg: Function;
  // clock ticks per second and memory page size in bytes; both set by init()
  private ticks?: number;
  private pagesize?: number;
  private last?: { timestamp: number; processes: Processes };

  /**
   * @param opts.procLimit maximum number of /proc entries examined per call of `processes()`
   * @param opts.testing if true, failures to read a single process are logged via `dbg`
   * @param opts.dbg debug logging function, defaults to the "process-stats" logger
   */
  constructor(opts?: ProcessStatsOpts) {
    this.procLimit = opts?.procLimit ?? LIMIT;
    this.testing = opts?.testing ?? false;
    this.dbg = opts?.dbg ?? getLogger("process-stats").debug;
    // Kick off initialization early. A failure is only logged here, because a
    // constructor cannot report it; processes() awaits init() again and will
    // surface the error to its caller.
    void this.init().catch((err) => {
      this.dbg(`initialization failed – ${err}`);
    });
  }

  /**
   * This grabs some kernel configuration values we need. They won't change,
   * hence they are only read once. Concurrent calls share the same request.
   *
   * @throws if `getconf` fails or returns something that is not a positive integer
   */
  public init = reuseInFlight(async () => {
    if (this.ticks != null && this.pagesize != null) return;
    const [clkTck, pageSize] = await Promise.all([
      exec("getconf CLK_TCK"),
      exec("getconf PAGESIZE"),
    ]);
    // should be 100, usually
    const ticks = parseInt(clkTck.stdout.trim(), 10);
    // 4096?
    const pagesize = parseInt(pageSize.stdout.trim(), 10);
    if (!(ticks > 0) || !(pagesize > 0)) {
      throw new Error(
        `unexpected getconf output: CLK_TCK=${clkTck.stdout.trim()} PAGESIZE=${pageSize.stdout.trim()}`,
      );
    }
    this.pagesize = pagesize;
    this.ticks = ticks;
  });

  /**
   * Parse a /proc/[pid]/stat file.
   * The "stat" file contains all the information, this page explains what is what:
   * https://man7.org/linux/man-pages/man5/proc.5.html
   *
   * @param path full path of the stat file
   * @returns the parsed values; all time-values are in seconds, memory in MiB
   * @throws if the file cannot be read or parsed, or if init() did not complete
   */
  private async stat(path: string): Promise<Stat> {
    const { ticks, pagesize } = this;
    if (ticks == null || pagesize == null) {
      throw new Error("ProcessStats is not initialized – await init() first");
    }
    const raw = await readFile(path, "utf8");
    // the "comm" field could contain additional spaces or parentheses,
    // hence we look for the first "(" and the very last ")"
    const open = raw.indexOf("(");
    const close = raw.lastIndexOf(")");
    if (open < 0 || close < open) {
      throw new Error(`unable to parse ${path}`);
    }
    const head = raw.slice(0, open).trim();
    const tail = raw.slice(close + 1).trim();
    // "comm" is now a placeholder to keep indices as they are.
    // don't forget to account for 0 vs. 1 based indexing.
    const data = `${head} comm ${tail}`.split(" ");
    const get = (idx: number): number => parseInt(data[idx] ?? "", 10);
    const ret = {
      ppid: get(3),
      state: data[2] as State,
      utime: get(13) / ticks, // CPU time spent in user code, measured in clock ticks (#14)
      stime: get(14) / ticks, // CPU time spent in kernel code, measured in clock ticks (#15)
      cutime: get(15) / ticks, // Waited-for children's CPU time spent in user code (in clock ticks) (#16)
      cstime: get(16) / ticks, // Waited-for children's CPU time spent in kernel code (in clock ticks) (#17)
      starttime: get(21) / ticks, // Time when the process started, measured in clock ticks (#22)
      nice: get(18),
      num_threads: get(19),
      mem: { rss: (get(23) * pagesize) / (1024 * 1024) }, // MiB
    };
    return ret;
  }

  // delta-time in seconds between the given timestamp (ms) and the previous sample
  private dt(timestamp: number): number {
    return (timestamp - (this.last?.timestamp ?? 0)) / 1000;
  }

  /**
   * Calculate cpu times of a single process.
   *
   * @returns `secs` is the total cpu time (user + system) used so far,
   *          `pct` the usage since the previous sample in percent of one core
   */
  private cpu({
    pid,
    stat,
    timestamp,
  }: {
    pid: number;
    stat: Stat;
    timestamp: number;
  }): Cpu {
    // we are interested in that processes total usage: user + system
    const total_cpu = stat.utime + stat.stime;
    // the fallback is chosen in such a way, that it says 0% if we do not have historic data
    const prev_cpu = this.last?.processes?.[pid]?.cpu.secs ?? total_cpu;
    const dt = this.dt(timestamp);
    // how much cpu time was used since last time we checked this process…
    // without elapsed time (same timestamp twice) we can't tell, hence 0%
    const pct = dt > 0 ? 100 * ((total_cpu - prev_cpu) / dt) : 0;
    return { pct, secs: total_cpu };
  }

  // read /proc/[pid]/cmdline and return its arguments
  private async cmdline(path: string): Promise<string[]> {
    // we split at the null-delimiter and filter all empty elements
    const raw = await readFile(path, "utf8");
    return raw.split("\0").filter((c) => c.length > 0);
  }

  /**
   * This gathers all the information for a specific process with the given pid.
   *
   * @param pid name of the directory in /proc, i.e. the pid as a string
   * @param uptime uptime of the machine in seconds
   * @param timestamp time of this sample in milliseconds
   * @throws if any of the files can't be read, e.g. because the process vanished
   */
  private async process({
    pid: pid_str,
    uptime,
    timestamp,
  }: {
    pid: string;
    uptime: number;
    timestamp: number;
  }): Promise<Process> {
    const base = join("/proc", pid_str);
    const pid = parseInt(pid_str, 10);
    const fn = (name: string): string => join(base, name);
    const [cmdline, exe, stat] = await Promise.all([
      this.cmdline(fn("cmdline")),
      readlink(fn("exe")),
      this.stat(fn("stat")),
    ]);
    return {
      pid,
      ppid: stat.ppid,
      cmdline,
      exe,
      stat,
      cpu: this.cpu({ pid, timestamp, stat }),
      uptime: uptime - stat.starttime,
    };
  }

  // this is how long the underlying machine is running
  // we need this information, because the processes' start time is
  // measured in "ticks" since the machine started
  private async uptime(): Promise<[number, Date]> {
    // return uptime in secs
    const out = await readFile("/proc/uptime", "utf8");
    const uptime = parseFloat(out.split(" ")[0] ?? "");
    const boottime = new Date(Date.now() - 1000 * uptime);
    return [uptime, boottime];
  }

  /**
   * This is where we gather information about all running processes.
   *
   * At most `procLimit` entries of /proc are examined. Processes that can't be
   * read (e.g. because they vanished in the meantime) are skipped.
   *
   * @param timestamp time of this sample in milliseconds, defaults to now
   * @returns the processes indexed by pid, the machine's uptime in seconds and its boot time
   * @throws if initialization failed or /proc can't be read
   */
  public async processes(
    timestamp?: number,
  ): Promise<{ procs: Processes; uptime: number; boottime: Date }> {
    // make sure ticks and pagesize are known, otherwise all values end up as NaN
    await this.init();
    timestamp ??= Date.now();
    const [uptime, boottime] = await this.uptime();

    const procs: Processes = {};
    let n = 0;
    for (const pid of await readdir("/proc")) {
      if (!PID_DIR.test(pid)) continue;
      // we avoid processing and sending too much data
      if (n >= this.procLimit) {
        this.dbg(`too many processes – limit of ${this.procLimit} reached!`);
        break;
      }
      n += 1;
      try {
        const proc = await this.process({ pid, uptime, timestamp });
        procs[proc.pid] = proc;
      } catch (err) {
        if (this.testing)
          this.dbg(`process ${pid} likely vanished – could happen – ${err}`);
      }
    }
    this.last = { timestamp, processes: procs };
    return { procs, uptime, boottime };
  }
}