Path: blob/master/src/packages/jupyter/nbgrader/jupyter-run.ts
6141 views
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

import {
  type JupyterNotebook,
  type RunNotebookOptions,
  type Limits,
  DEFAULT_LIMITS,
} from "@cocalc/util/jupyter/nbgrader-types";
import type { JupyterKernelInterface as JupyterKernel } from "@cocalc/jupyter/types/project-interface";
import { is_object, len, uuid, trunc_middle } from "@cocalc/util/misc";
import { retry_until_success } from "@cocalc/util/async-utils";
import { kernel } from "@cocalc/jupyter/kernel";
import { mkdir, unlink, writeFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import getLogger from "@cocalc/backend/logger";
export type { Limits };

const logger = getLogger("jupyter:nbgrader:jupyter-run");

/**
 * Report whether the total time budget in `limits` has been used up.
 *
 * A missing or zero `timeout_ms` means "no total limit" (this matches how
 * run_cell guards its own check), so in that case this always returns false.
 * It also returns false when `start_time` is not set.
 */
function global_timeout_exceeded(limits: Limits): boolean {
  if (!limits.timeout_ms || limits.start_time == null) return false;
  return Date.now() - limits.start_time >= limits.timeout_ms;
}

/**
 * Run every cell of the notebook given as a JSON string in `opts.ipynb`
 * and return the resulting notebook, with outputs filled in, as a JSON string.
 *
 * The notebook is written to a randomly named temporary file inside
 * `opts.path`, which is removed again (best effort) before returning.
 * If running a cell throws, the error is recorded as a traceback output of
 * that cell and, as long as the total time limit has not been exceeded,
 * a fresh kernel is spawned before continuing with the next cell.
 *
 * @param opts - notebook source, working directory and optional limits
 * @returns the executed notebook serialized with JSON.stringify
 */
export async function jupyter_run_notebook(
  opts: RunNotebookOptions,
): Promise<string> {
  const log = (...args: unknown[]) => {
    logger.debug("jupyter_run_notebook", ...args);
  };
  log(trunc_middle(JSON.stringify(opts)));
  const notebook: JupyterNotebook = JSON.parse(opts.ipynb);

  // A value of 0 for any of these limits means "unlimited".
  const limits: Limits = {
    timeout_ms: opts.limits?.max_total_time_ms ?? 0,
    timeout_ms_per_cell: opts.limits?.max_time_per_cell_ms ?? 0,
    max_output: opts.limits?.max_output ?? 0,
    max_output_per_cell: opts.limits?.max_output_per_cell ?? 0,
    start_time: Date.now(),
    total_output: 0,
  };

  // path is random so it doesn't randomly conflict with
  // something else running at the same time.
  const path = join(opts.path, `${uuid()}.ipynb`);
  try {
    await mkdir(dirname(path), { recursive: true });
    await writeFile(path, opts.ipynb);

    const name = notebook.metadata.kernelspec.name;
    let jupyter: JupyterKernel | undefined = undefined;

    /* We use retry_until_success to spawn the kernel, since
       it makes people's lives much easier if this works even
       if there is a temporary issue.  Also, in testing, I've
       found that sometimes if you try to spawn two kernels at
       the exact same time as the same user things can fail.
       This is possibly an upstream Jupyter bug, but let's
       just work around it since we want extra reliability
       anyways.
    */
    async function init_jupyter0(): Promise<void> {
      log("init_jupyter", jupyter != null);
      // close any previous kernel before creating a replacement
      jupyter?.close();
      jupyter = undefined;
      jupyter = kernel({ name, path });
      log("init_jupyter: spawning");
      // for Python, we suppress all warnings
      // they end up as stderr-output and hence would imply 0 points
      const env = { PYTHONWARNINGS: "ignore" };
      await jupyter.spawn({ env });
      log("init_jupyter: spawned");
    }

    async function init_jupyter(): Promise<void> {
      await retry_until_success({
        f: init_jupyter0,
        start_delay: 1000,
        max_delay: 5000,
        factor: 1.4,
        max_time: 30000,
        log: function (...args: unknown[]) {
          log("init_jupyter - retry_until_success", ...args);
        },
      });
    }

    try {
      log("init_jupyter...");
      await init_jupyter();
      log("init_jupyter: done");
      for (const cell of notebook.cells) {
        try {
          if (jupyter == null) {
            log("BUG: jupyter==null");
            throw Error("jupyter can't be null since it was initialized above");
          }
          log("run_cell...");
          await run_cell(jupyter, limits, cell); // mutates cell by putting in outputs
          log("run_cell: done");
        } catch (err) {
          // fatal error occurred, e.g., timeout, broken kernel, etc.
          if (cell.outputs == null) {
            cell.outputs = [];
          }
          cell.outputs.push({ traceback: [`${err}`] });
          if (!global_timeout_exceeded(limits)) {
            // close existing jupyter and spawn new one, so we can robustly run more cells.
            // Obviously, only do this if we are not out of time.
            log("time limit not exceeded, so restarting kernel...");
            await init_jupyter();
            log("kernel restart done");
          }
        }
      }
    } finally {
      log("in finally");
      if (jupyter != null) {
        log("jupyter != null so closing");
        // @ts-ignore
        jupyter.close();
        jupyter = undefined;
      }
    }
    log("returning result");
    return JSON.stringify(notebook);
  } finally {
    // best-effort cleanup of the temporary notebook file
    try {
      await unlink(path);
    } catch {}
  }
}

/**
 * Execute a single notebook cell on the given kernel and append the
 * resulting messages to `cell.outputs` (the cell is mutated in place).
 *
 * Non-code cells are skipped.  Tracebacks are always kept; other output is
 * replaced by a single "Output truncated ..." stdout notice once the per-cell
 * or the global output limit is exceeded.
 *
 * @param jupyter - running kernel used to execute the code
 * @param limits0 - limits; missing fields are filled from DEFAULT_LIMITS.
 *   `total_output` is updated on this object so that the global output
 *   limit accumulates across successive calls with the same object.
 * @param cell - notebook cell; `outputs` is created if missing
 * @throws Error if `jupyter` is null or the total time limit is exceeded;
 *   errors thrown by the kernel's `execute_code_now` propagate to the caller
 */
export async function run_cell(
  jupyter: JupyterKernel,
  limits0: Partial<Limits>,
  cell,
): Promise<void> {
  if (jupyter == null) {
    throw Error("jupyter must be defined");
  }
  const limits = { ...DEFAULT_LIMITS, ...limits0 };

  if (limits.timeout_ms && global_timeout_exceeded(limits)) {
    // the total time has been exceeded -- this will mark outputs as error
    // for each cell in the rest of the notebook.
    throw Error(
      `Total time limit (=${Math.round(
        limits.timeout_ms / 1000,
      )} seconds) exceeded`,
    );
  }

  if (cell.cell_type != "code") {
    // skip all non-code cells -- nothing to run
    return;
  }
  // nbformat allows source to be either a list of strings or a single string.
  const code: string = Array.isArray(cell.source)
    ? cell.source.join("")
    : `${cell.source ?? ""}`;
  if (cell.outputs == null) {
    // shouldn't happen, since this would violate nbformat, but let's ensure
    // it anyways, just in case.
    cell.outputs = [];
  }

  const result = await jupyter.execute_code_now({
    code,
    timeout_ms: limits.timeout_ms_per_cell,
  });

  let cell_output_chars = 0;
  // whether the truncation notice is currently present in cell.outputs
  let truncation_noted = false;
  for (const x of result) {
    if (x == null) continue;
    if (x["msg_type"] == "clear_output") {
      cell.outputs = [];
      // the notice (if any) was just cleared along with everything else
      truncation_noted = false;
    }
    const mesg: any = x["content"];
    if (mesg == null) continue;
    if (mesg.comm_id != null) {
      // ignore any comm/widget related messages
      continue;
    }
    delete mesg.execution_state;
    delete mesg.execution_count;
    delete mesg.payload;
    delete mesg.code;
    delete mesg.status;
    delete mesg.source;
    // drop fields that are empty objects
    for (const k in mesg) {
      const v = mesg[k];
      if (is_object(v) && len(v) === 0) {
        delete mesg[k];
      }
    }
    if (len(mesg) == 0) continue;
    const n = JSON.stringify(mesg).length;
    limits.total_output += n;
    // `limits` is a copy, so propagate the running total back to the caller's
    // object; otherwise the global output limit would restart at every cell.
    limits0.total_output = limits.total_output;
    if (limits.max_output_per_cell) {
      cell_output_chars += n;
    }
    if (mesg["traceback"] != null) {
      // always include tracebacks
      cell.outputs.push(mesg);
      continue;
    }

    const over_cell_limit =
      !!limits.max_output_per_cell &&
      cell_output_chars > limits.max_output_per_cell;
    const over_global_limit =
      !!limits.max_output && limits.total_output > limits.max_output;
    if (!over_cell_limit && !over_global_limit) {
      cell.outputs.push(mesg);
      continue;
    }
    if (truncation_noted) {
      // already told the reader that output was truncated; drop the message
      continue;
    }
    truncation_noted = true;
    // Use stdout stream -- it's not an *error* that there is
    // truncated output; just something we want to mention.
    cell.outputs.push({
      name: "stdout",
      output_type: "stream",
      text: [
        over_cell_limit
          ? `Output truncated since it exceeded the cell output limit of ${limits.max_output_per_cell} characters`
          : `Output truncated since it exceeded the global output limit of ${limits.max_output} characters`,
      ],
    });
  }
}