Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/nbgrader/jupyter-run.ts
6141 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import {
7
type JupyterNotebook,
8
type RunNotebookOptions,
9
type Limits,
10
DEFAULT_LIMITS,
11
} from "@cocalc/util/jupyter/nbgrader-types";
12
import type { JupyterKernelInterface as JupyterKernel } from "@cocalc/jupyter/types/project-interface";
13
import { is_object, len, uuid, trunc_middle } from "@cocalc/util/misc";
14
import { retry_until_success } from "@cocalc/util/async-utils";
15
import { kernel } from "@cocalc/jupyter/kernel";
16
import { mkdir, unlink, writeFile } from "node:fs/promises";
17
import { dirname, join } from "node:path";
18
import getLogger from "@cocalc/backend/logger";
19
// Re-export the Limits type imported above, so that it can also be imported
// from this module.
export type { Limits };
20
21
// Module-level logger; jupyter_run_notebook sends its debug messages through it.
const logger = getLogger("jupyter:nbgrader:jupyter-run");
22
23
/**
 * Report whether the total (whole-notebook) time budget has been used up.
 *
 * A missing or zero `timeout_ms` means that no total time limit is in
 * effect -- this matches `run_cell`, which also treats a falsy `timeout_ms`
 * as "no limit", and `jupyter_run_notebook`, which stores 0 when no total
 * time limit was requested. In that case, or when `start_time` is not set,
 * the budget is never considered exceeded.
 *
 * @param limits - limits whose `timeout_ms` and `start_time` are consulted
 * @returns true once at least `timeout_ms` milliseconds have elapsed since
 *   `start_time`; false otherwise
 */
function global_timeout_exceeded(limits: Limits): boolean {
  const { timeout_ms, start_time } = limits;
  // A falsy timeout (undefined, null or 0) disables the total time limit.
  if (!timeout_ms || start_time == null) return false;
  return Date.now() - start_time >= timeout_ms;
}
27
28
/**
 * Run the cells of a notebook in a newly spawned kernel and return the
 * notebook, with the cell outputs filled in, as a JSON string.
 *
 * The notebook text `opts.ipynb` is written to a randomly named `.ipynb`
 * file inside `opts.path`; the path of that file is what gets handed to
 * `kernel(...)`. The file is removed again (best effort, errors ignored)
 * before this function finishes, whether it returns or throws.
 *
 * If running a cell throws, the error is recorded as a traceback output of
 * that cell and -- unless the total time budget is used up -- the kernel is
 * closed and respawned before continuing with the next cell.
 *
 * @param opts - `ipynb` is the notebook as JSON text, `path` the directory
 *   for the temporary file, `limits` optional time/output limits (each
 *   missing limit is stored as 0).
 * @returns `JSON.stringify` of the parsed notebook after its cells were run
 */
export async function jupyter_run_notebook(
  opts: RunNotebookOptions,
): Promise<string> {
  const log = (...args) => {
    logger.debug("jupyter_run_notebook", ...args);
  };
  log(trunc_middle(JSON.stringify(opts)));
  const notebook: JupyterNotebook = JSON.parse(opts.ipynb);

  const requested = opts.limits;
  const limits: Limits = {
    timeout_ms: requested?.max_total_time_ms ?? 0,
    timeout_ms_per_cell: requested?.max_time_per_cell_ms ?? 0,
    max_output: requested?.max_output ?? 0,
    max_output_per_cell: requested?.max_output_per_cell ?? 0,
    start_time: Date.now(),
    total_output: 0,
  };

  // The file name is random, so it cannot clash with another run that is
  // using the same directory at the same time.
  const path = join(opts.path, `${uuid()}.ipynb`);
  try {
    await mkdir(dirname(path), { recursive: true });
    await writeFile(path, opts.ipynb);

    const name = notebook.metadata.kernelspec.name;
    let jupyter: JupyterKernel | undefined = undefined;

    // A single spawn attempt: close whatever kernel we currently hold,
    // then create and spawn a new one.
    const spawn_kernel_once = async (): Promise<void> => {
      log("init_jupyter", jupyter != null);
      jupyter?.close();
      jupyter = undefined;
      jupyter = kernel({ name, path });
      log("init_jupyter: spawning");
      // For Python all warnings are suppressed: they would show up as
      // stderr output, which would imply 0 points.
      await jupyter.spawn({ env: { PYTHONWARNINGS: "ignore" } });
      log("init_jupyter: spawned");
    };

    // Spawning goes through retry_until_success so that a temporary
    // problem does not fail the whole run. In testing, spawning two
    // kernels at exactly the same time as the same user sometimes failed
    // (possibly an upstream Jupyter bug); retrying works around that and
    // gives extra reliability anyway.
    const spawn_kernel = async (): Promise<void> => {
      await retry_until_success({
        f: spawn_kernel_once,
        start_delay: 1000,
        max_delay: 5000,
        factor: 1.4,
        max_time: 30000,
        log: (...args) => {
          log("init_jupyter - retry_until_success", ...args);
        },
      });
    };

    try {
      log("init_jupyter...");
      await spawn_kernel();
      log("init_jupyter: done");
      for (const cell of notebook.cells) {
        try {
          if (jupyter == null) {
            log("BUG: jupyter==null");
            throw Error("jupyter can't be null since it was initialized above");
          }
          log("run_cell...");
          // run_cell mutates the cell by filling in its outputs
          await run_cell(jupyter, limits, cell);
          log("run_cell: done");
        } catch (err) {
          // A fatal error occurred, e.g., a timeout or a broken kernel.
          if (cell.outputs == null) {
            cell.outputs = [];
          }
          cell.outputs.push({ traceback: [`${err}`] });
          if (global_timeout_exceeded(limits)) {
            // Out of time: respawning a kernel would be pointless.
            continue;
          }
          // Replace the existing kernel by a new one, so that the
          // remaining cells can be run robustly.
          log("timeout exceeded so restarting...");
          await spawn_kernel();
          log("timeout exceeded restart done");
        }
      }
    } finally {
      log("in finally");
      if (jupyter != null) {
        log("jupyter != null so closing");
        // @ts-ignore
        jupyter.close();
        jupyter = undefined;
      }
    }
    log("returning result");
    return JSON.stringify(notebook);
  } finally {
    // Best-effort removal of the temporary notebook file.
    try {
      await unlink(path);
    } catch {}
  }
}
136
137
/**
 * Run one notebook cell on the given kernel and store the resulting
 * messages in `cell.outputs` (the cell is mutated in place).
 *
 * Non-code cells are skipped. For code cells, the content of every message
 * returned by `jupyter.execute_code_now` is stripped of a fixed set of
 * bookkeeping fields and of empty-object fields, and then appended to
 * `cell.outputs`, subject to the output limits:
 *
 * - messages carrying a `traceback` are always kept;
 * - once the per-cell limit (`max_output_per_cell`) or the global limit
 *   (`max_output`) is exceeded, further messages are dropped and a single
 *   "Output truncated ..." stdout stream notice is added to the cell;
 * - a limit that is falsy (e.g. 0) is not enforced.
 *
 * The running output total is written back to `limits0.total_output`, so
 * that passing the same limits object for every cell makes the global
 * output limit apply across the whole notebook.
 *
 * @param jupyter - kernel used to execute the code
 * @param limits0 - limits; missing fields are taken from `DEFAULT_LIMITS`.
 *   Its `total_output` field is updated by this function.
 * @param cell - notebook cell; `source` may be a list of strings or a
 *   single string
 * @throws Error if `jupyter` is null, or if a total time limit is set and
 *   `global_timeout_exceeded` reports that it has been used up. Anything
 *   thrown by `execute_code_now` propagates as well.
 */
export async function run_cell(
  jupyter: JupyterKernel,
  limits0: Partial<Limits>,
  cell,
): Promise<void> {
  if (jupyter == null) {
    throw Error("jupyter must be defined");
  }
  const limits = { ...DEFAULT_LIMITS, ...limits0 };

  if (limits.timeout_ms && global_timeout_exceeded(limits)) {
    // The total time has been exceeded -- the caller will mark the outputs
    // of each remaining cell of the notebook as an error.
    throw Error(
      `Total time limit (=${Math.round(
        limits.timeout_ms / 1000,
      )} seconds) exceeded`,
    );
  }

  if (cell.cell_type != "code") {
    // skip all non-code cells -- nothing to run
    return;
  }

  // nbformat allows `source` to be either a list of strings or one
  // (multi-line) string, so accept both instead of assuming a list.
  const code: string = Array.isArray(cell.source)
    ? cell.source.join("")
    : `${cell.source ?? ""}`;
  if (cell.outputs == null) {
    // Shouldn't happen, since this would violate nbformat, but let's
    // ensure it anyway, just in case.
    cell.outputs = [];
  }

  const result = await jupyter.execute_code_now({
    code,
    timeout_ms: limits.timeout_ms_per_cell,
  });

  let cell_output_chars = 0;
  // Whether this cell already contains an "Output truncated" notice; one
  // notice is enough, and adding one per dropped message would itself
  // produce unbounded output.
  let truncation_noted = false;
  for (const x of result) {
    if (x == null) continue;
    if (x["msg_type"] == "clear_output") {
      cell.outputs = [];
      // The notice (if any) was just cleared too, so allow a new one.
      truncation_noted = false;
    }
    const mesg: any = x["content"];
    if (mesg == null) continue;
    if (mesg.comm_id != null) {
      // ignore any comm/widget related messages
      continue;
    }
    delete mesg.execution_state;
    delete mesg.execution_count;
    delete mesg.payload;
    delete mesg.code;
    delete mesg.status;
    delete mesg.source;
    for (const k in mesg) {
      const v = mesg[k];
      if (is_object(v) && len(v) === 0) {
        delete mesg[k];
      }
    }
    if (len(mesg) == 0) continue;

    const n = JSON.stringify(mesg).length;
    const total_output = (limits.total_output ?? 0) + n;
    limits.total_output = total_output;
    // `limits` is only a local copy; write the total back so that the
    // caller's limits object accumulates output across cells.
    limits0.total_output = total_output;
    if (limits.max_output_per_cell) {
      cell_output_chars += n;
    }

    if (mesg["traceback"] != null) {
      // always include tracebacks
      cell.outputs.push(mesg);
      continue;
    }

    let notice: string | undefined = undefined;
    if (
      limits.max_output_per_cell &&
      cell_output_chars > limits.max_output_per_cell
    ) {
      notice = `Output truncated since it exceeded the cell output limit of ${limits.max_output_per_cell} characters`;
    } else if (limits.max_output && total_output > limits.max_output) {
      notice = `Output truncated since it exceeded the global output limit of ${limits.max_output} characters`;
    }

    if (notice == null) {
      cell.outputs.push(mesg);
    } else if (!truncation_noted) {
      truncation_noted = true;
      // Use the stdout stream -- it's not an *error* that there is
      // truncated output; just something we want to mention.
      cell.outputs.push({
        name: "stdout",
        output_type: "stream",
        text: [notice],
      });
    }
  }
}
234
235