CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Sign Up · Sign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/nbgrader/jupyter-run.ts
Views: 687
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import type { RunNotebookOptions } from "@cocalc/jupyter/nbgrader/types";
7
import type { JupyterNotebook } from "@cocalc/jupyter/nbgrader/types";
8
import type { JupyterKernelInterface as JupyterKernel } from "@cocalc/jupyter/types/project-interface";
9
import { is_object, len, uuid, trunc_middle } from "@cocalc/util/misc";
10
import { retry_until_success } from "@cocalc/util/async-utils";
11
import { kernel } from "@cocalc/jupyter/kernel";
12
13
// For tracking limits during the run:
14
/**
 * Resource limits (and running counters) shared across all cells while a
 * single notebook is being executed.
 */
export interface Limits {
  /**
   * Budget in milliseconds for the whole notebook run. run_cell only
   * enforces it when it is set and nonzero.
   */
  timeout_ms?: number;
  /** Timestamp (as returned by Date.now()) taken when the run started. */
  start_time?: number;
  /** Passed to the kernel as the timeout (in ms) for executing each cell. */
  timeout_ms_per_cell: number;
  /**
   * Maximum number of characters of JSON-serialized output messages kept
   * for one cell; a value of 0 turns this check off in run_cell.
   */
  max_output_per_cell: number;
  /**
   * Maximum number of characters of JSON-serialized output messages kept
   * across the whole run; a value of 0 turns this check off in run_cell.
   */
  max_output: number;
  /** Running count of output characters produced so far, over all cells. */
  total_output: number;
}
22
23
/**
 * Report whether the total time budget for the notebook run has been used up.
 *
 * @param limits - the run's limits; only `timeout_ms` and `start_time` are read.
 * @returns true if at least `timeout_ms` milliseconds have passed since
 *   `start_time`; false if no total limit is configured or the start time
 *   is unknown.
 */
function global_timeout_exceeded(limits: Limits): boolean {
  // jupyter_run_notebook stores 0 when no total limit was requested, and
  // run_cell treats a falsy timeout_ms as "no limit".  A plain null check
  // here would make a 0 budget count as exceeded immediately, which would
  // prevent the kernel from ever being restarted after a failed cell.
  if (!limits.timeout_ms || limits.start_time == null) return false;
  return Date.now() - limits.start_time >= limits.timeout_ms;
}
27
28
/**
 * Run all cells of a notebook in a freshly spawned kernel and return the
 * notebook, with the outputs filled in, as a JSON string.
 *
 * If running a cell throws (e.g. a timeout or a broken kernel), the error is
 * recorded as a traceback output of that cell and, unless the total time
 * limit has been exceeded, the kernel is replaced by a new one so that the
 * remaining cells can still run.
 *
 * @param logger - object with a `debug(...args)` method used for logging.
 * @param opts - `ipynb` is the notebook as a JSON string, `path` is the
 *   directory in which the temporary kernel path is placed, and `limits`
 *   optionally gives time and output limits (missing values default to 0).
 * @returns the JSON-serialized notebook after execution.
 * @throws if the notebook JSON cannot be parsed, or if spawning (or
 *   re-spawning) the kernel keeps failing until retry_until_success gives up.
 */
export async function jupyter_run_notebook(
  logger,
  opts: RunNotebookOptions
): Promise<string> {
  const log = (...args) => {
    logger.debug("jupyter_run_notebook", ...args);
  };
  log(trunc_middle(JSON.stringify(opts)));
  const notebook: JupyterNotebook = JSON.parse(opts.ipynb);

  // Never reassigned; only total_output is mutated (by run_cell).
  const limits: Limits = {
    timeout_ms: opts.limits?.max_total_time_ms ?? 0,
    timeout_ms_per_cell: opts.limits?.max_time_per_cell_ms ?? 0,
    max_output: opts.limits?.max_output ?? 0,
    max_output_per_cell: opts.limits?.max_output_per_cell ?? 0,
    start_time: Date.now(),
    total_output: 0,
  };

  const name = notebook.metadata.kernelspec.name;
  let jupyter: JupyterKernel | undefined = undefined;

  /* We use retry_until_success to spawn the kernel, since
     it makes people's lives much easier if this works even
     if there is a temporary issue.  Also, in testing, I've
     found that sometimes if you try to spawn two kernels at
     the exact same time as the same user things can fail.
     This is possibly an upstream Jupyter bug, but let's
     just work around it since we want extra reliability
     anyways.
  */
  async function init_jupyter0(): Promise<void> {
    log("init_jupyter", jupyter != null);
    // Close any previous kernel before replacing it.
    jupyter?.close();
    jupyter = undefined;
    // path is random so it doesn't randomly conflict with
    // something else running at the same time.
    const path = opts.path + `/${uuid()}.ipynb`;
    jupyter = kernel({ name, path });
    log("init_jupyter: spawning");
    // for Python, we suppress all warnings
    // they end up as stderr-output and hence would imply 0 points
    const env = { PYTHONWARNINGS: "ignore" };
    await jupyter.spawn({ env });
    log("init_jupyter: spawned");
  }

  async function init_jupyter(): Promise<void> {
    await retry_until_success({
      f: init_jupyter0,
      start_delay: 1000,
      max_delay: 5000,
      factor: 1.4,
      max_time: 30000,
      log: function (...args) {
        log("init_jupyter - retry_until_success", ...args);
      },
    });
  }

  try {
    log("init_jupyter...");
    await init_jupyter();
    log("init_jupyter: done");
    for (const cell of notebook.cells) {
      try {
        if (jupyter == null) {
          log("BUG: jupyter==null");
          throw Error("jupyter can't be null since it was initialized above");
        }
        log("run_cell...");
        await run_cell(jupyter, limits, cell); // mutates cell by putting in outputs
        log("run_cell: done");
      } catch (err) {
        // fatal error occurred, e.g., timeout, broken kernel, etc.
        if (cell.outputs == null) {
          cell.outputs = [];
        }
        cell.outputs.push({ traceback: [`${err}`] });
        if (!global_timeout_exceeded(limits)) {
          // close existing jupyter and spawn new one, so we can robustly run more cells.
          // Obviously, only do this if we are not out of time.
          log("error running cell; restarting kernel...");
          await init_jupyter();
          log("kernel restart done");
        } else {
          log("error running cell; total time limit exceeded so not restarting kernel");
        }
      }
    }
  } finally {
    log("in finally");
    if (jupyter != null) {
      log("jupyter != null so closing");
      // NOTE(review): presumably needed because the type checker does not
      // track the assignments to jupyter made inside init_jupyter0.
      // @ts-ignore
      jupyter.close();
      jupyter = undefined;
    }
  }
  log("returning result");
  return JSON.stringify(notebook);
}
128
129
/**
 * Execute one notebook cell in the given kernel and store the resulting
 * output messages in `cell.outputs` (the cell is mutated in place).
 *
 * Non-code cells are skipped.  Output is subject to the per-cell and global
 * character limits in `limits`; once a limit is exceeded, further
 * non-traceback output of this cell is dropped and a single stdout notice
 * is added instead.  Tracebacks are always kept.
 *
 * @param jupyter - the running kernel used to execute the code.
 * @param limits - limits for the run; `total_output` is increased by the
 *   size of every output message processed here.
 * @param cell - a notebook cell; `cell_type`, `source` and `outputs` are used.
 * @throws if `jupyter` is null, if the total time limit is already exceeded,
 *   or if executing the code in the kernel throws.
 */
export async function run_cell(
  jupyter: JupyterKernel,
  limits: Limits,
  cell
): Promise<void> {
  if (jupyter == null) {
    throw Error("jupyter must be defined");
  }

  if (limits.timeout_ms && global_timeout_exceeded(limits)) {
    // the total time has been exceeded -- this will mark outputs as error
    // for each cell in the rest of the notebook.
    throw Error(
      `Total time limit (=${Math.round(
        limits.timeout_ms / 1000
      )} seconds) exceeded`
    );
  }

  if (cell.cell_type != "code") {
    // skip all non-code cells -- nothing to run
    return;
  }
  // nbformat allows the source to be either a list of strings or one
  // single string, so don't assume that it is always a list.
  const code: string = Array.isArray(cell.source)
    ? cell.source.join("")
    : `${cell.source ?? ""}`;
  if (cell.outputs == null) {
    // shouldn't happen, since this would violate nbformat, but let's ensure
    // it anyways, just in case.
    cell.outputs = [];
  }

  const result = await jupyter.execute_code_now({
    code,
    timeout_ms: limits.timeout_ms_per_cell,
  });

  let cell_output_chars = 0;
  // Whether the "Output truncated" notice is already in cell.outputs; without
  // this, every message past the limit would add yet another notice.
  let truncation_noted = false;
  for (const x of result) {
    if (x == null) continue;
    if (x["msg_type"] == "clear_output") {
      // Discard what has been collected so far.  The content of this message
      // is only an instruction, not output, so it must not be stored itself.
      cell.outputs = [];
      truncation_noted = false; // the notice (if any) was just cleared as well
      continue;
    }
    const mesg: any = x["content"];
    if (mesg == null) continue;
    if (mesg.comm_id != null) {
      // ignore any comm/widget related messages
      continue;
    }
    // Strip fields that are not wanted in the stored outputs.
    delete mesg.execution_state;
    delete mesg.execution_count;
    delete mesg.payload;
    delete mesg.code;
    delete mesg.status;
    delete mesg.source;
    // Also drop fields whose value is an empty object.
    for (const k in mesg) {
      const v = mesg[k];
      if (is_object(v) && len(v) === 0) {
        delete mesg[k];
      }
    }
    if (len(mesg) == 0) continue;
    const n = JSON.stringify(mesg).length;
    limits.total_output += n;
    if (limits.max_output_per_cell) {
      cell_output_chars += n;
    }
    if (mesg["traceback"] != null) {
      // always include tracebacks
      cell.outputs.push(mesg);
      continue;
    }
    const over_cell_limit =
      !!limits.max_output_per_cell &&
      cell_output_chars > limits.max_output_per_cell;
    const over_global_limit =
      !!limits.max_output && limits.total_output > limits.max_output;
    if (!over_cell_limit && !over_global_limit) {
      cell.outputs.push(mesg);
      continue;
    }
    // Over a limit: drop the message, and mention the truncation once.
    if (truncation_noted) continue;
    truncation_noted = true;
    // Use stdout stream -- it's not an *error* that there is
    // truncated output; just something we want to mention.
    cell.outputs.push({
      name: "stdout",
      output_type: "stream",
      text: [
        over_cell_limit
          ? `Output truncated since it exceeded the cell output limit of ${limits.max_output_per_cell} characters`
          : `Output truncated since it exceeded the global output limit of ${limits.max_output} characters`,
      ],
    });
  }
}
225
226