CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/ipynb/import-from-ipynb.ts
Views: 687
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
/*
7
Importing from an ipynb object (in-memory version of .ipynb file)
8
*/
9
10
import * as misc from "@cocalc/util/misc";
11
import { JUPYTER_MIMETYPES } from "@cocalc/jupyter/util/misc";
12
13
/**
 * Skeleton nbformat 4.4 notebook: one empty code cell and a metadata object
 * whose `kernelspec` and `language_info` are unset.
 *
 * Pieces of this object are used as fallback values for notebooks that are
 * missing `cells`, `metadata`, `nbformat` or `nbformat_minor`.
 */
const DEFAULT_IPYNB = {
  cells: [
    {
      cell_type: "code",
      execution_count: null,
      metadata: {},
      outputs: [],
      source: [],
    },
  ],
  metadata: {
    kernelspec: undefined,
    language_info: undefined,
  },
  nbformat: 4,
  nbformat_minor: 4,
};
30
31
/**
 * Converts an in-memory ipynb object (the parsed contents of a .ipynb file)
 * into the cell/kernel/metadata representation exposed by `cells()`,
 * `kernel()` and `metadata()`.
 *
 * Typical use: `import(opts)`, read the results via the accessors, then
 * `close()` to drop all references.
 */
export class IPynbImporter {
  private _ipynb: any;
  private _new_id: any;
  private _output_handler: any;
  private _process_attachment: any;
  private _existing_ids: any;
  private _cells: any;
  private _kernel: any;
  private _metadata: any;
  private _language_info: any;

  /**
   * Import the given notebook.
   *
   * @param opts.ipynb - the ipynb object; it is deep-copied, so the caller's
   *   object is not modified.
   * @param opts.new_id - optional function that returns an unused id given
   *   an is_available function; new_id(is_available) = a new id.
   * @param opts.existing_ids - ids to re-use (by cell position) on loading,
   *   for efficiency purposes.
   * @param opts.output_handler - optional; h = output_handler(cell), then
   *   h.message(content) is called for each output and h.done() (if defined)
   *   once all outputs of the cell were delivered.
   * @param opts.process_attachment - optional; called as
   *   process_attachment(base64, mime) and expected to return a sha1.
   */
  import = (opts: any) => {
    opts = misc.defaults(opts, {
      ipynb: {},
      new_id: undefined,
      existing_ids: [],
      output_handler: undefined,
      process_attachment: undefined,
    });

    this._ipynb = misc.deep_copy(opts.ipynb);
    this._new_id = opts.new_id;
    this._output_handler = opts.output_handler;
    this._process_attachment = opts.process_attachment;
    this._existing_ids = opts.existing_ids; // option to re-use existing ids

    // Must come before sanity checks, as old versions are "insane".
    // See https://github.com/sagemathinc/cocalc/issues/1937
    this._handle_old_versions();
    this._sanity_improvements();
    this._import_settings();
    this._import_metadata();
    this._read_in_cells();
  };

  /** Map from cell id to imported cell object (set by `import`). */
  cells = () => {
    return this._cells;
  };

  /** Lower-cased kernel name from `metadata.kernelspec.name`, if there is one. */
  kernel = () => {
    return this._kernel;
  };

  /** Notebook-level metadata except `kernelspec`; unset if there is none. */
  metadata = () => {
    return this._metadata;
  };

  /** Drop every reference held by this importer. */
  close = () => {
    delete this._cells;
    delete this._kernel;
    delete this._metadata;
    delete this._language_info;
    delete this._ipynb;
    delete this._existing_ids;
    delete this._new_id;
    delete this._output_handler;
    delete this._process_attachment;
  };

  // Everything below is the internal private implementation.

  /**
   * Do some basic easy sanity improvements to the ipynb object,
   * in case parts of the object are missing.
   */
  private _sanity_improvements = () => {
    const ipynb = this._ipynb;
    if (ipynb.cells == null || ipynb.cells.length === 0) {
      ipynb.cells = misc.deep_copy(DEFAULT_IPYNB.cells);
    }
    if (ipynb.metadata == null) {
      ipynb.metadata = misc.deep_copy(DEFAULT_IPYNB.metadata);
    }
    if (ipynb.nbformat == null) {
      ipynb.nbformat = DEFAULT_IPYNB.nbformat;
    }
    if (ipynb.nbformat_minor == null) {
      ipynb.nbformat_minor = DEFAULT_IPYNB.nbformat_minor;
    }
  };

  /**
   * Update the ipynb object from formats before version 4: cells stored in
   * `worksheets` are moved to the top-level `cells` list, `input` becomes
   * `source`, heading cells become markdown, and `pyout` outputs are turned
   * into `{data: {mime: value}}`.
   * There are other changes made when parsing cells.
   */
  private _handle_old_versions = () => {
    const ipynb = this._ipynb;
    if (ipynb.nbformat >= 4) {
      return;
    }
    if (ipynb.cells == null) {
      ipynb.cells = [];
    }
    for (const worksheet of ipynb.worksheets || []) {
      for (const cell of worksheet.cells || []) {
        if (cell.input != null) {
          cell.source = cell.input;
          delete cell.input;
        }
        if (cell.cell_type === "heading") {
          cell.cell_type = "markdown";
          if (misc.is_array(cell.source)) {
            cell.source = cell.source.join("");
          }
          cell.source = `# ${cell.source}`;
        }
        for (const mesg of cell.outputs || []) {
          if (mesg.output_type !== "pyout") {
            continue;
          }
          for (const type of JUPYTER_MIMETYPES) {
            // old format keys outputs by the mime subtype, e.g. "png"
            const subtype = type.split("/")[1];
            if (mesg[subtype] != null) {
              const data = { [type]: mesg[subtype] };
              for (const k in mesg) {
                delete mesg[k];
              }
              mesg.data = data;
              break;
            }
          }
          // Only reached with text still set when no mime type matched above
          // (a match wipes every key of mesg, including text).
          if (mesg.text != null) {
            // Multi-line fields may be a list of strings or a single string.
            const text = misc.is_array(mesg.text)
              ? mesg.text.join("")
              : mesg.text;
            const data = { "text/plain": text };
            for (const k in mesg) {
              delete mesg[k];
            }
            mesg.data = data;
          }
        }
        ipynb.cells.push(cell);
      }
    }
  };

  /** Read the kernel name out of `metadata.kernelspec.name`. */
  _import_settings = () => {
    this._kernel = this._ipynb?.metadata?.kernelspec?.name;
    if (this._kernel != null) {
      // kernel names are supposed to be case insensitive
      // https://jupyter-client.readthedocs.io/en/latest/kernels.html
      // We also make them all lower case when reading them in at
      // src/packages/jupyter/kernel/kernel-data.ts
      this._kernel = this._kernel.toLowerCase();
    }
  };

  /**
   * Copy every notebook-level metadata entry except `kernelspec`;
   * `_metadata` is left unset when nothing remains.
   */
  _import_metadata = () => {
    const m = this._ipynb?.metadata;
    if (m == null) {
      return;
    }
    const metadata: any = {};
    for (const k in m) {
      if (k === "kernelspec") {
        continue;
      }
      metadata[k] = m[k];
    }
    if (misc.len(metadata) > 0) {
      this._metadata = metadata;
    }
  };

  /** Import every cell in order, storing each under its id in `_cells`. */
  _read_in_cells = () => {
    this._cells = {};
    const cells = this._ipynb?.cells;
    if (cells == null) {
      // nothing to do
      return;
    }
    let n = 0;
    for (const raw of cells) {
      const cell = this._import_cell(raw, n);
      this._cells[cell.id] = cell;
      n += 1;
    }
  };

  /**
   * Convert an output message from a pre-version-4 notebook.
   *
   * For nbformat >= 4 the content is returned untouched. Otherwise stream
   * messages are fixed in place, while other messages may be replaced by a
   * NEW object -- so callers must use the return value.
   */
  _update_output_format = (content: any) => {
    if (this._ipynb?.nbformat >= 4) {
      return content;
    }
    // fix old deprecated fields
    if (content.output_type === "stream") {
      if (misc.is_array(content.text)) {
        content.text = content.text.join("");
      }
      content.name = content.stream;
    } else {
      for (const t of JUPYTER_MIMETYPES) {
        const b = t.split("/")[1];
        if (content[b] != null) {
          content = { data: { [t]: content[b] } };
          break; // at most one data per message.
        }
      }
      if (content.text != null) {
        content = {
          data: { "text/plain": content.text },
          output_type: "stream",
        };
      }
    }
    return content;
  };

  /** Replace (in place) every array-valued property of obj by its joined string. */
  _join_array_strings_obj = (obj: any) => {
    if (obj != null) {
      for (const key in obj) {
        const val = obj[key];
        if (misc.is_array(val)) {
          obj[key] = val.join("");
        }
      }
    }
    return obj;
  };

  /**
   * Normalize one output message to the format we use internally.
   *
   * @returns the normalized message. This is usually the same (mutated)
   *   object, but for old notebook formats it can be a new object, so the
   *   return value must be used rather than the argument.
   */
  _import_cell_output_content = (content: any): any => {
    content = this._update_output_format(content); // old versions
    this._join_array_strings_obj(content.data); // arrays --> strings
    if (misc.is_array(content.text)) {
      content.text = content.text.join("");
    }
    remove_redundant_reps(content.data); // multiple output formats
    delete content.prompt_number; // redundant; in some files
    return content;
  };

  /** True if id is used neither by an imported cell nor by `existing_ids`. */
  _id_is_available = (id: any) => {
    return !(this._cells?.[id] || (this._existing_ids ?? []).includes(id));
  };

  /**
   * Pick an id for a cell: the id stored in the ipynb file when it is still
   * available, else one from the `new_id` option, else the smallest
   * available non-negative integer (as a string).
   */
  _get_new_id = (cell: any) => {
    if (cell?.id && this._id_is_available(cell.id)) {
      // attempt to use id in the ipynb file
      return cell.id;
    }
    if (this._new_id != null) {
      return this._new_id(this._id_is_available);
    }
    for (let id = 0; ; id += 1) {
      const s = `${id}`;
      if (this._id_is_available(s)) {
        return s;
      }
    }
  };

  /** execution_count if set, else the (older) prompt_number, else null. */
  _get_exec_count = (execution_count?: number, prompt_number?: number) => {
    return execution_count ?? prompt_number ?? null;
  };

  /** Cell type, defaulting to "code". */
  _get_cell_type = (cell_type?: string) => {
    return cell_type ?? "code";
  };

  /**
   * Import the outputs of one cell.
   *
   * @param outputs - the cell's `outputs` from the ipynb file
   * @param alt_outputs - optional replacements (from `metadata.cocalc.outputs`),
   *   used instead of the output with the same key when non-null
   * @param id - id of the cell, passed on to the output handler
   * @returns null when there are no outputs; otherwise an object mapping the
   *   output key ("0", "1", ...) to the normalized message. When an output
   *   handler is configured, messages are passed to it instead of being
   *   stored here.
   */
  _get_cell_output = (outputs: any, alt_outputs: any, id: any) => {
    if (outputs == null || outputs.length === 0) {
      return null;
    }
    const cell: any = { id, output: {} };
    const handler: any =
      this._output_handler != null ? this._output_handler(cell) : undefined;
    // for..in on purpose: the keys are used as (string) keys of cell.output.
    for (const k in outputs) {
      const raw = alt_outputs?.[k] ?? outputs[k];
      // Use the returned object: for old formats it is not the same as raw.
      const content = this._import_cell_output_content(raw);
      if (handler != null) {
        handler.message(content);
      } else {
        cell.output[k] = content;
      }
    }
    if (handler != null && typeof handler.done === "function") {
      handler.done();
    }
    return cell.output;
  };

  /** Cell source as a single string, or null if there is no source. */
  _get_cell_input(source) {
    if (source == null) {
      return null;
    }
    // "If you intend to work with notebook files directly, you must allow multi-line
    // string fields to be either a string or list of strings."
    // https://nbformat.readthedocs.io/en/latest/format_description.html#top-level-structure
    return misc.is_array(source) ? source.join("") : source;
  }

  /**
   * Convert the n-th cell of the ipynb file to the internal cell object.
   *
   * @param cell - cell object from the ipynb file
   * @param n - position of the cell; also the index used to look up a
   *   re-usable id in `existing_ids`
   */
  _import_cell(cell: any, n: any) {
    const id = this._existing_ids?.[n] ?? this._get_new_id(cell);
    const obj: any = {
      type: "cell",
      id,
      pos: n,
      input: this._get_cell_input(cell.source),
      output: this._get_cell_output(
        cell.outputs,
        cell.metadata?.cocalc?.outputs,
        id,
      ),
      cell_type: this._get_cell_type(cell.cell_type),
      exec_count: this._get_exec_count(
        cell.execution_count,
        cell.prompt_number,
      ),
    };

    if (cell.metadata != null) {
      for (const k of ["collapsed", "scrolled"]) {
        // only recorded when truthy
        if (cell.metadata[k]) {
          obj[k] = true;
        }
      }

      if (cell.metadata.slideshow != null) {
        obj.slide = cell.metadata.slideshow.slide_type;
      }

      if (cell.metadata.tags != null) {
        obj.tags = misc.dict(
          cell.metadata.tags.map((tag: string) => [tag, true]),
        );
      }
      // See https://github.com/sagemathinc/cocalc/issues/3191 for
      // why the _'d ones below; this is to fix "corrupted" worksheets.
      const other = misc.copy_without(cell.metadata, [
        "collapsed",
        "scrolled",
        "slideshow",
        "tags",
        "_root",
        "__ownerID",
        "__hash",
        "__altered",
      ]);
      if (misc.len(other) > 0) {
        obj.metadata = other;
      }
    }

    if (cell.attachments != null) {
      obj.attachments = {};
      for (const name in cell.attachments) {
        const val = cell.attachments[name];
        for (const mime in val) {
          const base64 = val[mime];
          if (this._process_attachment == null) {
            obj.attachments[name] = { type: "base64", value: base64 };
            continue;
          }
          try {
            const sha1 = this._process_attachment(base64, mime);
            obj.attachments[name] = { type: "sha1", value: sha1 };
          } catch (err) {
            // We put this in input, since actually attachments are
            // only for markdown cells (?), and they have no output.
            // Anyway, I'm mainly putting this here to debug this
            // and it should never fail once debugged.
            // Just to be clear again: this should never ever happen.
            const text = `\n${err.stack}\nCoCalc Bug -- ${err}\n`;
            obj.input = (obj.input ?? "") + text;
          }
        }
      }
    }
    return obj;
  }
}
425
426
/**
 * Drop redundant representations from an output's `data` object, in place.
 *
 * We only keep the first representation (in the order of JUPYTER_MIMETYPES)
 * that is present, since it provides the richest representation in the
 * client; there is no need for the others.
 *
 * TODO: probably we should still store all of these types somewhere (in the
 * backend only) for the .ipynb export, but I'm not doing that right now!
 * This means opening and closing an ipynb file may lose information, which
 * no client currently cares about (?) -- maybe nbconvert does.
 *
 * @param data - map from mime type to value; may be null/undefined
 * @returns the same `data` object, or undefined when `data` is null/undefined
 */
export function remove_redundant_reps(data?: any) {
  if (data == null) {
    return;
  }
  let keep: string | undefined;
  for (const type of JUPYTER_MIMETYPES) {
    if (data[type] != null) {
      keep = type;
      break;
    }
  }
  if (keep == null) {
    return data;
  }
  // Membership must be tested against the mime type *values*; indexing the
  // list by a mime string (JUPYTER_MIMETYPES[type]) is always undefined.
  const known = new Set<string>(JUPYTER_MIMETYPES);
  for (const type of Object.keys(data)) {
    // NOTE: we only remove multiple reps that are both in JUPYTER_MIMETYPES;
    // if there is another rep that is NOT in JUPYTER_MIMETYPES, then it is
    // not removed, e.g., application/vnd.jupyter.widget-view+json and
    // text/plain both are types of representation of a widget.
    if (type !== keep && known.has(type)) {
      delete data[type];
    }
  }
  return data;
}
456
457