Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/ipynb/export-to-ipynb.ts
6021 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
/*
7
Exporting from our in-memory sync-friendly format to ipynb
8
*/
9
10
import { CellType } from "@cocalc/util/jupyter/types";
11
import { deep_copy, filename_extension, keys } from "@cocalc/util/misc";
12
13
// Internal representation of a cell's tags: a map keyed by tag name.
// On export the keys of this map are written out as a sorted list.
type Tags = { [key: string]: boolean };
14
15
/**
 * A single cell in the in-memory format handed to the exporter.
 * Every field is optional; the exporter fills in defaults where the
 * ipynb format needs a value.
 */
interface Cell {
  // Exported as "code" when not set.
  cell_type?: CellType;
  // Cell source; exported as an array of lines.
  input?: string;
  collapsed?: boolean;
  scrolled?: boolean;
  // Written to metadata.slideshow.slide_type when set.
  slide?: any;
  // Map from attachment name to an object with `type` and `value` fields.
  attachments?: any;
  tags?: Tags;
  // Output messages keyed by their index ("0", "1", ...).
  output?: Record<string, OutputMessage>;
  // Extra metadata that is copied into the exported cell metadata.
  metadata?: Metadata;
  exec_count?: number;
}
27
28
// Output messages are left untyped. Fields read or written in this file
// include text, name, data, ename, exec_count, more_output, opts, value,
// metadata, output_type, execution_count and cocalc.
type OutputMessage = any;
29
30
/** Cell-level metadata as written into the exported notebook. */
interface Metadata {
  collapsed?: boolean;
  scrolled?: boolean;
  // CoCalc-specific versions of outputs, keyed by the output's index.
  cocalc?: {
    outputs: Record<string, any>;
  };
  slideshow?: any;
  // ipynb stores tags as a list of names.
  tags?: string[];
}
39
40
/** Shape of one cell in the exported ipynb `cells` array. */
export interface IPynbCell {
  id: string;
  cell_type: CellType;
  // Cell source split into lines.
  source?: string[];
  metadata?: Metadata;
  // Only set for code cells.
  execution_count?: number;
  // Only set for code cells.
  outputs?: OutputMessage[];
}
48
49
/**
 * Lookup of blob contents by sha1. A nullish result means that the
 * blob is not available.
 */
interface BlobStore {
  // Used for image/* and application/pdf output data.
  getBase64: (sha1: string) => string | null | undefined | void;
  // Used for "iframe" output data, which is exported as text/html.
  getString: (sha1: string) => string | null | undefined | void;
}
53
54
/** Input to export_to_ipynb. */
interface Options {
  // list of ids of the cells in the correct order
  cell_list: string[];
  // actual data of the cells
  cells: Record<string, Cell>;
  // custom metadata only
  metadata?: any;
  // official jupyter will give an error on load without properly giving
  // this (and ask to select a kernel)
  kernelspec?: object;
  language_info?: object;
  blob_store?: BlobStore;
  // optional map id --> list of additional output messages to replace
  // last output message.
  more_output?: Record<string, OutputMessage[]>;
}
68
69
/**
 * Convert the in-memory notebook described by `opts` into an ipynb
 * object (nbformat 4, minor version 4).
 *
 * **WARNING: any input to export_to_ipynb function may be MUTATED!**
 * In particular `opts.kernelspec` is set to `{}` when missing, and
 * `opts.metadata` (when given) is used directly as the notebook
 * metadata and gains the `kernelspec` / `language_info` entries.
 *
 * @param opts cells, their order, and notebook-level metadata
 * @returns object with `cells`, `metadata`, `nbformat`, `nbformat_minor`
 */
export function export_to_ipynb(opts: Options) {
  if (opts.kernelspec == null) {
    opts.kernelspec = {};
  }

  const cells = opts.cell_list.map((id: string) => cell_to_ipynb(id, opts));
  const metadata = opts.metadata ?? {};

  metadata.kernelspec = opts.kernelspec;
  const { language_info } = opts;
  if (language_info != null) {
    metadata.language_info = language_info;
  }

  return {
    cells,
    metadata,
    nbformat: 4,
    nbformat_minor: 4,
  };
}
88
89
/**
 * Return ipynb version of the given cell as object.
 *
 * Looks up `opts.cells[id]` and builds the exported cell: source,
 * metadata, and (for code cells only) execution count and outputs.
 * The cell's `output` object may be modified in the process.
 */
function cell_to_ipynb(id: string, opts: Options) {
  const cell = opts.cells[id];
  const metadata: Metadata = {};
  const obj = {
    id,
    cell_type: cell.cell_type ?? "code",
    source: diff_friendly(cell.input ?? ""),
    metadata,
  } as IPynbCell;

  // Copy over any extra (mostly user defined) metadata first, so that
  // the fields handled explicitly below take precedence.
  if (cell.metadata != null) {
    processOtherMetadata(obj, cell.metadata);
  }

  // consistency with jupyter -- they explicitly give collapsed true or
  // false state no matter what, but scrolled only when true.
  metadata.collapsed = !!cell.collapsed;
  if (cell.scrolled) {
    metadata.scrolled = true;
  }

  const exec_count = cell.exec_count ?? 0;
  const isCode = obj.cell_type === "code";
  if (isCode) {
    obj.execution_count = exec_count;
  }

  processSlides(obj, cell.slide);
  processAttachments(obj, cell.attachments);
  processTags(obj, cell.tags);

  if (!isCode) {
    // Code is the only cell type that is allowed to have an outputs field.
    return obj;
  }

  if (cell.output != null) {
    obj.outputs = ipynbOutputs({
      output: cell.output,
      exec_count,
      more_output: opts.more_output?.[id],
      blob_store: opts.blob_store,
    });
  } else if (obj.outputs == null) {
    obj.outputs = []; // annoying requirement of ipynb file format.
  }

  obj.outputs?.forEach((mesg, n) => {
    if (mesg.cocalc == null) {
      return;
    }
    // An alternative version of an output that official Jupyter doesn't
    // support can only be stored in the **cell-level** metadata, not in
    // the output itself.
    if (metadata.cocalc == null) {
      metadata.cocalc = { outputs: {} };
    }
    metadata.cocalc.outputs[n] = mesg.cocalc;
    delete mesg.cocalc;
  });

  return obj;
}
154
155
// Record the cell's slide type (if any) as metadata.slideshow.slide_type.
// Does nothing when slide is null or undefined.
function processSlides(obj, slide?) {
  if (slide == null) {
    return;
  }
  obj.metadata.slideshow = { slide_type: slide };
}
160
161
// Write the cell's tags to metadata.tags. We store tags internally as
// a map (for easy efficient add/remove), but .ipynb uses a list, so the
// map's keys are exported in sorted order. Does nothing without tags.
function processTags(obj, tags?: Tags) {
  if (tags == null) {
    return;
  }
  const names = keys(tags);
  names.sort();
  obj.metadata.tags = names;
}
168
169
// Copy all entries of other_metadata into obj.metadata, overwriting any
// entries with the same key. Does nothing when other_metadata is nullish.
function processOtherMetadata(obj, other_metadata) {
  if (other_metadata == null) {
    return;
  }
  Object.assign(obj.metadata, other_metadata);
}
174
175
// Export the cell's attachments as obj.attachments, a map from
// attachment name to { "image/<ext>": <base64 value> }, where <ext> is
// the filename extension of the name ("jpg" is written as "jpeg").
//
// Only attachments whose type is "base64" are handled. Any other entry
// (including a null one) is skipped, and the remaining attachments are
// still exported; one unsupported entry must not silently drop the rest.
function processAttachments(obj, attachments) {
  if (attachments == null) {
    // don't have to or can't do anything (https://github.com/sagemathinc/cocalc/issues/4272)
    return;
  }
  obj.attachments = {};
  for (const name in attachments) {
    const val = attachments[name];
    if (val?.type !== "base64") {
      // we only handle base64 for now
      continue;
    }
    const ext = filename_extension(name);
    const subtype = ext === "jpg" ? "jpeg" : ext;
    obj.attachments[name] = { [`image/${subtype}`]: val.value };
  }
}
194
195
/**
 * Convert a cell's output map (keyed "0", "1", ...) into the ipynb
 * `outputs` array.
 *
 * If the last message has the `more_output` field, it is replaced by
 * the messages given in `more_output`, or, when those are not
 * available, by a stderr warning message. Each non-null message is then
 * passed through processOutputN and appended in index order.
 *
 * NOTE: the `output` object and its messages are modified in place.
 *
 * @param output      output messages keyed by index
 * @param exec_count  execution count passed on to processOutputN
 * @param more_output optional messages that replace a trailing
 *                    more_output marker
 * @param blob_store  optional blob lookup passed on to processOutputN
 * @returns the list of processed output messages (empty when there is
 *          no output at all)
 */
function ipynbOutputs({
  output,
  exec_count,
  more_output,
  blob_store,
}: {
  output: { [n: string]: OutputMessage };
  exec_count: number;
  more_output?: OutputMessage[];
  blob_store?: BlobStore;
}) {
  const outputs: OutputMessage[] = [];
  let len = objArrayLength(output);
  if (len === 0) {
    // Nothing to export. This also avoids reading the nonexistent
    // "last" message of an empty output object.
    return outputs;
  }

  // If the last message has the more_output field, then there may be
  // more output messages stored, which are not in the cells object.
  // The last slot itself can be null, hence the optional chaining.
  if (output[`${len - 1}`]?.more_output != null) {
    let n: number = len - 1;
    const cnt = more_output?.length ?? 0;
    if (cnt === 0 || more_output == null) {
      // For some reason more output is not available for this cell. So we
      // replace the more_output message by an error explaining what happened.
      output[`${n}`] = {
        text: "WARNING: Some output was deleted.\n",
        name: "stderr",
      };
    } else {
      // Remove the more_output marker and put the known additional
      // messages in its place (the first one reuses the marker's index).
      delete output[`${n}`];
      for (const mesg of more_output) {
        output[`${n}`] = mesg;
        n += 1;
      }
    }
  }

  // Now, everything continues as normal.
  len = objArrayLength(output);
  for (let n = 0; n < len; n++) {
    const output_n = output[`${n}`];
    if (output_n != null) {
      processOutputN(output_n, exec_count, blob_store);
      outputs.push(output_n);
    }
  }
  return outputs;
}
245
246
// Length of an "array" stored as an object with keys "0", "1", ...:
// one more than the largest integer key, or 0 if the object is nullish
// or has no such key. Keys that do not parse as integers are ignored.
function objArrayLength(objArray) {
  let largest = -1;
  if (objArray != null) {
    for (const key in objArray) {
      const index = parseInt(key);
      // NaN never compares greater, so non-numeric keys are skipped here.
      largest = index > largest ? index : largest;
    }
  }
  return largest + 1;
}
259
260
/**
 * Convert one output message, in place, to its ipynb form.
 *
 * - `exec_count` is removed from the message.
 * - `text` and all `text/*` data entries are split into line arrays.
 * - `image/*` and `application/pdf` data entries hold a sha1 which is
 *   replaced by the base64 content from `blob_store`; an `iframe` entry
 *   is replaced by a `text/html` entry holding the blob's string content.
 * - `output_type` is set: "execute_result" for messages with `data`
 *   (which also get `metadata` and `execution_count`), "stream" for
 *   messages with `name`, and "error" for messages with `ename`.
 *
 * If a blob cannot be resolved (no blob_store, or the blob is unknown,
 * which could happen if the user reverts in the history browser and
 * there is an image in the output that was not saved in the latest
 * version), that data entry is left as it is, but the rest of the
 * message is still converted, so that the message always gets its
 * `output_type`.
 *
 * @param output_n   the message to convert; nullish is a no-op
 * @param exec_count written as execution_count of messages with data
 * @param blob_store optional lookup of blob contents by sha1
 */
function processOutputN(
  output_n: OutputMessage,
  exec_count: number,
  blob_store?: BlobStore,
) {
  if (output_n == null) {
    return;
  }
  if (output_n.exec_count != null) {
    delete output_n.exec_count;
  }
  if (output_n.text != null) {
    output_n.text = diff_friendly(output_n.text);
  }
  if (output_n.data != null) {
    // Iterate over a snapshot of the keys, since entries may be added
    // and removed below.
    for (const k of Object.keys(output_n.data)) {
      const v = output_n.data[k];
      if (k.startsWith("text/")) {
        output_n.data[k] = diff_friendly(v);
      }
      const isIframe = k === "iframe";
      if (!isIframe && !k.startsWith("image/") && k !== "application/pdf") {
        continue;
      }
      if (blob_store == null) {
        // impossible to include in the output without blob_store
        continue;
      }
      let value;
      if (isIframe) {
        // "iframe" is not an ipynb mime type; it is exported as text/html.
        delete output_n.data[k];
        value = blob_store.getString(v);
      } else {
        value = blob_store.getBase64(v);
      }
      if (value == null) {
        // The blob is no longer known.
        // TODO: instead return an error.
        continue;
      }
      output_n.data[isIframe ? "text/html" : k] = value;
    }
    output_n.output_type = "execute_result";
    if (output_n.metadata == null) {
      output_n.metadata = {};
    }
    output_n.execution_count = exec_count;
  } else if (output_n.name != null) {
    output_n.output_type = "stream";
    if (output_n.name === "input") {
      processStdinOutput(output_n);
    }
  } else if (output_n.ename != null) {
    output_n.output_type = "error";
  }
}
316
317
// Rewrite (in place) a message with name "input" as a stdout stream
// message whose text is the prompt, a space, and the entered value
// (or nothing when there is no value). A deep copy of the original
// message is kept in output.cocalc.
function processStdinOutput(output) {
  const { opts, value } = output;
  // Take the copy before changing anything, so it reflects the original.
  output.cocalc = deep_copy(output);
  output.name = "stdout";
  output.text = opts.prompt + " " + (value ?? "");
  delete output.opts;
  delete output.value;
}
324
325
// Transform a string s with newlines into an array v of strings
// such that v.join('') == s. Every element except possibly the last
// ends with "\n", and there is no trailing empty element (so "" gives []).
// An array is returned unchanged; null and undefined give undefined.
function diff_friendly(
  s: string | string[] | undefined | null,
): string[] | undefined | null {
  if (typeof s === "string") {
    const pieces = s.split("\n");
    // Everything before the final piece was followed by a newline.
    const tail = pieces.pop();
    const lines = pieces.map((piece) => piece + "\n");
    if (tail) {
      lines.push(tail);
    }
    return lines;
  }
  // might already be an array or undefined.
  return s ?? undefined;
}
346
347