Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/ipynb/src/serializers.ts
3291 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type * as nbformat from '@jupyterlab/nbformat';
7
import type { NotebookCell, NotebookCellData, NotebookCellOutput, NotebookData, NotebookDocument } from 'vscode';
8
import { CellOutputMetadata, type CellMetadata } from './common';
9
import { textMimeTypes, NotebookCellKindMarkup, CellOutputMimeTypes, defaultNotebookFormat } from './constants';
10
11
const textDecoder = new TextDecoder();
12
13
/**
 * Converts a VS Code notebook cell into the matching Jupyter (nbformat) cell.
 *
 * Markup cells become markdown cells, cells whose language id is `raw` become
 * raw cells, and every other cell is serialized as a code cell.
 *
 * @param vscCell The VS Code cell to convert.
 * @param preferredLanguage The notebook's preferred cell language; only forwarded to the code-cell conversion.
 * @returns The Jupyter cell produced by the matching converter.
 */
export function createJupyterCellFromNotebookCell(
    vscCell: NotebookCellData,
    preferredLanguage: string | undefined,
): nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell {
    if (vscCell.kind === NotebookCellKindMarkup) {
        return createMarkdownCellFromNotebookCell(vscCell);
    }
    if (vscCell.languageId === 'raw') {
        return createRawCellFromNotebookCell(vscCell);
    }
    return createCodeCellFromNotebookCell(vscCell, preferredLanguage);
}
27
28
29
/**
 * Sort the JSON to minimize unnecessary SCM changes.
 * Jupyter notebooks/labs sort the JSON keys in alphabetical order.
 * https://github.com/microsoft/vscode-python/issues/13155
 *
 * Arrays keep their element order (each element is processed recursively).
 * Objects with at least one own key are rebuilt with their keys in sorted order.
 * Anything else (primitives, `null`, `undefined`, empty objects) is returned as-is.
 */
export function sortObjectPropertiesRecursively(obj: any): any {
    if (Array.isArray(obj)) {
        return obj.map(entry => sortObjectPropertiesRecursively(entry));
    }

    const isObject = obj !== undefined && obj !== null && typeof obj === 'object';
    if (!isObject) {
        return obj;
    }

    const propertyNames = Object.keys(obj);
    if (propertyNames.length === 0) {
        // Nothing to reorder; hand back the very same instance.
        return obj;
    }

    const ordered: Record<string, any> = {};
    for (const name of propertyNames.sort()) {
        ordered[name] = sortObjectPropertiesRecursively(obj[name]);
    }
    return ordered as any;
}
50
51
/**
 * Produces a shallow copy of the Jupyter-related metadata of a cell.
 *
 * When called with `{ cell }`, the copy starts with `execution_count: null`, which is then
 * overridden by whatever the cell metadata holds; for markup cells `execution_count`
 * is removed again. When called with `{ metadata }`, the metadata is copied as-is.
 *
 * @param options Either the cell to read from, or a bare metadata bag.
 * @returns A new object; the input metadata object itself is never returned.
 */
export function getCellMetadata(options: { cell: NotebookCell | NotebookCellData } | { metadata?: { [key: string]: any } }): CellMetadata {
    if (!('cell' in options)) {
        // It contains the cell id, and the cell metadata, along with other nb cell metadata.
        const copy = { ...(options.metadata ?? {}) };
        return copy as CellMetadata;
    }

    const { cell } = options;
    const copy = {
        execution_count: null,
        // It contains the cell id, and the cell metadata, along with other nb cell metadata.
        ...(cell.metadata ?? {})
    } satisfies CellMetadata;

    if (cell.kind === NotebookCellKindMarkup) {
        delete (copy as any).execution_count;
    }
    return copy;
}
73
74
/**
 * Reads the language id stored under `metadata.vscode.languageId` of the given cell metadata.
 *
 * @returns The stored language id, or `undefined` when any part of that path is missing.
 */
export function getVSCodeCellLanguageId(metadata: CellMetadata): string | undefined {
    const vscodeSection = metadata.metadata?.vscode;
    return vscodeSection?.languageId;
}
77
/**
 * Stores `languageId` under `metadata.vscode` of the given cell metadata (mutating it).
 *
 * The nested `metadata` object is created when absent. The `vscode` entry is replaced
 * wholesale by `{ languageId }`, so any other keys it held are dropped.
 */
export function setVSCodeCellLanguageId(metadata: CellMetadata, languageId: string) {
    const nested = metadata.metadata || {};
    metadata.metadata = nested;
    nested.vscode = { languageId };
}
81
/**
 * Deletes the whole `vscode` entry from the nested `metadata` object of the given
 * cell metadata (mutating it). Does nothing when there is no truthy `vscode` entry.
 */
export function removeVSCodeCellLanguageId(metadata: CellMetadata) {
    const nested = metadata.metadata;
    if (!nested?.vscode) {
        return;
    }
    delete nested.vscode;
}
86
87
/**
 * Builds the Jupyter code cell for a VS Code code cell.
 *
 * The cell metadata is deep-copied through a JSON round trip before being adjusted, so the
 * cell's own metadata object is left untouched. The VS Code language id is recorded in the
 * metadata only when the cell's language differs from `preferredLanguage`; otherwise any
 * recorded value is removed.
 *
 * @param cell The VS Code cell to serialize.
 * @param preferredLanguage The notebook's preferred cell language.
 * @returns The code cell, including `id` when the metadata carries one.
 */
function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguage: string | undefined): nbformat.ICodeCell {
    const metadataCopy: CellMetadata = JSON.parse(JSON.stringify(getCellMetadata({ cell })));
    metadataCopy.metadata = metadataCopy.metadata || {}; // This cannot be empty.

    if (cell.languageId === preferredLanguage) {
        // The cell language matches the document's preferred cell language: flush the vscode custom language id metadata.
        removeVSCodeCellLanguageId(metadataCopy);
    } else {
        setVSCodeCellLanguageId(metadataCopy, cell.languageId);
    }

    const serialized: nbformat.ICodeCell = {
        cell_type: 'code',
        // Metadata should always contain the execution_count.
        // Whenever execution summary data changes we update the metadata to contain the execution count;
        // failing to do so means we have a problem.
        // Do not read executionSummary here: the user may have reverted changes to the metadata, in which
        // case the execution summary could still hold the data while the metadata does not, and we must
        // not re-set the metadata from the execution summary (remember, the user reverted that).
        execution_count: metadataCopy.execution_count ?? null,
        source: splitCellSourceIntoMultilineString(cell.value),
        outputs: (cell.outputs || []).map(translateCellDisplayOutput),
        metadata: metadataCopy.metadata
    };

    if (metadataCopy?.id) {
        serialized.id = metadataCopy.id;
    }
    return serialized;
}
115
116
/**
 * Builds the Jupyter raw cell for a VS Code cell.
 *
 * `attachments` and `id` are copied from the cell metadata only when they are truthy.
 */
function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCell {
    const details = getCellMetadata({ cell });
    const serialized: any = {
        cell_type: 'raw',
        source: splitCellSourceIntoMultilineString(cell.value),
        metadata: details?.metadata || {} // This cannot be empty.
    };

    const attachments = details?.attachments;
    if (attachments) {
        serialized.attachments = attachments;
    }

    const id = details?.id;
    if (id) {
        serialized.id = id;
    }
    return serialized;
}
131
132
/**
133
* Splits the source of a cell into an array of strings, each representing a line.
134
* Also normalizes line endings to use LF (`\n`) instead of CRLF (`\r\n`).
135
* Same is done in deserializer as well.
136
*/
137
function splitCellSourceIntoMultilineString(source: string): string[] {
138
return splitMultilineString(source.replace(/\r\n/g, '\n'));
139
}
140
141
/**
 * Converts an nbformat multiline string into an array with one entry per line.
 *
 * An array input is returned unchanged (same instance). For a string, every line except
 * the last keeps its trailing `\n`; a trailing empty segment is dropped, and an empty
 * string yields an empty array.
 */
function splitMultilineString(source: nbformat.MultilineString): string[] {
    if (Array.isArray(source)) {
        return source as string[];
    }

    const text = source.toString();
    if (text.length === 0) {
        return [];
    }

    const segments = text.split('\n');
    // The final segment is the only one not followed by a line break, hence the only one that may be empty.
    const tail = segments.pop() ?? '';
    const lines = segments.map(segment => `${segment}\n`);
    if (tail.length > 0) {
        lines.push(tail);
    }
    return lines;
}
160
161
/**
 * Translates a VS Code cell output into a Jupyter output.
 *
 * The Jupyter output type is taken from `output.metadata.outputType` when it is one of the
 * known types. Otherwise (e.g. output added by another extension) a best-effort translation
 * is made based on the mime types of the output items.
 *
 * @param output The VS Code output to translate.
 * @returns The Jupyter output, carrying `transient` when the output metadata has it.
 */
function translateCellDisplayOutput(output: NotebookCellOutput): JupyterOutput {
    const customMetadata = output.metadata as CellOutputMetadata | undefined;

    // Builds the `mime -> payload` bundle out of every item of the output.
    const collectMimeBundle = (): any => {
        const bundle: any = {};
        for (const item of output.items) {
            bundle[item.mime] = convertOutputMimeToJupyterOutput(item.mime, item.data as Uint8Array);
        }
        return bundle;
    };

    // Possibly some other extension added some output (do best effort to translate & save in ipynb),
    // in which case the metadata might not contain `outputType`.
    const declaredType = customMetadata?.outputType as nbformat.OutputType;
    let translated: JupyterOutput;

    if (declaredType === 'error') {
        translated = translateCellErrorOutput(output);
    } else if (declaredType === 'stream') {
        translated = convertStreamOutput(output);
    } else if (declaredType === 'display_data') {
        translated = {
            output_type: 'display_data',
            data: collectMimeBundle(),
            metadata: customMetadata?.metadata || {} // This can never be undefined.
        };
    } else if (declaredType === 'execute_result') {
        translated = {
            output_type: 'execute_result',
            data: collectMimeBundle(),
            metadata: customMetadata?.metadata || {}, // This can never be undefined.
            // This can never be undefined, only a number or `null`.
            execution_count: typeof customMetadata?.executionCount === 'number' ? customMetadata?.executionCount : null
        };
    } else if (declaredType === 'update_display_data') {
        translated = {
            output_type: 'update_display_data',
            data: collectMimeBundle(),
            metadata: customMetadata?.metadata || {} // This can never be undefined.
        };
    } else {
        const items = output.items;
        const isSingleError = items.length === 1 && items.every(item => item.mime === CellOutputMimeTypes.error);
        const isAllStream = items.every(
            item => item.mime === CellOutputMimeTypes.stderr || item.mime === CellOutputMimeTypes.stdout
        );

        if (isSingleError) {
            return translateCellErrorOutput(output);
        }

        // In the case of .NET & other kernels, we need to ensure we save ipynb correctly.
        // Hence stream output is saved as Jupyter `stream`, anything else as `display_data`,
        // unless the metadata already names an (unknown) output type.
        const fallbackType: nbformat.OutputType =
            <nbformat.OutputType>customMetadata?.outputType || (isAllStream ? 'stream' : 'display_data');

        let fallback: nbformat.IUnrecognizedOutput | nbformat.IDisplayData | nbformat.IStream;
        if (fallbackType === 'stream') {
            // Saving as `stream`: make sure the mandatory properties are set.
            fallback = convertStreamOutput(output);
        } else if (fallbackType === 'display_data') {
            // Saving as `display_data`: make sure the mandatory properties are set.
            const emptyDisplayData: nbformat.IDisplayData = {
                data: {},
                metadata: {},
                output_type: 'display_data'
            };
            fallback = emptyDisplayData;
        } else {
            fallback = { output_type: fallbackType };
        }

        if (customMetadata?.metadata) {
            fallback.metadata = customMetadata.metadata;
        }
        if (items.length > 0) {
            fallback.data = collectMimeBundle();
        }
        translated = fallback;
    }

    // Account for transient data as well.
    // `transient.display_id` is used to update cell output in other cells, at least that's one use case we know of.
    if (translated && customMetadata?.transient) {
        translated.transient = customMetadata.transient;
    }
    return translated;
}
265
266
/**
 * Translates a VS Code error output into a Jupyter `error` output.
 *
 * The first output item is expected to hold a JSON-serialized `Error` (`name`, `message`,
 * `stack`). When the output has no items, or the first item has no data, an empty error
 * output is returned instead of throwing, so that one malformed output cannot abort
 * serialization of the whole notebook.
 *
 * @param output The VS Code output to translate.
 * @returns The Jupyter error output. The traceback comes from `output.metadata.originalError`
 * when present, otherwise from the error's stack (or message).
 */
function translateCellErrorOutput(output: NotebookCellOutput): nbformat.IError {
    // It should have at least one output item, but guard against an empty list as well as
    // against an item without data (bug in VS Code).
    const firstItem = output.items[0];
    if (!firstItem?.data) {
        return {
            output_type: 'error',
            ename: '',
            evalue: '',
            traceback: []
        };
    }
    const originalError: undefined | nbformat.IError = output.metadata?.originalError;
    const value: Error = JSON.parse(textDecoder.decode(firstItem.data));
    return {
        output_type: 'error',
        ename: value.name,
        evalue: value.message,
        // VS Code needs an `Error` object which requires a `stack` property as a string.
        // The format could change when converting from `traceback` to `string` and back again.
        // When .NET stores errors in output (with their .NET kernel), the stack is empty,
        // hence store the message instead of the stack (so that something gets displayed in ipynb).
        traceback: originalError?.traceback || splitMultilineString(value.stack || value.message || '')
    };
}
291
292
293
/**
 * Determines the Jupyter stream name of an output by looking at its first item only.
 *
 * @returns `'stderr'` when the first item has the stderr mime type, `'stdout'` for any other
 * first item, and `undefined` when the output has no items.
 */
function getOutputStreamType(output: NotebookCellOutput): string | undefined {
    if (output.items.length === 0) {
        return undefined;
    }
    const isStderr = output.items[0].mime === CellOutputMimeTypes.stderr;
    return isStderr ? 'stderr' : 'stdout';
}
300
301
/** Any of the nbformat output shapes produced when serializing a cell output. */
type JupyterOutput =
    | nbformat.IError
    | nbformat.IStream
    | nbformat.IDisplayData
    | nbformat.IExecuteResult
    | nbformat.IUnrecognizedOutput;
307
308
/**
 * Converts a VS Code stream output (stdout/stderr items) into a Jupyter `stream` output.
 *
 * The decoded text of all stdout/stderr items is treated as one continuous stream and split
 * into lines: every entry of `text` except possibly the last ends with `\n`. Items with any
 * other mime type are ignored.
 *
 * @param output The VS Code output whose items are the chunks of the stream.
 * @returns A `stream` output named after the first item's mime type (`stdout` when there are no items).
 */
function convertStreamOutput(output: NotebookCellOutput): JupyterOutput {
    const outputs: string[] = [];
    for (const item of output.items) {
        if (item.mime !== CellOutputMimeTypes.stderr && item.mime !== CellOutputMimeTypes.stdout) {
            continue;
        }
        // Ensure each line is a separate entry in an array (the `\n` is re-added further below).
        const lines = textDecoder.decode(item.data).split('\n');
        // The first segment of a chunk always continues the line left pending by the previous
        // chunk, even when that segment is empty (chunk is empty or starts with `\n`).
        // Pushing it as a separate entry would inject a line break that is not in the stream.
        if (outputs.length > 0) {
            outputs[outputs.length - 1] += lines.shift() ?? '';
        }
        for (const line of lines) {
            outputs.push(line);
        }
    }

    for (let index = 0; index < (outputs.length - 1); index++) {
        outputs[index] = `${outputs[index]}\n`;
    }

    // Skip last one if empty (it's the only one that could be length 0)
    if (outputs.length && outputs[outputs.length - 1].length === 0) {
        outputs.pop();
    }

    const streamType = getOutputStreamType(output) || 'stdout';

    return {
        output_type: 'stream',
        name: streamType,
        text: outputs
    };
}
343
344
/**
 * Converts the bytes of a single output item into the value stored in the ipynb for its mime type:
 * - the error mime type and mime types containing `json` are parsed as JSON,
 * - `text/*`, the known text mime types and `image/svg+xml` become an array of lines,
 * - other `image/*` types become a base64 string,
 * - anything else is decoded to a plain string.
 *
 * Best effort: a missing value, or any failure while decoding/parsing, yields `''`.
 */
function convertOutputMimeToJupyterOutput(mime: string, value: Uint8Array) {
    if (!value) {
        return '';
    }
    try {
        if (mime === CellOutputMimeTypes.error) {
            return JSON.parse(textDecoder.decode(value));
        }
        if (mime.startsWith('text/') || textMimeTypes.includes(mime)) {
            return splitMultilineString(textDecoder.decode(value));
        }
        if (mime.startsWith('image/') && mime !== 'image/svg+xml') {
            // Images in Jupyter are stored in base64 encoded format.
            // VS Code expects bytes when rendering images.
            const hasBuffer = typeof Buffer !== 'undefined' && typeof Buffer.from === 'function';
            if (hasBuffer) {
                return Buffer.from(value).toString('base64');
            }
            let binary = '';
            for (let index = 0; index < value.length; index++) {
                binary += String.fromCharCode(value[index]);
            }
            return btoa(binary);
        }
        if (mime.toLowerCase().includes('json')) {
            const text = textDecoder.decode(value);
            return text.length > 0 ? JSON.parse(text) : text;
        }
        if (mime === 'image/svg+xml') {
            return splitMultilineString(textDecoder.decode(value));
        }
        return textDecoder.decode(value);
    } catch {
        return '';
    }
}
375
376
/**
 * Builds the Jupyter markdown cell for a VS Code markup cell.
 *
 * `attachments` and `id` are copied from the cell metadata only when they are truthy.
 */
export function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbformat.IMarkdownCell {
    const details = getCellMetadata({ cell });
    const serialized: any = {
        cell_type: 'markdown',
        source: splitCellSourceIntoMultilineString(cell.value),
        metadata: details?.metadata || {} // This cannot be empty.
    };

    const attachments = details?.attachments;
    if (attachments) {
        serialized.attachments = attachments;
    }

    const id = details?.id;
    if (id) {
        serialized.id = id;
    }
    return serialized;
}
391
392
/**
 * Returns a cleaned-up shallow copy of a Jupyter cell (the input cell is not modified):
 * - `source` is converted to an array of lines,
 * - non-code cells lose `outputs` and `execution_count`,
 * - code cells get every output passed through `fixupOutput` (or `[]` when there are none).
 */
export function pruneCell(cell: nbformat.ICell): nbformat.ICell {
    // Source is usually a single string on input. Convert back to an array.
    const pruned: nbformat.ICell = {
        ...cell,
        source: splitMultilineString(cell.source)
    };

    if (pruned.cell_type === 'code') {
        // Clean outputs from code cells.
        pruned.outputs = pruned.outputs ? (pruned.outputs as nbformat.IOutput[]).map(fixupOutput) : [];
        return pruned;
    }

    // Remove outputs and execution_count from non code cells.
    delete (pruned as any).outputs;
    delete (pruned as any).execution_count;
    return pruned;
}
410
// Template outputs, one per known output type. Only their property names matter:
// they define which keys an output of that type may keep (see `AllowedCellOutputKeys`).
const dummyStreamObj: nbformat.IStream = { output_type: 'stream', name: 'stdout', text: '' };
const dummyErrorObj: nbformat.IError = { output_type: 'error', ename: '', evalue: '', traceback: [''] };
const dummyDisplayObj: nbformat.IDisplayData = { output_type: 'display_data', data: {}, metadata: {} };
const dummyExecuteResultObj: nbformat.IExecuteResult = {
    output_type: 'execute_result',
    name: '',
    execution_count: 0,
    data: {},
    metadata: {}
};

/** For each known output type, the set of property names allowed on outputs of that type. */
const AllowedCellOutputKeys = {
    stream: new Set(Object.keys(dummyStreamObj)),
    error: new Set(Object.keys(dummyErrorObj)),
    display_data: new Set(Object.keys(dummyDisplayObj)),
    execute_result: new Set(Object.keys(dummyExecuteResultObj))
};
439
440
/**
 * Strips every property that is not allowed for the output's type.
 *
 * Only `stream`, `error`, `execute_result` and `display_data` outputs are filtered (against
 * `AllowedCellOutputKeys`), and for those a filtered copy is returned. Outputs of any other
 * type are returned unchanged (same instance).
 */
function fixupOutput(output: nbformat.IOutput): nbformat.IOutput {
    const kind = output.output_type;
    if (kind !== 'stream' && kind !== 'error' && kind !== 'execute_result' && kind !== 'display_data') {
        return output;
    }

    const permitted: Set<string> = AllowedCellOutputKeys[kind];
    const cleaned = { ...output };
    for (const key of Object.keys(cleaned)) {
        if (permitted.has(key)) {
            continue;
        }
        delete cleaned[key];
    }
    return cleaned;
}
460
461
462
/**
 * Serializes notebook data into the JSON text of an ipynb file.
 *
 * The preferred cell language is the language from the notebook metadata (`language_info.name`)
 * or, failing that, the language of the first cell of kind `2` (the code cell kind,
 * `NotebookCellKind.Code` in the VS Code API). The indentation is `data.metadata.indentAmount`
 * when that is a string, otherwise a single space.
 */
export function serializeNotebookToString(data: NotebookData): string {
    const notebookContent = getNotebookMetadata(data);

    // Use the preferred language from document metadata or the first cell language as the notebook preferred cell language.
    const preferredCellLanguage =
        notebookContent.metadata?.language_info?.name ?? data.cells.find(cell => cell.kind === 2)?.languageId;

    notebookContent.cells = data.cells.map(cell =>
        pruneCell(createJupyterCellFromNotebookCell(cell, preferredCellLanguage))
    );

    let indentAmount = ' ';
    const documentMetadata = data.metadata;
    if (documentMetadata && 'indentAmount' in documentMetadata && typeof documentMetadata.indentAmount === 'string') {
        indentAmount = documentMetadata.indentAmount;
    }

    return serializeNotebookToJSON(notebookContent, indentAmount);
}
477
/**
 * Stringifies the notebook content with recursively sorted keys and the given indentation.
 *
 * ipynb always ends with a trailing new line; it is added here so that SCMs do not show
 * unnecessary changes resulting from a missing trailing new line.
 */
function serializeNotebookToJSON(notebookContent: Partial<nbformat.INotebookContent>, indentAmount: string): string {
    const json = JSON.stringify(sortObjectPropertiesRecursively(notebookContent), undefined, indentAmount);
    return `${json}\n`;
}
483
484
/**
 * Extracts the notebook-level ipynb content (`cells`, `nbformat`, `nbformat_minor`, `metadata`)
 * from the metadata of a notebook document, filling in defaults for whatever is missing:
 * no cells, the default notebook format version and empty metadata.
 */
export function getNotebookMetadata(document: NotebookDocument | NotebookData) {
    const existingContent: Partial<nbformat.INotebookContent> = document.metadata || {};
    const notebookContent: Partial<nbformat.INotebookContent> = {
        cells: existingContent.cells || [],
        nbformat: existingContent.nbformat || defaultNotebookFormat.major,
        // `??` rather than `||`: a minor version of 0 must be kept.
        nbformat_minor: existingContent.nbformat_minor ?? defaultNotebookFormat.minor,
        metadata: existingContent.metadata || {}
    };
    return notebookContent;
}
493
494