CoCalc -- serializers.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/ipynb/src/serializers.ts
³²⁹¹ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import type * as nbformat from '@jupyterlab/nbformat';
7
import type { NotebookCell, NotebookCellData, NotebookCellOutput, NotebookData, NotebookDocument } from 'vscode';
8
import { CellOutputMetadata, type CellMetadata } from './common';
9
import { textMimeTypes, NotebookCellKindMarkup, CellOutputMimeTypes, defaultNotebookFormat } from './constants';
10

11
// Single shared decoder used by the helpers in this file to turn output item bytes back into strings.
const textDecoder = new TextDecoder();
12

13
/**
 * Converts a VS Code notebook cell into the matching Jupyter (nbformat) cell.
 *
 * Markup cells become markdown cells, cells whose language id is `raw` become raw cells,
 * and every other cell is serialized as a code cell.
 *
 * @param vscCell The VS Code cell data to convert.
 * @param preferredLanguage The notebook's preferred cell language; only forwarded to the code cell conversion.
 * @returns The Jupyter raw, markdown or code cell.
 */
export function createJupyterCellFromNotebookCell(
	vscCell: NotebookCellData,
	preferredLanguage: string | undefined,
): nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell {
	if (vscCell.kind === NotebookCellKindMarkup) {
		return createMarkdownCellFromNotebookCell(vscCell);
	}
	if (vscCell.languageId === 'raw') {
		return createRawCellFromNotebookCell(vscCell);
	}
	return createCodeCellFromNotebookCell(vscCell, preferredLanguage);
}
27

28

29
/**
 * Sort the JSON to minimize unnecessary SCM changes.
 * Jupyter notebooks/labs sort the JSON keys in alphabetical order.
 * https://github.com/microsoft/vscode-python/issues/13155
 *
 * Arrays keep their element order, with every element processed recursively. Non-empty
 * objects are rebuilt as new plain objects whose own enumerable keys are inserted in the
 * default `Array.prototype.sort` order. Primitives, `null`, `undefined` and empty objects
 * are returned unchanged.
 *
 * @param obj Any JSON-like value.
 * @returns The value with all nested object keys sorted.
 */
export function sortObjectPropertiesRecursively(obj: any): any {
	if (Array.isArray(obj)) {
		return obj.map(sortObjectPropertiesRecursively);
	}
	if (obj !== undefined && obj !== null && typeof obj === 'object' && Object.keys(obj).length > 0) {
		return (
			Object.keys(obj)
				.sort()
				.reduce<Record<string, any>>((sortedObj, prop) => {
					sortedObj[prop] = sortObjectPropertiesRecursively(obj[prop]);
					return sortedObj;
				}, {}) as any
		);
	}
	return obj;
}
50

51
/**
 * Builds the cell-level metadata object for a cell.
 *
 * - Called with `{ cell }`: returns a shallow copy of `cell.metadata` in which `execution_count`
 *   defaults to `null` (a value already present in the cell metadata wins, because it is spread
 *   after the default). For markup cells the `execution_count` entry is removed again.
 * - Called with `{ metadata }`: returns a shallow copy of that metadata (an empty object when absent).
 *
 * @param options Either the cell to read metadata from, or the metadata itself.
 * @returns A new metadata object; the input metadata object is not mutated.
 */
export function getCellMetadata(options: { cell: NotebookCell | NotebookCellData } | { metadata?: { [key: string]: any } }): CellMetadata {
	if ('cell' in options) {
		const cell = options.cell;
		const metadata = {
			execution_count: null,
			// it contains the cell id, and the cell metadata, along with other nb cell metadata
			...(cell.metadata ?? {})
		} satisfies CellMetadata;
		if (cell.kind === NotebookCellKindMarkup) {
			// Markup cells are never executed, so they carry no execution count.
			delete (metadata as any).execution_count;
		}
		return metadata;
	} else {
		const cell = options;
		const metadata = {
			// it contains the cell id, and the cell metadata, along with other nb cell metadata
			...(cell.metadata ?? {})
		};

		return metadata as CellMetadata;
	}
}
73

74
/**
 * Reads the language id stored under `metadata.vscode` of the given cell metadata.
 *
 * @param metadata The cell metadata to inspect.
 * @returns The stored language id, or `undefined` when any level of the path is missing.
 */
export function getVSCodeCellLanguageId(metadata: CellMetadata): string | undefined {
	const vscodeEntry = metadata.metadata?.vscode;
	return vscodeEntry?.languageId;
}
77
/**
 * Stores `languageId` under `metadata.vscode` of the given cell metadata, in place.
 *
 * The nested `metadata` object is created when missing. Any existing `vscode` entry is
 * replaced by a fresh object that holds only the language id.
 *
 * @param metadata The cell metadata to update.
 * @param languageId The language id to record.
 */
export function setVSCodeCellLanguageId(metadata: CellMetadata, languageId: string) {
	if (!metadata.metadata) {
		metadata.metadata = {};
	}
	metadata.metadata.vscode = { languageId: languageId };
}
81
/**
 * Removes the whole `metadata.vscode` entry from the given cell metadata, in place.
 * Nothing happens when the entry is absent or falsy.
 *
 * @param metadata The cell metadata to update.
 */
export function removeVSCodeCellLanguageId(metadata: CellMetadata) {
	const nested = metadata.metadata;
	if (!nested?.vscode) {
		return;
	}
	delete nested.vscode;
}
86

87
/**
 * Converts a VS Code code cell into a Jupyter code cell.
 *
 * The cell metadata is deep-cloned (via a JSON round trip) before it is adjusted, so the
 * metadata object attached to `cell` is never mutated. The VS Code language id is recorded in
 * the metadata only when the cell's language differs from `preferredLanguage`; otherwise any
 * previously stored language id is removed.
 *
 * @param cell The VS Code cell data.
 * @param preferredLanguage The notebook's preferred cell language.
 * @returns The Jupyter code cell, including `id` when the cell metadata has one.
 */
function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguage: string | undefined): nbformat.ICodeCell {
	const cellMetadata: CellMetadata = JSON.parse(JSON.stringify(getCellMetadata({ cell })));
	cellMetadata.metadata = cellMetadata.metadata || {}; // This cannot be empty.
	if (cell.languageId !== preferredLanguage) {
		setVSCodeCellLanguageId(cellMetadata, cell.languageId);
	} else {
		// cell current language is the same as the preferred cell language in the document, flush the vscode custom language id metadata
		removeVSCodeCellLanguageId(cellMetadata);
	}

	const codeCell: nbformat.ICodeCell = {
		cell_type: 'code',
		// Metadata should always contain the execution_count.
		// When ever execution summary data changes we will update the metadata to contain the execution count.
		// Failing to do so means we have a problem.
		// Also do not read the value of executionSummary here, as its possible user reverted changes to metadata
		// & in that case execution summary could contain the data, but metadata will not.
		// In such cases we do not want to re-set the metadata with the value from execution summary (remember, user reverted that).
		execution_count: cellMetadata.execution_count ?? null,
		source: splitCellSourceIntoMultilineString(cell.value),
		outputs: (cell.outputs || []).map(translateCellDisplayOutput),
		metadata: cellMetadata.metadata
	};
	if (cellMetadata?.id) {
		codeCell.id = cellMetadata.id;
	}
	return codeCell;
}
115

116
/**
 * Converts a VS Code cell into a Jupyter raw cell.
 *
 * `attachments` and `id` are copied from the cell metadata only when present there.
 *
 * @param cell The VS Code cell data.
 * @returns The Jupyter raw cell.
 */
function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCell {
	const cellMetadata = getCellMetadata({ cell });
	const rawCell: any = {
		cell_type: 'raw',
		source: splitCellSourceIntoMultilineString(cell.value),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.attachments) {
		rawCell.attachments = cellMetadata.attachments;
	}
	if (cellMetadata?.id) {
		rawCell.id = cellMetadata.id;
	}
	return rawCell;
}
131

132
/**
 * Turns the text of a cell into an array of lines, one entry per line.
 * CRLF (`\r\n`) line endings are rewritten to LF (`\n`) before splitting.
 * The deserializer performs the same normalization.
 */
function splitCellSourceIntoMultilineString(source: string): string[] {
	const normalizedSource = source.replace(/\r\n/g, '\n');
	return splitMultilineString(normalizedSource);
}
140

141
/**
 * Converts a Jupyter multiline string into an array with one entry per line.
 *
 * An array input is returned as-is. A string is split on `\n`: every line except the last
 * keeps its trailing `\n`, and an empty final entry (input ending with a newline) is dropped.
 * An empty string yields an empty array.
 *
 * @param source Either a single string or an already split array of lines.
 * @returns The lines of `source`.
 */
function splitMultilineString(source: nbformat.MultilineString): string[] {
	if (Array.isArray(source)) {
		return source as string[];
	}
	const str = source.toString();
	if (str.length > 0) {
		// Each line should be a separate entry, but end with a \n if not last entry
		const arr = str.split('\n');
		return arr
			.map((s, i) => {
				if (i < arr.length - 1) {
					return `${s}\n`;
				}
				return s;
			})
			.filter(s => s.length > 0); // Skip last one if empty (it's the only one that could be length 0)
	}
	return [];
}
160

161
/**
 * Converts a VS Code cell output into the corresponding Jupyter output.
 *
 * The Jupyter output type is taken from `output.metadata.outputType` when it is one of the
 * known types. Otherwise a best effort is made from the output items: a single error item is
 * saved as `error`, items that are all stdout/stderr are saved as `stream`, and anything else
 * is saved under the recorded output type or, failing that, as `display_data`.
 *
 * `transient` metadata is copied onto the result, except for the error output that the
 * fallback branch returns directly.
 *
 * @param output The VS Code cell output.
 * @returns The Jupyter output.
 */
function translateCellDisplayOutput(output: NotebookCellOutput): JupyterOutput {
	const customMetadata = output.metadata as CellOutputMetadata | undefined;
	// Builds the Jupyter mime bundle (mime type -> converted payload) from the output items.
	const toMimeBundle = () =>
		output.items.reduce((prev: any, curr) => {
			prev[curr.mime] = convertOutputMimeToJupyterOutput(curr.mime, curr.data as Uint8Array);
			return prev;
		}, {});
	let result: JupyterOutput;
	// Possible some other extension added some output (do best effort to translate & save in ipynb).
	// In which case metadata might not contain `outputType`.
	const outputType = customMetadata?.outputType as nbformat.OutputType;
	switch (outputType) {
		case 'error': {
			result = translateCellErrorOutput(output);
			break;
		}
		case 'stream': {
			result = convertStreamOutput(output);
			break;
		}
		case 'display_data': {
			result = {
				output_type: 'display_data',
				data: toMimeBundle(),
				metadata: customMetadata?.metadata || {} // This can never be undefined.
			};
			break;
		}
		case 'execute_result': {
			result = {
				output_type: 'execute_result',
				data: toMimeBundle(),
				metadata: customMetadata?.metadata || {}, // This can never be undefined.
				execution_count:
					typeof customMetadata?.executionCount === 'number' ? customMetadata?.executionCount : null // This can never be undefined, only a number or `null`.
			};
			break;
		}
		case 'update_display_data': {
			result = {
				output_type: 'update_display_data',
				data: toMimeBundle(),
				metadata: customMetadata?.metadata || {} // This can never be undefined.
			};
			break;
		}
		default: {
			const isError =
				output.items.length === 1 && output.items.every((item) => item.mime === CellOutputMimeTypes.error);
			const isStream = output.items.every(
				(item) => item.mime === CellOutputMimeTypes.stderr || item.mime === CellOutputMimeTypes.stdout
			);

			if (isError) {
				return translateCellErrorOutput(output);
			}

			// In the case of .NET & other kernels, we need to ensure we save ipynb correctly.
			// Hence if we have stream output, save the output as Jupyter `stream` else `display_data`
			// Unless we already know its an unknown output type.
			const fallbackOutputType: nbformat.OutputType =
				(customMetadata?.outputType as nbformat.OutputType) || (isStream ? 'stream' : 'display_data');
			let unknownOutput: nbformat.IUnrecognizedOutput | nbformat.IDisplayData | nbformat.IStream;
			if (fallbackOutputType === 'stream') {
				// If saving as `stream` ensure the mandatory properties are set.
				unknownOutput = convertStreamOutput(output);
			} else if (fallbackOutputType === 'display_data') {
				// If saving as `display_data` ensure the mandatory properties are set.
				const displayData: nbformat.IDisplayData = {
					data: {},
					metadata: {},
					output_type: 'display_data'
				};
				unknownOutput = displayData;
			} else {
				unknownOutput = {
					output_type: fallbackOutputType
				};
			}
			if (customMetadata?.metadata) {
				unknownOutput.metadata = customMetadata.metadata;
			}
			if (output.items.length > 0) {
				unknownOutput.data = toMimeBundle();
			}
			result = unknownOutput;
			break;
		}
	}

	// Account for transient data as well
	// `transient.display_id` is used to update cell output in other cells, at least thats one use case we know of.
	if (result && customMetadata && customMetadata.transient) {
		result.transient = customMetadata.transient;
	}
	return result;
}
265

266
/**
 * Converts a VS Code error output into a Jupyter `error` output.
 *
 * The first output item is expected to hold a JSON-serialized error (`name`, `message`, `stack`).
 * When the output has no items, or the first item has no data, an empty error is returned.
 * The traceback stored in `output.metadata.originalError` is preferred when available;
 * otherwise it is rebuilt from the error's stack, falling back to its message.
 *
 * @param output The VS Code cell output.
 * @returns The Jupyter error output.
 */
function translateCellErrorOutput(output: NotebookCellOutput): nbformat.IError {
	// it should have at least one output item
	const firstItem = output.items[0];
	// Bug in VS Code (item without data). The optional chain also covers an output without any items,
	// which would otherwise throw here.
	if (!firstItem?.data) {
		return {
			output_type: 'error',
			ename: '',
			evalue: '',
			traceback: []
		};
	}
	const originalError: undefined | nbformat.IError = output.metadata?.originalError;
	const value: Error = JSON.parse(textDecoder.decode(firstItem.data));
	return {
		output_type: 'error',
		ename: value.name,
		evalue: value.message,
		// VS Code needs an `Error` object which requires a `stack` property as a string.
		// Its possible the format could change when converting from `traceback` to `string` and back again to `string`
		// When .NET stores errors in output (with their .NET kernel),
		// stack is empty, hence store the message instead of stack (so that something gets displayed in ipynb).
		traceback: originalError?.traceback || splitMultilineString(value.stack || value.message || '')
	};
}
291

292

293
/**
 * Determines the stream name of an output from the mime type of its first item.
 *
 * @param output The VS Code cell output.
 * @returns `'stderr'` when the first item uses the stderr mime type, `'stdout'` for any other
 * first item, and `undefined` when the output has no items.
 */
function getOutputStreamType(output: NotebookCellOutput): string | undefined {
	if (output.items.length === 0) {
		return undefined;
	}

	const firstMime = output.items[0].mime;
	if (firstMime === CellOutputMimeTypes.stderr) {
		return 'stderr';
	}
	return 'stdout';
}
300

301
/** Union of the Jupyter output shapes produced when serializing a cell output. */
type JupyterOutput =
	| nbformat.IUnrecognizedOutput
	| nbformat.IExecuteResult
	| nbformat.IDisplayData
	| nbformat.IStream
	| nbformat.IError;
307

308
/**
 * Converts a VS Code stream output into a Jupyter `stream` output.
 *
 * Only stdout/stderr items are used. Their decoded text is concatenated and re-split so that
 * `text` holds one entry per line, each ending in `\n` except possibly the last; an empty
 * trailing entry is dropped. The stream name comes from the first output item and defaults
 * to `stdout`.
 *
 * @param output The VS Code cell output.
 * @returns The Jupyter stream output.
 */
function convertStreamOutput(output: NotebookCellOutput): JupyterOutput {
	const outputs: string[] = [];
	output.items
		.filter((opit) => opit.mime === CellOutputMimeTypes.stderr || opit.mime === CellOutputMimeTypes.stdout)
		.map((opit) => textDecoder.decode(opit.data))
		.forEach(value => {
			// Ensure each line is a separate entry in an array (ending with \n).
			const lines = value.split('\n');
			// If the last item in `outputs` is not empty and the first item in `lines` is not empty, then concate them.
			// As they are part of the same line.
			if (outputs.length && lines.length && lines[0].length > 0) {
				outputs[outputs.length - 1] = `${outputs[outputs.length - 1]}${lines.shift()!}`;
			}
			for (const line of lines) {
				outputs.push(line);
			}
		});

	// Every entry but the last one was followed by a line break in the original text.
	for (let index = 0; index < (outputs.length - 1); index++) {
		outputs[index] = `${outputs[index]}\n`;
	}

	// Skip last one if empty (it's the only one that could be length 0)
	if (outputs.length && outputs[outputs.length - 1].length === 0) {
		outputs.pop();
	}

	const streamType = getOutputStreamType(output) || 'stdout';

	return {
		output_type: 'stream',
		name: streamType,
		text: outputs
	};
}
343

344
/**
 * Converts the bytes of a single output item into the value stored in the ipynb mime bundle.
 *
 * The checks run in this order:
 * - no value: empty string
 * - error mime type: parsed JSON
 * - `text/*` or one of `textMimeTypes`: decoded text split into lines
 * - `image/*` other than SVG: base64 string
 * - mime type containing `json`: parsed JSON (an empty payload stays an empty string)
 * - `image/svg+xml`: decoded text split into lines
 * - anything else: decoded text
 *
 * Any exception raised while converting results in an empty string.
 */
function convertOutputMimeToJupyterOutput(mime: string, value: Uint8Array) {
	if (!value) {
		return '';
	}
	try {
		if (mime === CellOutputMimeTypes.error) {
			return JSON.parse(textDecoder.decode(value));
		}
		if (mime.startsWith('text/') || textMimeTypes.includes(mime)) {
			return splitMultilineString(textDecoder.decode(value));
		}
		if (mime.startsWith('image/') && mime !== 'image/svg+xml') {
			// Images in Jupyter are stored in base64 encoded format.
			// VS Code expects bytes when rendering images.
			const hasBuffer = typeof Buffer !== 'undefined' && typeof Buffer.from === 'function';
			return hasBuffer
				? Buffer.from(value).toString('base64')
				: btoa(value.reduce((s: string, b: number) => s + String.fromCharCode(b), ''));
		}
		if (mime.toLowerCase().includes('json')) {
			const decoded = textDecoder.decode(value);
			if (decoded.length > 0) {
				return JSON.parse(decoded);
			}
			return decoded;
		}
		if (mime === 'image/svg+xml') {
			return splitMultilineString(textDecoder.decode(value));
		}
		return textDecoder.decode(value);
	} catch (ex) {
		return '';
	}
}
375

376
/**
 * Converts a VS Code markup cell into a Jupyter markdown cell.
 *
 * `attachments` and `id` are copied from the cell metadata only when present there.
 *
 * @param cell The VS Code cell data.
 * @returns The Jupyter markdown cell.
 */
export function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbformat.IMarkdownCell {
	const cellMetadata = getCellMetadata({ cell });
	const markdownCell: any = {
		cell_type: 'markdown',
		source: splitCellSourceIntoMultilineString(cell.value),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.attachments) {
		markdownCell.attachments = cellMetadata.attachments;
	}
	if (cellMetadata?.id) {
		markdownCell.id = cellMetadata.id;
	}
	return markdownCell;
}
391

392
/**
 * Returns a cleaned shallow copy of a Jupyter cell that is ready to be written to disk.
 *
 * `source` is always converted to an array of lines. Non-code cells lose `outputs` and
 * `execution_count`; code cells get each output passed through `fixupOutput`, with `outputs`
 * defaulting to an empty array.
 *
 * @param cell The Jupyter cell to clean; it is not mutated.
 * @returns The cleaned cell.
 */
export function pruneCell(cell: nbformat.ICell): nbformat.ICell {
	// Source is usually a single string on input. Convert back to an array
	const result: nbformat.ICell = {
		...cell,
		source: splitMultilineString(cell.source)
	};

	// Remove outputs and execution_count from non code cells
	if (result.cell_type !== 'code') {
		delete (result as any).outputs;
		delete (result as any).execution_count;
	} else {
		// Clean outputs from code cells
		result.outputs = result.outputs ? (result.outputs as nbformat.IOutput[]).map(fixupOutput) : [];
	}

	return result;
}
410
// Template outputs, one per known Jupyter output type. Only their KEYS are used: they define
// which properties are kept on an output of that type (see `AllowedCellOutputKeys` below).
const dummyStreamObj: nbformat.IStream = {
	output_type: 'stream',
	name: 'stdout',
	text: ''
};
const dummyErrorObj: nbformat.IError = {
	output_type: 'error',
	ename: '',
	evalue: '',
	traceback: ['']
};
const dummyDisplayObj: nbformat.IDisplayData = {
	output_type: 'display_data',
	data: {},
	metadata: {}
};
const dummyExecuteResultObj: nbformat.IExecuteResult = {
	output_type: 'execute_result',
	// NOTE(review): `name` is not written by the execute_result conversion in this file; it is kept
	// here so the set of allowed keys stays unchanged - confirm whether it is still needed.
	name: '',
	execution_count: 0,
	data: {},
	metadata: {}
};
/** For each known output type, the property names that are allowed to be persisted. */
const AllowedCellOutputKeys = {
	['stream']: new Set(Object.keys(dummyStreamObj)),
	['error']: new Set(Object.keys(dummyErrorObj)),
	['display_data']: new Set(Object.keys(dummyDisplayObj)),
	['execute_result']: new Set(Object.keys(dummyExecuteResultObj))
};
439

440
/**
 * Strips properties that are not allowed for the output's type.
 *
 * Outputs of type `stream`, `error`, `execute_result` or `display_data` are shallow-copied and
 * every key missing from `AllowedCellOutputKeys` for that type is removed from the copy.
 * Outputs of any other type are returned unchanged (same object).
 *
 * @param output The Jupyter output to clean; it is not mutated.
 * @returns The cleaned output.
 */
function fixupOutput(output: nbformat.IOutput): nbformat.IOutput {
	let allowedKeys: Set<string>;
	switch (output.output_type) {
		case 'stream':
		case 'error':
		case 'execute_result':
		case 'display_data':
			allowedKeys = AllowedCellOutputKeys[output.output_type];
			break;
		default:
			return output;
	}
	const result = { ...output };
	for (const k of Object.keys(output)) {
		if (!allowedKeys.has(k)) {
			delete result[k];
		}
	}
	return result;
}
460

461

462
/**
 * Serializes VS Code notebook data into the text of an ipynb file.
 *
 * The preferred cell language is `metadata.language_info.name` of the notebook when set,
 * otherwise the language of the first code cell. The JSON indentation is taken from
 * `data.metadata.indentAmount` when that is a string and defaults to a single space.
 *
 * @param data The notebook data to serialize.
 * @returns The ipynb JSON text.
 */
export function serializeNotebookToString(data: NotebookData): string {
	const notebookContent = getNotebookMetadata(data);
	// use the preferred language from document metadata or the first cell language as the notebook preferred cell language
	// `2` is `NotebookCellKind.Code`; the `vscode` module is imported type-only in this file, so the enum value is spelled out.
	const preferredCellLanguage = notebookContent.metadata?.language_info?.name ?? data.cells.find(cell => cell.kind === 2)?.languageId;

	notebookContent.cells = data.cells
		.map(cell => createJupyterCellFromNotebookCell(cell, preferredCellLanguage))
		.map(pruneCell);

	const indentAmount = data.metadata && 'indentAmount' in data.metadata && typeof data.metadata.indentAmount === 'string' ?
		data.metadata.indentAmount :
		' ';

	return serializeNotebookToJSON(notebookContent, indentAmount);
}
477
/**
 * Renders notebook content as JSON text with recursively sorted keys.
 *
 * @param notebookContent The notebook content to render.
 * @param indentAmount The indentation string handed to `JSON.stringify`.
 * @returns The JSON text followed by a single `\n`.
 */
function serializeNotebookToJSON(notebookContent: Partial<nbformat.INotebookContent>, indentAmount: string): string {
	const json = JSON.stringify(sortObjectPropertiesRecursively(notebookContent), undefined, indentAmount);
	// ipynb always ends with a trailing new line (we add this so that SCMs do not show unnecessary changes, resulting from a missing trailing new line).
	return `${json}\n`;
}
483

484
/**
 * Builds the top-level ipynb content skeleton from a notebook's metadata.
 *
 * Only `cells`, `nbformat`, `nbformat_minor` and `metadata` are carried over from
 * `document.metadata`. Missing values fall back to an empty cell list, the default notebook
 * format version and an empty metadata object.
 *
 * @param document The notebook document or notebook data to read from.
 * @returns A new notebook content object.
 */
export function getNotebookMetadata(document: NotebookDocument | NotebookData) {
	const existingContent: Partial<nbformat.INotebookContent> = document.metadata || {};
	const notebookContent: Partial<nbformat.INotebookContent> = {
		cells: existingContent.cells || [],
		nbformat: existingContent.nbformat || defaultNotebookFormat.major,
		// `??` keeps an explicit minor version of 0.
		nbformat_minor: existingContent.nbformat_minor ?? defaultNotebookFormat.minor,
		metadata: existingContent.metadata || {}
	};
	return notebookContent;
}
493

494
Product

Resources

Company