Path: blob/main/extensions/copilot/test/simulation/fixtures/edit/issue-6059/serializers.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type * as nbformat from '@jupyterlab/nbformat';
import { NotebookCell, NotebookCellData, NotebookCellKind, NotebookCellOutput } from 'vscode';
import { CellOutputMetadata, useCustomPropertyInMetadata, type CellMetadata } from './common';
import { textMimeTypes } from './deserializers';

const textDecoder = new TextDecoder();

/** VS Code specific mime types used for error and stream output items. */
enum CellOutputMimeTypes {
	error = 'application/vnd.code.notebook.error',
	stderr = 'application/vnd.code.notebook.stderr',
	stdout = 'application/vnd.code.notebook.stdout'
}

/**
 * Converts a VS Code notebook cell into the matching Jupyter cell.
 * Markup cells become markdown cells, cells whose language id is `raw` become raw cells,
 * everything else becomes a code cell.
 *
 * @param vscCell The VS Code cell to convert.
 * @param preferredLanguage Language of the notebook; only used for code cells to decide
 * whether a per-cell language id needs to be stored in the cell metadata.
 * @returns The Jupyter cell.
 */
export function createJupyterCellFromNotebookCell(
	vscCell: NotebookCellData,
	preferredLanguage: string | undefined
): nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell {
	let cell: nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell;
	if (vscCell.kind === NotebookCellKind.Markup) {
		cell = createMarkdownCellFromNotebookCell(vscCell);
	} else if (vscCell.languageId === 'raw') {
		cell = createRawCellFromNotebookCell(vscCell);
	} else {
		cell = createCodeCellFromNotebookCell(vscCell, preferredLanguage);
	}
	return cell;
}


/**
 * Sort the JSON to minimize unnecessary SCM changes.
 * Jupyter notebooks/labs sorts the JSON keys in alphabetical order.
 * https://github.com/microsoft/vscode-python/issues/13155
 *
 * Arrays keep their element order (each element is processed recursively).
 * Primitives, `null`, `undefined` and objects without keys are returned as-is.
 *
 * @param obj Any JSON-like value.
 * @returns A copy of `obj` in which the keys of every non-empty object are sorted.
 */
export function sortObjectPropertiesRecursively(obj: any): any {
	if (Array.isArray(obj)) {
		return obj.map(sortObjectPropertiesRecursively);
	}
	if (obj !== undefined && obj !== null && typeof obj === 'object' && Object.keys(obj).length > 0) {
		return (
			Object.keys(obj)
				.sort()
				.reduce<Record<string, any>>((sortedObj, prop) => {
					sortedObj[prop] = sortObjectPropertiesRecursively(obj[prop]);
					return sortedObj;
				}, {}) as any
		);
	}
	return obj;
}

/**
 * Builds the Jupyter cell metadata from either a cell or a bare metadata bag.
 *
 * When `useCustomPropertyInMetadata()` is true the result is a shallow copy of `metadata.custom`,
 * with attachments (from `custom.attachments`, falling back to `metadata.attachments`) promoted
 * to the top level. Otherwise the result is a shallow copy of the metadata itself.
 *
 * @param options Either `{ cell }` or `{ metadata }`.
 * @returns A new (shallow-copied) metadata object; the input is not mutated.
 */
export function getCellMetadata(options: { cell: NotebookCell | NotebookCellData } | { metadata?: { [key: string]: any } }): CellMetadata {
	// Both call shapes are handled identically once the raw metadata bag has been located.
	const source = 'cell' in options ? options.cell.metadata : options.metadata;
	if (useCustomPropertyInMetadata()) {
		const metadata: CellMetadata = {
			// it contains the cell id, and the cell metadata, along with other nb cell metadata
			...(source?.custom ?? {})
		};
		// promote the cell attachments to the top level
		const attachments = source?.custom?.attachments ?? source?.attachments;
		if (attachments) {
			metadata.attachments = attachments;
		}
		return metadata;
	}
	return {
		// it contains the cell id, and the cell metadata, along with other nb cell metadata
		...(source ?? {})
	};
}

/** Returns the language id stored under `metadata.metadata.vscode`, if any. */
export function getVSCodeCellLanguageId(metadata: CellMetadata): string | undefined {
	return metadata.metadata?.vscode?.languageId;
}

/**
 * Stores `languageId` under `metadata.metadata.vscode`, creating `metadata.metadata` when missing.
 * The whole `vscode` entry is replaced with `{ languageId }`.
 */
export function setVSCodeCellLanguageId(metadata: CellMetadata, languageId: string) {
	metadata.metadata = metadata.metadata || {};
	metadata.metadata.vscode = { languageId };
}

/** Deletes the `vscode` entry (and with it the language id) from `metadata.metadata`, if present. */
export function removeVSCodeCellLanguageId(metadata: CellMetadata) {
	if (metadata.metadata?.vscode) {
		delete metadata.metadata.vscode;
	}
}

/**
 * Converts a VS Code code cell into a Jupyter code cell.
 *
 * @param cell The VS Code cell.
 * @param preferredLanguage Language of the notebook. The cell language id is written to the
 * metadata only when it differs from this value; otherwise any stored language id is removed.
 */
function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguage: string | undefined): nbformat.ICodeCell {
	// Deep clone (via JSON) so that the language id changes below never touch the cell's own metadata.
	const cellMetadata: CellMetadata = JSON.parse(JSON.stringify(getCellMetadata({ cell })));
	cellMetadata.metadata = cellMetadata.metadata || {}; // This cannot be empty.
	if (cell.languageId !== preferredLanguage) {
		setVSCodeCellLanguageId(cellMetadata, cell.languageId);
	} else {
		// cell current language is the same as the preferred cell language in the document, flush the vscode custom language id metadata
		removeVSCodeCellLanguageId(cellMetadata);
	}

	const codeCell: any = {
		cell_type: 'code',
		// Possible the metadata was edited as part of diff view
		// In diff view we display execution_count as part of metadata, hence when execution count changes in metadata,
		// We need to change that here as well, i.e. give preference to any execution_count value in metadata.
		execution_count: cellMetadata.execution_count ?? cell.executionSummary?.executionOrder ?? null,
		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
		outputs: (cell.outputs || []).map(translateCellDisplayOutput),
		metadata: cellMetadata.metadata
	};
	if (cellMetadata?.id) {
		codeCell.id = cellMetadata.id;
	}
	return codeCell;
}

/**
 * Converts a VS Code cell into a Jupyter raw cell.
 * Line endings are normalized to `\n`; attachments and id are copied when present.
 */
function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCell {
	const cellMetadata = getCellMetadata({ cell });
	const rawCell: any = {
		cell_type: 'raw',
		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.attachments) {
		rawCell.attachments = cellMetadata.attachments;
	}
	if (cellMetadata?.id) {
		rawCell.id = cellMetadata.id;
	}
	return rawCell;
}

/**
 * Splits a string into one entry per line, every entry except the last keeping its trailing `\n`.
 *
 * @param source A string or an array of strings; arrays are returned unchanged.
 * @returns The lines; an empty string yields an empty array.
 */
function splitMultilineString(source: nbformat.MultilineString): string[] {
	if (Array.isArray(source)) {
		return source as string[];
	}
	const str = source.toString();
	if (str.length > 0) {
		// Each line should be a separate entry, but end with a \n if not last entry
		const arr = str.split('\n');
		return arr
			.map((s, i) => {
				if (i < arr.length - 1) {
					return `${s}\n`;
				}
				return s;
			})
			.filter(s => s.length > 0); // Skip last one if empty (it's the only one that could be length 0)
	}
	return [];
}

/**
 * Converts a VS Code cell output into a Jupyter output.
 *
 * The Jupyter output type is read from `output.metadata.outputType`. When it is missing or not one
 * of the known types, the type is derived from the mime types of the output items.
 *
 * @param output The VS Code cell output.
 * @returns The Jupyter output, including `transient` data when present in the output metadata
 * (except for the error fallback in the `default` branch, which returns early).
 */
function translateCellDisplayOutput(output: NotebookCellOutput): JupyterOutput {
	const customMetadata = output.metadata as CellOutputMetadata | undefined;
	let result: JupyterOutput;
	// Possible some other extension added some output (do best effort to translate & save in ipynb).
	// In which case metadata might not contain `outputType`.
	const outputType = customMetadata?.outputType as nbformat.OutputType;
	switch (outputType) {
		case 'error': {
			result = translateCellErrorOutput(output);
			break;
		}
		case 'stream': {
			result = convertStreamOutput(output);
			break;
		}
		case 'display_data': {
			result = {
				output_type: 'display_data',
				data: output.items.reduce((prev: any, curr) => {
					prev[curr.mime] = convertOutputMimeToJupyterOutput(curr.mime, curr.data as Uint8Array);
					return prev;
				}, {}),
				metadata: customMetadata?.metadata || {} // This can never be undefined.
			};
			break;
		}
		case 'execute_result': {
			result = {
				output_type: 'execute_result',
				data: output.items.reduce((prev: any, curr) => {
					prev[curr.mime] = convertOutputMimeToJupyterOutput(curr.mime, curr.data as Uint8Array);
					return prev;
				}, {}),
				metadata: customMetadata?.metadata || {}, // This can never be undefined.
				execution_count:
					typeof customMetadata?.executionCount === 'number' ? customMetadata?.executionCount : null // This can never be undefined, only a number or `null`.
			};
			break;
		}
		case 'update_display_data': {
			result = {
				output_type: 'update_display_data',
				data: output.items.reduce((prev: any, curr) => {
					prev[curr.mime] = convertOutputMimeToJupyterOutput(curr.mime, curr.data as Uint8Array);
					return prev;
				}, {}),
				metadata: customMetadata?.metadata || {} // This can never be undefined.
			};
			break;
		}
		default: {
			const isError =
				output.items.length === 1 && output.items.every((item) => item.mime === CellOutputMimeTypes.error);
			// Note: `every` is true for an empty list, so an output without items is treated as a stream.
			const isStream = output.items.every(
				(item) => item.mime === CellOutputMimeTypes.stderr || item.mime === CellOutputMimeTypes.stdout
			);

			if (isError) {
				return translateCellErrorOutput(output);
			}

			// In the case of .NET & other kernels, we need to ensure we save ipynb correctly.
			// Hence if we have stream output, save the output as Jupyter `stream` else `display_data`
			// Unless we already know it's an unknown output type.
			const outputType: nbformat.OutputType =
				<nbformat.OutputType>customMetadata?.outputType || (isStream ? 'stream' : 'display_data');
			let unknownOutput: nbformat.IUnrecognizedOutput | nbformat.IDisplayData | nbformat.IStream;
			if (outputType === 'stream') {
				// If saving as `stream` ensure the mandatory properties are set.
				unknownOutput = convertStreamOutput(output);
			} else if (outputType === 'display_data') {
				// If saving as `display_data` ensure the mandatory properties are set.
				const displayData: nbformat.IDisplayData = {
					data: {},
					metadata: {},
					output_type: 'display_data'
				};
				unknownOutput = displayData;
			} else {
				unknownOutput = {
					output_type: outputType
				};
			}
			if (customMetadata?.metadata) {
				unknownOutput.metadata = customMetadata.metadata;
			}
			if (output.items.length > 0) {
				unknownOutput.data = output.items.reduce((prev: any, curr) => {
					prev[curr.mime] = convertOutputMimeToJupyterOutput(curr.mime, curr.data as Uint8Array);
					return prev;
				}, {});
			}
			result = unknownOutput;
			break;
		}
	}

	// Account for transient data as well
	// `transient.display_id` is used to update cell output in other cells, at least that's one use case we know of.
	if (result && customMetadata && customMetadata.transient) {
		result.transient = customMetadata.transient;
	}
	return result;
}

/**
 * Converts a VS Code error output into a Jupyter `error` output.
 *
 * The first output item is expected to hold a JSON serialized `Error`. When the output has no
 * items, or the first item has no data, an empty error record is returned instead of throwing.
 * Invalid JSON in the item still throws (from `JSON.parse`).
 */
function translateCellErrorOutput(output: NotebookCellOutput): nbformat.IError {
	// it should have at least one output item
	const firstItem = output.items[0];
	// Bug in VS Code (item without data); also guards against an output without any items.
	if (output.items.length === 0 || !firstItem.data) {
		return {
			output_type: 'error',
			ename: '',
			evalue: '',
			traceback: []
		};
	}
	const originalError: undefined | nbformat.IError = output.metadata?.originalError;
	const value: Error = JSON.parse(textDecoder.decode(firstItem.data));
	return {
		output_type: 'error',
		ename: value.name,
		evalue: value.message,
		// VS Code needs an `Error` object which requires a `stack` property as a string.
		// It's possible the format could change when converting from `traceback` to `string` and back again to `string`
		// When .NET stores errors in output (with their .NET kernel),
		// stack is empty, hence store the message instead of stack (so that something gets displayed in ipynb).
		traceback: originalError?.traceback || splitMultilineString(value.stack || value.message || '')
	};
}


/**
 * Derives the Jupyter stream name from the mime type of the first output item.
 *
 * @returns `'stderr'` when the first item has the stderr mime type, `'stdout'` for any other
 * first item, and `undefined` when there are no items.
 */
function getOutputStreamType(output: NotebookCellOutput): string | undefined {
	if (output.items.length > 0) {
		return output.items[0].mime === CellOutputMimeTypes.stderr ? 'stderr' : 'stdout';
	}

	return;
}

type JupyterOutput =
	| nbformat.IUnrecognizedOutput
	| nbformat.IExecuteResult
	| nbformat.IDisplayData
	| nbformat.IStream
	| nbformat.IError;

/**
 * Converts a VS Code stream output into a Jupyter `stream` output.
 *
 * The stdout/stderr items are treated as consecutive chunks of one text stream. The resulting
 * `text` has one entry per line, every entry except the last ending with `\n`.
 * The stream name falls back to `'stdout'` when the output has no items.
 */
function convertStreamOutput(output: NotebookCellOutput): JupyterOutput {
	const outputs: string[] = [];
	output.items
		.filter((opit) => opit.mime === CellOutputMimeTypes.stderr || opit.mime === CellOutputMimeTypes.stdout)
		.map((opit) => textDecoder.decode(opit.data))
		.forEach(value => {
			// Ensure each line is a separate entry in an array (ending with \n).
			const lines = value.split('\n');
			// The last entry in `outputs` is a line that has not been terminated yet, so the first
			// segment of this chunk always continues it. This must also happen when that segment is
			// empty (chunk is empty or starts with \n); pushing it as a separate entry would add a
			// line break that is not part of the stream.
			if (outputs.length > 0) {
				outputs[outputs.length - 1] = `${outputs[outputs.length - 1]}${lines.shift() ?? ''}`;
			}
			for (const line of lines) {
				outputs.push(line);
			}
		});

	for (let index = 0; index < (outputs.length - 1); index++) {
		outputs[index] = `${outputs[index]}\n`;
	}

	// Skip last one if empty (it's the only one that could be length 0)
	if (outputs.length && outputs[outputs.length - 1].length === 0) {
		outputs.pop();
	}

	const streamType = getOutputStreamType(output) || 'stdout';

	return {
		output_type: 'stream',
		name: streamType,
		text: outputs
	};
}

/**
 * Converts the bytes of an output item into the value stored in the ipynb for the given mime type.
 *
 * - VS Code error mime type: parsed JSON.
 * - `text/*` and mime types listed in `textMimeTypes`: array of lines.
 * - `image/*` other than SVG: base64 encoded string.
 * - mime types containing `json`: parsed JSON (empty string when there is no content).
 * - `image/svg+xml`: array of lines.
 * - anything else: decoded string.
 *
 * This is a best effort conversion: a missing value or any failure while decoding/parsing
 * yields an empty string.
 */
function convertOutputMimeToJupyterOutput(mime: string, value: Uint8Array) {
	if (!value) {
		return '';
	}
	try {
		if (mime === CellOutputMimeTypes.error) {
			const stringValue = textDecoder.decode(value);
			return JSON.parse(stringValue);
		} else if (mime.startsWith('text/') || textMimeTypes.includes(mime)) {
			const stringValue = textDecoder.decode(value);
			return splitMultilineString(stringValue);
		} else if (mime.startsWith('image/') && mime !== 'image/svg+xml') {
			// Images in Jupyter are stored in base64 encoded format.
			// VS Code expects bytes when rendering images.
			if (typeof Buffer !== 'undefined' && typeof Buffer.from === 'function') {
				return Buffer.from(value).toString('base64');
			} else {
				return btoa(value.reduce((s: string, b: number) => s + String.fromCharCode(b), ''));
			}
		} else if (mime.toLowerCase().includes('json')) {
			const stringValue = textDecoder.decode(value);
			return stringValue.length > 0 ? JSON.parse(stringValue) : stringValue;
		} else if (mime === 'image/svg+xml') {
			return splitMultilineString(textDecoder.decode(value));
		} else {
			return textDecoder.decode(value);
		}
	} catch (ex) {
		// Deliberate best effort: content that cannot be converted is saved as empty.
		return '';
	}
}

/**
 * Converts a VS Code cell into a Jupyter markdown cell.
 * Line endings are normalized to `\n`; attachments and id are copied when present.
 */
export function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbformat.IMarkdownCell {
	const cellMetadata = getCellMetadata({ cell });
	const markdownCell: any = {
		cell_type: 'markdown',
		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.attachments) {
		markdownCell.attachments = cellMetadata.attachments;
	}
	if (cellMetadata?.id) {
		markdownCell.id = cellMetadata.id;
	}
	return markdownCell;
}

/**
 * Returns a cleaned-up shallow copy of a Jupyter cell: `source` is converted to an array of lines,
 * non-code cells lose `outputs` and `execution_count`, and the outputs of code cells are reduced
 * to the properties allowed for their output type.
 */
export function pruneCell(cell: nbformat.ICell): nbformat.ICell {
	// Source is usually a single string on input. Convert back to an array
	const result = {
		...cell,
		source: splitMultilineString(cell.source)
	} as nbformat.ICell;

	// Remove outputs and execution_count from non code cells
	if (result.cell_type !== 'code') {
		delete (<any>result).outputs;
		delete (<any>result).execution_count;
	} else {
		// Clean outputs from code cells
		result.outputs = result.outputs ? (result.outputs as nbformat.IOutput[]).map(fixupOutput) : [];
	}

	return result;
}

// The dummy objects below are only used for their keys: they define which properties are
// kept on an output of the corresponding type (see `AllowedCellOutputKeys`).
const dummyStreamObj: nbformat.IStream = {
	output_type: 'stream',
	name: 'stdout',
	text: ''
};
const dummyErrorObj: nbformat.IError = {
	output_type: 'error',
	ename: '',
	evalue: '',
	traceback: ['']
};
const dummyDisplayObj: nbformat.IDisplayData = {
	output_type: 'display_data',
	data: {},
	metadata: {}
};
const dummyExecuteResultObj: nbformat.IExecuteResult = {
	output_type: 'execute_result',
	name: '',
	execution_count: 0,
	data: {},
	metadata: {}
};
const AllowedCellOutputKeys = {
	['stream']: new Set(Object.keys(dummyStreamObj)),
	['error']: new Set(Object.keys(dummyErrorObj)),
	['display_data']: new Set(Object.keys(dummyDisplayObj)),
	['execute_result']: new Set(Object.keys(dummyExecuteResultObj))
};

/**
 * Returns a shallow copy of `output` without the properties that are not allowed for its output type.
 * Outputs of any other type are returned unchanged (same object).
 */
function fixupOutput(output: nbformat.IOutput): nbformat.IOutput {
	let allowedKeys: Set<string>;
	switch (output.output_type) {
		case 'stream':
		case 'error':
		case 'execute_result':
		case 'display_data':
			allowedKeys = AllowedCellOutputKeys[output.output_type];
			break;
		default:
			return output;
	}
	const result = { ...output };
	for (const k of Object.keys(output)) {
		if (!allowedKeys.has(k)) {
			delete result[k];
		}
	}
	return result;
}