Path: blob/master/src/packages/jupyter/ipynb/export-to-ipynb.ts
6021 views
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

/*
Exporting from our in-memory sync-friendly format to ipynb
*/

import { CellType } from "@cocalc/util/jupyter/types";
import { deep_copy, filename_extension, keys } from "@cocalc/util/misc";

// Tags are stored internally as a map tag --> true; see processTags.
type Tags = { [key: string]: boolean };

// In-memory representation of a single cell, as consumed by this exporter.
interface Cell {
  cell_type?: CellType;
  input?: string;
  collapsed?: boolean;
  scrolled?: boolean;
  slide?;
  attachments?;
  tags?: Tags;
  // output messages keyed by their index ("0", "1", ...)
  output?: { [n: string]: OutputMessage };
  metadata?: Metadata;
  exec_count?: number;
}

type OutputMessage = any;

interface Metadata {
  collapsed?: boolean;
  scrolled?: boolean;
  cocalc?: {
    outputs: { [n: string]: any };
  };
  slideshow?;
  tags?: string[];
}

export interface IPynbCell {
  id: string;
  cell_type: CellType;
  source?: string[];
  metadata?: Metadata;
  execution_count?: number;
  outputs?: OutputMessage[];
}

interface BlobStore {
  getBase64: (sha1: string) => string | null | undefined | void;
  getString: (sha1: string) => string | null | undefined | void;
}

interface Options {
  // list of id's of the cells in the correct order
  cell_list: string[];
  // actual data of the cells
  cells: { [id: string]: Cell };
  // custom metadata only
  metadata?;
  // official jupyter will give an error on load without properly giving this (and ask to select a kernel)
  kernelspec?: object;
  language_info?: object;
  blob_store?: BlobStore;
  // optional map id --> list of additional output messages to replace last output message.
  more_output?: { [id: string]: OutputMessage[] };
}

/**
 * Build an ipynb (nbformat 4.4) object from the in-memory notebook state.
 *
 * **WARNING: any input to export_to_ipynb function may be MUTATED!**
 * In particular `opts.kernelspec` is defaulted to `{}`, `opts.metadata` (when
 * given) is used directly as the notebook metadata and gets `kernelspec` /
 * `language_info` written into it, and the cell output messages are modified
 * in place while being converted.
 *
 * @param opts - cell order, cell data, and optional metadata / kernel info /
 *   blob store / extra output messages.
 * @returns an object with `cells`, `metadata`, `nbformat` and `nbformat_minor`.
 */
export function export_to_ipynb(opts: Options) {
  if (opts.kernelspec == null) {
    opts.kernelspec = {};
  }
  const ipynb = {
    cells: opts.cell_list.map((id: string) => cell_to_ipynb(id, opts)),
    metadata: opts.metadata ?? {},
    nbformat: 4,
    nbformat_minor: 4,
  };

  ipynb.metadata.kernelspec = opts.kernelspec;
  if (opts.language_info != null) {
    ipynb.metadata.language_info = opts.language_info;
  }

  return ipynb;
}

/**
 * Return ipynb version of the given cell as object.
 *
 * @param id - id of the cell; looked up in `opts.cells`.
 * @param opts - the export options (cells, more_output, blob_store are used).
 */
function cell_to_ipynb(id: string, opts: Options) {
  const cell = opts.cells[id];
  const metadata: Metadata = {};
  // The cast is needed because diff_friendly's declared return type also
  // admits undefined/null, which IPynbCell.source does not.
  const obj = {
    id,
    cell_type: cell.cell_type ?? "code",
    source: diff_friendly(cell.input ?? ""),
    metadata,
  } as IPynbCell;

  // Handle any extra metadata (mostly user defined) that we don't
  // handle in a special way for efficiency reasons.
  const other_metadata = cell.metadata;
  if (other_metadata != null) {
    processOtherMetadata(obj, other_metadata);
  }

  // consistency with jupyter -- they explicitly give collapsed true or false state no matter what
  metadata.collapsed = !!cell.collapsed;

  // Jupyter only gives scrolled state when true.
  if (cell.scrolled) {
    metadata.scrolled = true;
  }

  const exec_count = cell.exec_count ?? 0;
  if (obj.cell_type === "code") {
    obj.execution_count = exec_count;
  }

  processSlides(obj, cell.slide);
  processAttachments(obj, cell.attachments);
  processTags(obj, cell.tags);

  if (obj.cell_type !== "code") {
    // Code is the only cell type that is allowed to have an outputs field.
    return obj;
  }

  const output = cell.output;
  if (output != null) {
    obj.outputs = ipynbOutputs({
      output,
      exec_count,
      more_output: opts.more_output?.[id],
      blob_store: opts.blob_store,
    });
  } else if (obj.outputs == null) {
    // Only code cells reach this point (see the early return above).
    obj.outputs = []; // annoying requirement of ipynb file format.
  }
  for (const n in obj.outputs) {
    const x = obj.outputs[n];
    if (x.cocalc != null) {
      // alternative version of cell that official Jupyter doesn't support can only
      // stored in the **cell-level** metadata, not output.
      if (metadata.cocalc == null) {
        metadata.cocalc = { outputs: {} };
      }
      metadata.cocalc.outputs[n] = x.cocalc;
      delete x.cocalc;
    }
  }
  return obj;
}

// Record the slide type (if any) as metadata.slideshow.slide_type.
function processSlides(obj, slide?) {
  if (slide != null) {
    obj.metadata.slideshow = { slide_type: slide };
  }
}

// Write the cell's tags (if any) to metadata.tags as a sorted list.
function processTags(obj, tags?: Tags) {
  if (tags != null) {
    // we store tags internally as a map (for easy
    // efficient add/remove), but .ipynb uses a list.
    obj.metadata.tags = keys(tags).sort();
  }
}

// Shallow-copy all remaining metadata fields onto obj.metadata.
function processOtherMetadata(obj, other_metadata) {
  if (other_metadata != null) {
    Object.assign(obj.metadata, other_metadata);
  }
}

/**
 * Copy the cell's base64 attachments to `obj.attachments`, keyed by name,
 * each as `{ "image/<ext>": <value> }` where `<ext>` is the filename
 * extension of the name ("jpg" is mapped to "jpeg").
 *
 * Attachments whose `type` is not "base64" are skipped individually; they no
 * longer cause the attachments that follow them to be dropped.
 */
function processAttachments(obj, attachments) {
  if (attachments == null) {
    // don't have to or can't do anything (https://github.com/sagemathinc/cocalc/issues/4272)
    return;
  }
  obj.attachments = {};
  for (const name in attachments) {
    const val = attachments[name];
    if (val.type !== "base64") {
      // we only handle this now -- skip just this attachment
      continue;
    }
    let ext = filename_extension(name);
    if (ext === "jpg") {
      ext = "jpeg";
    }
    obj.attachments[name] = { [`image/${ext}`]: val.value };
  }
}

/**
 * Convert the index-keyed map of output messages into the ipynb `outputs`
 * array. The messages (and the `output` map itself) are modified in place.
 *
 * @param output - output messages keyed by index.
 * @param exec_count - execution count of the cell, stamped on data outputs.
 * @param more_output - messages that replace a trailing `more_output` marker.
 * @param blob_store - used to resolve image/pdf/iframe data; optional.
 * @returns the non-null output messages in index order.
 */
function ipynbOutputs({
  output,
  exec_count,
  more_output,
  blob_store,
}: {
  output: { [n: string]: OutputMessage };
  exec_count: number;
  more_output?: OutputMessage[];
  blob_store?: BlobStore;
}) {
  // If the last message has the more_output field, then there may be
  // more output messages stored, which are not in the cells object.
  let len = objArrayLength(output);
  // Optional chaining: for an empty output map len is 0 and there is no
  // message at index -1, so there is nothing to inspect.
  if (output[`${len - 1}`]?.more_output != null) {
    let n: number = len - 1;
    const cnt = more_output?.length ?? 0;
    if (cnt === 0 || more_output == null) {
      // For some reason more output is not available for this cell.  So we replace
      // the more_output message by an error explaining what happened.
      output[`${n}`] = {
        text: "WARNING: Some output was deleted.\n",
        name: "stderr",
      };
    } else {
      // Indeed, the last message has the more_output field.
      // Before converting to ipynb, we remove that last message...
      delete output[`${n}`];
      // Then we put in the known more output.
      for (const mesg of more_output) {
        output[`${n}`] = mesg;
        n += 1;
      }
    }
  }
  // Now, everything continues as normal.

  const outputs: OutputMessage[] = [];
  len = objArrayLength(output);
  for (let n = 0; n < len; n++) {
    const output_n = output[`${n}`];
    if (output_n != null) {
      processOutputN(output_n, exec_count, blob_store);
      outputs.push(output_n);
    }
  }
  return outputs;
}

// Length of an "array" stored as an object with integer-like keys:
// one more than the largest integer key, or 0 for null/empty input.
function objArrayLength(objArray) {
  if (objArray == null) {
    return 0;
  }
  let n = -1;
  for (const k in objArray) {
    const j = parseInt(k, 10);
    // keys that do not parse give NaN, and NaN > n is false, so they are ignored
    if (j > n) {
      n = j;
    }
  }
  return n + 1;
}

/**
 * Convert one output message to ipynb form, in place:
 *  - drops `exec_count`, splits `text` into lines,
 *  - messages with `data` become "execute_result" (text/* values are split
 *    into lines; image/*, application/pdf and iframe values are looked up in
 *    the blob store, with iframe stored under "text/html"),
 *  - messages with `name` become "stream" (stdin prompts are rewritten),
 *  - messages with `ename` become "error".
 *
 * NOTE(review): when a blob is needed but there is no blob store, or the blob
 * is not found, this returns early and the message is left WITHOUT an
 * `output_type`; the original code carries a TODO to return an error instead.
 */
function processOutputN(
  output_n: OutputMessage,
  exec_count: number,
  blob_store?: BlobStore,
) {
  if (output_n == null) {
    return;
  }
  if (output_n.exec_count != null) {
    delete output_n.exec_count;
  }
  if (output_n.text != null) {
    output_n.text = diff_friendly(output_n.text);
  }
  if (output_n.data != null) {
    for (let k in output_n.data) {
      const v = output_n.data[k];
      if (k.slice(0, 5) === "text/") {
        output_n.data[k] = diff_friendly(output_n.data[k]);
      }
      if (k.startsWith("image/") || k === "application/pdf" || k === "iframe") {
        if (blob_store != null) {
          let value;
          if (k === "iframe") {
            // iframe content is exported as html; the value is fetched as a string
            delete output_n.data[k];
            k = "text/html";
            value = blob_store.getString(v);
          } else {
            value = blob_store.getBase64(v);
          }
          if (value == null) {
            // The image is no longer known; this could happen if the user reverts in the history
            // browser and there is an image in the output that was not saved in the latest version.
            // TODO: instead return an error.
            return;
          }
          output_n.data[k] = value;
        } else {
          return; // impossible to include in the output without blob_store
        }
      }
    }
    output_n.output_type = "execute_result";
    if (output_n.metadata == null) {
      output_n.metadata = {};
    }
    output_n.execution_count = exec_count;
  } else if (output_n.name != null) {
    output_n.output_type = "stream";
    if (output_n.name === "input") {
      processStdinOutput(output_n);
    }
  } else if (output_n.ename != null) {
    output_n.output_type = "error";
  }
}

// Rewrite a stdin ("input") message as a stdout stream showing
// "<prompt> <value>"; a deep copy of the original message is kept in
// output.cocalc (cell_to_ipynb later moves that into the cell metadata).
function processStdinOutput(output) {
  output.cocalc = deep_copy(output);
  output.name = "stdout";
  // tolerate a message without opts/prompt rather than throwing or printing "undefined"
  output.text = (output.opts?.prompt ?? "") + " " + (output.value ?? "");
  delete output.opts;
  delete output.value;
}

// Transform a string s with newlines into an array v of strings
// such that v.join('') == s.  Non-string input is returned as is,
// except that null is returned as undefined.
function diff_friendly(
  s: string | string[] | undefined | null,
): string[] | undefined | null {
  if (typeof s !== "string") {
    // might already be an array or undefined.
    if (s == null) {
      return undefined;
    }
    return s;
  }
  const v = s.split("\n");
  for (let i = 0; i < v.length - 1; i++) {
    v[i] += "\n";
  }
  if (v[v.length - 1] === "") {
    v.pop(); // remove last elt
  }
  return v;
}