Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/command/convert/jupyter.ts
3583 views
1
/*
2
* jupyter.ts
3
*
4
* Copyright (C) 2020-2022 Posit Software, PBC
5
*/
6
7
import { stringify } from "../../core/yaml.ts";
8
9
import {
10
partitionYamlFrontMatter,
11
readYamlFromMarkdown,
12
} from "../../core/yaml.ts";
13
import { kCellId, kCellLabel } from "../../config/constants.ts";
14
import { JupyterCell, JupyterCellOptions } from "../../core/jupyter/types.ts";
15
import {
16
jupyterAutoIdentifier,
17
jupyterCellOptionsAsComment,
18
jupyterCellWithOptions,
19
jupyterFromFile,
20
mdEnsureTrailingNewline,
21
mdFromContentCell,
22
mdFromRawCell,
23
quartoMdToJupyter,
24
} from "../../core/jupyter/jupyter.ts";
25
import { partitionCellOptions } from "../../core/lib/partition-cell-options.ts";
26
import { Metadata } from "../../config/types.ts";
27
import { jupyterKernelspec } from "../../core/jupyter/kernels.ts";
28
import { fixupFrontMatter } from "../../core/jupyter/jupyter-fixups.ts";
29
import {
30
jupyterCellSrcAsLines,
31
jupyterCellSrcAsStr,
32
} from "../../core/jupyter/jupyter-shared.ts";
33
import { assert } from "testing/asserts";
34
import { getEndingNewlineCount } from "../../core/lib/text.ts";
35
36
/**
 * Convert a Quarto markdown file into Jupyter notebook JSON.
 *
 * @param file - Path of the markdown file to read.
 * @param includeIds - Forwarded unchanged to `quartoMdToJupyter`.
 * @returns The notebook serialized as JSON with two-space indentation.
 */
export async function markdownToJupyterNotebook(
  file: string,
  includeIds: boolean,
) {
  // load the markdown source from disk
  const qmdSource = Deno.readTextFileSync(file);

  // build the notebook, then pretty-print it
  const nb = await quartoMdToJupyter(qmdSource, includeIds);
  const kJsonIndent = 2;
  return JSON.stringify(nb, null, kJsonIndent);
}
44
45
/**
 * Convert a Jupyter notebook file into Quarto markdown source.
 *
 * The result is a YAML front matter block followed by the markdown generated
 * from each notebook cell. Front matter is taken from the first raw cell that
 * partitions into YAML front matter; a `jupyter` entry describing the kernel
 * is always added to it.
 *
 * @param file - Path of the notebook to read.
 * @param includeIds - Forwarded to `mdFromCodeCell` for every code cell.
 * @returns The complete markdown document (`---` YAML `---` + body).
 * @throws Error if neither `kernelspec.language` nor `language_info.name`
 *   provides a language, or if a cell has an unexpected `cell_type`.
 */
export async function jupyterNotebookToMarkdown(
  file: string,
  includeIds: boolean,
) {
  // read notebook & alias kernelspec
  const notebook = fixupFrontMatter(jupyterFromFile(file));
  let kernelspec = notebook.metadata.kernelspec;

  // https://github.com/quarto-dev/quarto-cli/issues/12374
  // narrow fix for .ipynbs that have a language_info field but no kernelspec.language
  if (
    kernelspec.language === undefined && notebook.metadata.language_info?.name
  ) {
    kernelspec = {
      ...kernelspec,
      language: notebook.metadata.language_info?.name,
    };
  }
  if (kernelspec.language === undefined) {
    throw new Error(
      "No language found in kernelspec for notebook " + file,
    );
  }

  // generate markdown
  const md: string[] = [];

  let frontMatter: string | undefined;
  for (let i = 0; i < notebook.cells.length; i++) {
    // alias cell
    const cell = notebook.cells[i];

    const cellWithOptions = jupyterCellWithOptions(
      i,
      kernelspec.language,
      cell,
    );

    // does the output so far end with enough newlines?
    // if not, add sufficient newlines so we have at least two
    // between the last cell and this one
    const endingNewLineCount = getEndingNewlineCount(md);
    if (i > 0 && endingNewLineCount < 2) {
      md.push("\n\n");
    }

    // write markdown
    switch (cell.cell_type) {
      case "markdown":
        md.push(...mdFromContentCell(cellWithOptions));
        break;
      case "raw":
        // see if this is the front matter
        if (frontMatter === undefined) {
          const partitioned = partitionYamlFrontMatter(
            jupyterCellSrcAsStr(cell),
          );
          if (partitioned) {
            if (partitioned.yaml) {
              frontMatter = partitioned.yaml;
            }
            if (partitioned.markdown) {
              md.push(partitioned.markdown);
            }
          } else {
            // the cell did not partition into front matter: emit it as an
            // ordinary raw cell rather than silently dropping its content
            md.push(...mdFromRawCell(cellWithOptions));
          }
        } else {
          md.push(...mdFromRawCell(cellWithOptions));
        }

        break;
      case "code":
        md.push(
          ...(await mdFromCodeCell(
            kernelspec.language.toLowerCase(),
            cell,
            includeIds,
          )),
        );
        break;
      default:
        throw new Error("Unexpected cell type " + cell.cell_type);
    }

    // if we didn't capture frontMatter then add a newline
    if (i > 0 || !frontMatter) {
      md.push("\n");
    }
  }

  // join into source
  const mdSource = md.join("");

  // read any yaml front matter defined in a 'raw' cell
  const yaml: Metadata = frontMatter ? readYamlFromMarkdown(frontMatter) : {};

  // forward qmd-relevant notebook metadata. known metadata we exclude:
  // - toc: Jupyter UI specific metadata
  // - language_info: Jupyter UI specific metadata
  // - widgets: Output artifacts that don't belong in qmd source
  // for jupytext we provide a full kernelspec + jupytext metadata,
  // otherwise we provide an abbreviated spec w/ just the kernel name
  yaml.jupyter = notebook.metadata.jupytext
    ? {
      jupytext: notebook.metadata.jupytext,
      kernelspec: notebook.metadata.kernelspec,
    }
    : notebook.metadata.kernelspec.name;

  // if we are using the string shorthand make sure we have this kernelspec locally
  if (typeof yaml.jupyter === "string") {
    if (!await jupyterKernelspec(yaml.jupyter)) {
      yaml.jupyter = {
        kernelspec: notebook.metadata.kernelspec,
      };
    }
  }

  // if we found front matter, then the markdown source will start with enough
  // newlines for the front matter to have been detected in the first place.
  // So we only need to add newlines if there was no front matter.
  //
  // If this invariant breaks, we have a bug of some kind, so let's just assert it
  assert(frontMatter || !mdSource.match(/^\n\n/));
  const maybeYamlMdBreak = frontMatter ? "" : "\n\n";

  // return yaml + markdown
  const yamlText = stringify(yaml, {
    indent: 2,
    lineWidth: -1,
    sortKeys: false,
    skipInvalid: true,
  });
  return `---\n${yamlText}---${maybeYamlMdBreak}${mdSource}`;
}
180
181
/**
 * Render one notebook code cell as the lines of a fenced markdown code block.
 *
 * The block is laid out as: opening fence with `{language}`, an optional
 * cell id comment, the cell's own YAML options, the remaining cell metadata
 * (flow style), the cell code, and the closing fence.
 *
 * Note: this mutates `cell` — when `includeIds` is false the cell id and the
 * `id` / `outputId` metadata entries are deleted; when it is true the id is
 * cleared if it equals the identifier derived from the cell's label option.
 *
 * @param language - Language name written into the fence and used to format
 *   option comments.
 * @param cell - The code cell to render.
 * @param includeIds - Whether cell ids are kept in the output.
 * @returns The markdown pieces for the cell, or an empty array when the cell
 *   has no source.
 */
async function mdFromCodeCell(
  language: string,
  cell: JupyterCell,
  includeIds: boolean,
) {
  // redact if the cell has no source
  if (!cell.source.length) {
    return [];
  }

  const srcLines = jupyterCellSrcAsLines(cell);

  // determine the largest run of backticks that starts a line in the cell;
  // the fence uses one more than that, and never fewer than three
  const maxBackticks = srcLines.reduce(
    (max: number, line: string) =>
      Math.max(max, line.match(/^`+/)?.[0].length || 0),
    2,
  );
  const backticks = "`".repeat(maxBackticks + 1);

  // begin code cell
  const md: string[] = [backticks + "{" + language + "}\n"];

  // partition
  const { yaml, source } = await partitionCellOptions(
    language,
    srcLines,
  );
  const options = yaml ? yaml as JupyterCellOptions : {};

  if (!includeIds) {
    delete cell.id;
    delete cell.metadata["id"];
    delete cell.metadata["outputId"];
  } else {
    if (cell.id) {
      if (options[kCellLabel]) {
        // an id that matches the one derived from the label is redundant
        const label = String(options[kCellLabel]);
        if (jupyterAutoIdentifier(label) === cell.id) {
          cell.id = undefined;
        }
      }
    }
  }

  // prepare the options for writing
  // (precedence: cell metadata < yaml options < explicit cell id)
  let yamlOptions: Metadata = {};
  if (cell.id) {
    yamlOptions[kCellId] = cell.id;
  }
  yamlOptions = {
    ...cell.metadata,
    ...yaml,
    ...yamlOptions,
  };

  // keys already written to the cell header; used so that the metadata
  // section below never repeats a key (duplicate keys are invalid YAML)
  const writtenKeys = new Set<string>();

  // cell id first
  if (yamlOptions[kCellId]) {
    md.push(
      ...jupyterCellOptionsAsComment(language, { id: yamlOptions[kCellId] }),
    );
    delete yamlOptions[kCellId];
    writtenKeys.add("id");
  }

  // yaml
  if (yaml) {
    const yamlOutput: Metadata = {};
    for (const key in yaml) {
      const value = yamlOptions[key];
      if (value !== undefined) {
        yamlOutput[key] = value;
        delete yamlOptions[key];
        writtenKeys.add(key);
      }
    }
    md.push(...jupyterCellOptionsAsComment(language, yamlOutput));
  }

  // metadata (only keys not already written above)
  const metadataOutput: Metadata = {};
  for (const key in cell.metadata) {
    if (writtenKeys.has(key)) {
      continue;
    }
    const value = cell.metadata[key];
    if (value !== undefined) {
      metadataOutput[key] = value;
    }
  }
  md.push(
    ...jupyterCellOptionsAsComment(language, metadataOutput, { flowLevel: 1 }),
  );

  // write cell code
  md.push(...mdEnsureTrailingNewline(source));

  // end code cell
  md.push(backticks + "\n");

  return md;
}
279
280