Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/frontend/editors/slate/markdown-to-slate/parse.ts
1697 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import { Descendant } from "slate";
7
import { handlers } from "./register";
8
import { State, Token } from "./types";
9
import { parse_markdown } from "./parse-markdown";
10
import { ensureDocNonempty } from "../padding";
11
import { createMetaNode } from "../elements/meta/type";
12
import { createReferencesNode } from "../elements/references/type";
13
import normalize from "./normalize";
14
import { len } from "@cocalc/util/misc";
15
16
/**
 * Convert one markdown-it token into Slate nodes.
 *
 * The token is offered to each registered handler in order, and the result of
 * the first handler that returns a non-null value is returned.
 *
 * Side effect: an image token that has children is modified in place (its
 * "alt" attribute is filled in and its children are cleared) before the
 * handlers see it.
 *
 * @param token - the token to convert
 * @param state - parser state, passed through to the handlers unchanged
 * @param cache - optional cache, passed through to the handlers unchanged
 * @returns the nodes produced by the first handler that accepts the token
 * @throws Error if no handler returns a result for the token
 */
export function parse(token: Token, state: State, cache?): Descendant[] {
  // console.log("parse", JSON.stringify({ token, state }));

  if (token.type === "image") {
    // For markdown such as
    //      ![foo](bar.png)
    // markdown-it does not store "foo" in the alt attribute of the image
    // token; it stores it as a *child* token instead and leaves alt empty.
    // We move the text into the alt attribute and drop the children, so
    // handlers only have to look at the attributes.  If the token already
    // arrives without children, this whole branch is a no-op.
    const children = token.children ?? [];
    if (children.length > 0) {
      // NOTE(review): only the first child's content is used, so alt text
      // that is split over several child tokens is truncated -- confirm
      // whether that is intended.
      const altText = children[0]?.content;
      if (token.attrs != null && altText != null) {
        // Look the attribute up by name rather than assuming it sits at a
        // fixed position, so that no other attribute can be overwritten.
        const altIndex = token.attrs.findIndex((attr) => attr?.[0] === "alt");
        if (altIndex >= 0) {
          token.attrs[altIndex] = ["alt", altText];
        } else {
          token.attrs.push(["alt", altText]);
        }
      }
      token.children = [];
    }
  }

  for (const handler of handlers) {
    const nodes: Descendant[] | undefined = handler({ token, state, cache });
    if (nodes != null) {
      // console.log("parse got ", JSON.stringify(nodes));
      return nodes;
    }
  }

  throw new Error(
    `some handler must process every token -- ${JSON.stringify(token)}`
  );
}
46
47
/**
 * Convert a markdown string into a normalized Slate document.
 *
 * Steps, in order:
 *   1. run parse_markdown on the input;
 *   2. if it reports metadata, start the document with a meta node;
 *   3. append the nodes that parse() produces for every token;
 *   4. if it reports a nonempty set of references, append a references node;
 *   5. call ensureDocNonempty on the document;
 *   6. return the result of normalize().
 *
 * Why always normalize?  It is critical that the tree produced here is
 * normalized in the sense of
 *
 *      https://docs.slatejs.org/concepts/10-normalizing
 *
 * and also in the sense of our own normalization plugins in ../normalize.ts.
 * Whenever normalization changes what the parser produced from the source
 * markdown, every single later update to the document redoes exactly that
 * extra change, which leads to extensive problems.  (If you suspect this is
 * happening, enable EXPENSIVE_DEBUG in ./editable-markdown.tsx, edit a
 * document and watch the console.log output.)  The parser is meant to emit
 * normalized output already, but subtle edge cases keep turning up, and
 * normalizations added later to ./normalize.ts could put the two slightly
 * out of step again.  Normalizing unconditionally isn't too expensive and is
 * worth it to ensure sanity.
 *
 * @param markdown - the markdown source
 * @param no_meta - forwarded unchanged to parse_markdown
 * @param cache - optional cache, forwarded unchanged to parse()
 * @returns the normalized document
 */
export function markdown_to_slate(
  markdown: string,
  no_meta?: boolean,
  cache?
): Descendant[] {
  const parsed = parse_markdown(markdown, no_meta);
  const { tokens, meta, references } = parsed;

  const nodes: Descendant[] = [];

  // Metadata, when present, always comes first in the document.
  if (meta != null) {
    nodes.push(createMetaNode(meta));
  }

  // One state object is shared by all top-level tokens.
  const parserState: State = { marks: {}, nesting: 0, lines: parsed.lines };
  for (const topLevelToken of tokens) {
    const produced = parse(topLevelToken, parserState, cache);
    for (const item of produced) {
      nodes.push(item);
    }
  }

  // References, when there are any, always come last.
  const hasReferences = references != null && len(references) > 0;
  if (hasReferences) {
    nodes.push(createReferencesNode(references));
  }

  ensureDocNonempty(nodes);

  // See the comment at the top of this function for why this is unconditional.
  return normalize(nodes);
}
106
107