CoCalc -- parse.ts

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/frontend/editors/slate/markdown-to-slate/parse.ts
¹⁶⁹⁷ views
1
/*
2
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
 *  License: MS-RSL – see LICENSE.md for details
4
 */
5

6
import { Descendant } from "slate";
7
import { handlers } from "./register";
8
import { State, Token } from "./types";
9
import { parse_markdown } from "./parse-markdown";
10
import { ensureDocNonempty } from "../padding";
11
import { createMetaNode } from "../elements/meta/type";
12
import { createReferencesNode } from "../elements/references/type";
13
import normalize from "./normalize";
14
import { len } from "@cocalc/util/misc";
15

16
/**
 * Convert one token into Slate nodes.
 *
 * The token is offered to each registered handler in order; the result of the
 * first handler that returns a non-null value is returned.
 *
 * NOTE: image tokens are patched *in place* before being dispatched (see the
 * comment in the body), so `token` may be mutated by this call.
 *
 * @param token - the token to convert.
 * @param state - parser state, passed through unchanged to the handlers.
 * @param cache - optional cache, passed through unchanged to the handlers.
 * @returns the nodes produced by the first handler that accepts the token.
 * @throws Error if no registered handler returns a result for the token.
 */
export function parse(token: Token, state: State, cache?): Descendant[] {
  // console.log("parse", JSON.stringify({ token, state }));

  if (token.type === "image") {
    // The image token that comes out of markdown-it is very weird, since if you do
    //  ![foo](bar.png)
    // then it makes foo be the *child* of bar.png and sets no alt tag.  We work
    // around this by copying the content of the first child into the "alt"
    // attribute and then dropping the children.  If this gets changed upstream,
    // the code below safely becomes a no-op (no children => nothing to do).
    const children = token.children;
    if (children != null && children.length > 0) {
      const altText = children[0]?.content;
      if (token.attrs != null && altText != null) {
        // Look the attribute up by name rather than assuming it sits at a
        // fixed index, so that we never clobber an unrelated attribute
        // (e.g. "title") or leave a hole in the array.
        const altIndex = token.attrs.findIndex((attr) => attr[0] === "alt");
        if (altIndex === -1) {
          token.attrs.push(["alt", altText]);
        } else {
          token.attrs[altIndex] = ["alt", altText];
        }
      }
      token.children = [];
    }
  }

  // First handler that claims the token wins.
  for (const handler of handlers) {
    const nodes: Descendant[] | undefined = handler({ token, state, cache });
    if (nodes != null) {
      // console.log("parse got ", JSON.stringify(nodes));
      return nodes;
    }
  }

  throw new Error(
    `some handler must process every token -- ${JSON.stringify(token)}`
  );
}
46

47
/**
 * Convert a markdown string into a normalized Slate document.
 *
 * Steps, in order:
 *   1. Run `parse_markdown` to get the tokens, meta, lines and references.
 *   2. If meta is present, emit a meta node first.
 *   3. Convert every token via `parse`, appending the resulting nodes.
 *   4. If there are any references, emit a references node last.
 *   5. Call `ensureDocNonempty` on the node list, then `normalize` it.
 *
 * @param markdown - the markdown source text.
 * @param no_meta - forwarded as-is to `parse_markdown`.
 * @param cache - optional cache, forwarded as-is to `parse` for every token.
 * @returns the normalized list of top-level Slate nodes.
 */
export function markdown_to_slate(
  markdown: string,
  no_meta?: boolean,
  cache?
): Descendant[] {
  const parsed = parse_markdown(markdown, no_meta);
  const { meta, references } = parsed;
  // window.markdown_parse = parsed;

  const nodes: Descendant[] = [];
  if (meta != null) {
    nodes.push(createMetaNode(meta));
  }

  // A single state object is shared across all top-level tokens.
  const state: State = { marks: {}, nesting: 0, lines: parsed.lines };
  for (const token of parsed.tokens) {
    parse(token, state, cache).forEach((node) => {
      nodes.push(node);
    });
  }

  if (references != null) {
    if (len(references) > 0) {
      nodes.push(createReferencesNode(references));
    }
  }

  ensureDocNonempty(nodes);

  /*
  Why do we always normalize?  It is critical that the slatejs tree produced
  here is normalized, in the sense of

      https://docs.slatejs.org/concepts/10-normalizing

  ... and also in the sense of our own normalization plugins in ../normalize.ts.

  If normalization ever changes the tree relative to what the source markdown
  produced, then every single update to the document keeps redoing exactly
  that extra change, which leads to extensive problems.  (If you suspect this
  is happening, enable EXPENSIVE_DEBUG in ./editable-markdown.tsx, edit a
  document, and watch the console.log.)

  The parser is meant to output normalized trees already, but there always
  seem to be really subtle edge cases, and in the long run somebody could add
  a normalization to ./normalize.ts that this parser doesn't match.  Always
  normalizing isn't too expensive and is worth it to ensure sanity.
  */
  // console.log({ markdown_to_slate: JSON.stringify(nodes) });
  return normalize(nodes);
}
106

107
Product

Resources

Company