Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/resources/pandoc/datadir/lpegshortcode.lua
12922 views
1
-- LPEG parsing and handling for shortcodes
2
-- Copyright (C) 2020-2024 Posit Software, PBC
3
4
local lpeg = require('lpeg')
5
6
local unshortcode
7
8
-- Backslash-escape `s` so it can be embedded inside a quoted string.
-- Backslashes are doubled first, then every occurrence of `quote`
-- (default: double quote) is prefixed with a backslash.
-- Returns the escaped string only (the gsub count is dropped).
local function escape(s, quote)
  if not quote then
    quote = '"'
  end
  local doubled_backslashes = s:gsub("\\", "\\\\")
  local escaped = doubled_backslashes:gsub(quote, "\\" .. quote)
  return escaped
end
13
14
-- Undo backslash-escaping: every backslash-`quote` pair becomes `quote`
-- (default: double quote), then every doubled backslash becomes a single one.
-- Returns the unescaped string only (the gsub count is dropped).
local function unescape(s, quote)
  if not quote then
    quote = '"'
  end
  local quotes_restored = s:gsub("\\" .. quote, quote)
  local unescaped = quotes_restored:gsub("\\\\", "\\")
  return unescaped
end
19
20
-- Identity function: returns its first argument and nothing else.
-- Used as a function capture to turn a match (or first capture) into a value.
local function id(s)
  return s
end
21
22
-- Return `s` with all trailing whitespace removed.
local function trim_end(s)
  -- the lazy capture stops as soon as only whitespace remains before the end
  local trimmed = string.match(s, "^(.-)%s*$")
  return trimmed
end
26
27
-- lpeg helpers
28
-- Space matches zero or more spaces/newlines/tabs; Space1 requires at least one.
local Space, Space1
do
  local whitespace_char = lpeg.S(" \n\t")
  Space = whitespace_char^0
  Space1 = whitespace_char^1
end
30
31
-- Build a pattern that captures everything up to the first occurrence of
-- the literal `s`, then consumes `s` itself (which is not part of the capture).
local function untilS(s)
  local terminator = lpeg.P(s)
  local body = (1 - terminator)^0
  return lpeg.C(body) * terminator
end
34
35
-- Wrap `pattern` so that all of its captures are folded into a single
-- Lua table (in match order), which becomes the only capture.
local function into_list(pattern)
  local function append(acc, item)
    table.insert(acc, item)
    return acc
  end
  -- lpeg.Ct("") seeds the fold with a fresh empty table
  return lpeg.Cf(lpeg.Ct("") * pattern, append)
end
41
42
-- Wrap `pattern` so that all of its captures are collected in order and
-- concatenated into one string, which becomes the only capture.
local function into_string(pattern)
  return into_list(pattern) / table.concat
end
48
49
-- constants
50
-- Class-name prefix shared by every span emitted by the markdown evaluators
-- below; suffixes such as "-param" and "-escaped" are appended to it.
local quarto_shortcode_class_prefix = "quarto-shortcode__"
51
52
-- evaluators
53
-- Evaluator for escaped shortcodes: emits an empty span of class
-- "<prefix>-escaped" whose data-value holds the shortcode text.
-- `s` is the text between the delimiters and includes any surrounding
-- whitespace, so none is added when re-wrapping it in "{{<" ... ">}}".
local function md_escaped_shortcode(s)
  local shortcode_text = "{{<" .. s .. ">}}"
  return table.concat({
    "[]{.",
    quarto_shortcode_class_prefix,
    "-escaped data-is-shortcode=\"1\" data-value=\"",
    escape(shortcode_text),
    "\"}",
  })
end
57
58
-- Convert a shortcode argument into text suitable for a double-quoted
-- attribute value.
--
-- If `s` starts with a single or double quote, the first and last
-- characters are stripped, the inner text is unescaped with respect to that
-- quote character, and the result is re-escaped for double quotes.
-- Any other string is returned unchanged.
--
-- Fix: the result used to be stored in an accidental global named `value`;
-- nothing is written outside the function any more.
local function into_dataset_value(s)
  local first = s:sub(1, 1)
  if first == "'" or first == "\"" then
    local inner = s:sub(2, -2)
    return escape(unescape(inner, first), '"')
  end
  return s
end
68
69
-- Evaluator for a plain (string) shortcode argument: emits an empty
-- "<prefix>-param" span carrying the dataset value in data-value and the
-- original text (trailing whitespace removed, then escaped) in data-raw.
local function md_string_param(s)
  local dataset_value = into_dataset_value(s)
  local raw_text = escape(trim_end(s))
  return table.concat({
    "[]{.",
    quarto_shortcode_class_prefix,
    "-param data-is-shortcode=\"1\" data-value=\"",
    dataset_value,
    "\" data-raw=\"",
    raw_text,
    "\"}",
  })
end
74
75
-- Evaluator for a key/value shortcode argument: emits a "<prefix>-param" span.
--
-- k          : key text; if it starts with "[" it is treated as an
--              already-rendered span and nested inside the result
-- connective : text between key and value; only used to rebuild data-raw
--              when neither side is a span
-- v          : value text; same "[" convention as the key
--
-- Span-like sides are placed inside the brackets of the result; plain sides
-- are stored as data-key / data-value attributes. data-raw is only emitted
-- when both sides are plain.
--
-- Fix: the rebuilt raw text used to be stored in an accidental global
-- named `raw`; it is now a local.
local function md_keyvalue_param(k, connective, v)
  local key_is_span = k:sub(1, 1) == "["
  local value_is_span = v:sub(1, 1) == "["
  -- closing bracket plus the attributes shared by all four shapes
  local span_attrs = "]{." .. quarto_shortcode_class_prefix .. "-param data-is-shortcode=\"1\""

  if key_is_span and value_is_span then
    return "[" .. k .. v .. span_attrs .. "}"
  end
  if key_is_span then
    return "[" .. k .. span_attrs .. " data-value=\"" .. into_dataset_value(v) .. "\"}"
  end
  if value_is_span then
    return "[" .. v .. span_attrs .. " data-key=\"" .. into_dataset_value(k) .. "\"}"
  end

  local raw = k .. connective .. v
  return "[" .. span_attrs ..
    " data-raw=\"" .. escape(raw) .. "\"" ..
    " data-key=\"" .. into_dataset_value(k) .. "\"" ..
    " data-value=\"" .. into_dataset_value(v) .. "\"}"
end
100
101
-- Evaluator for a complete shortcode: emits a span of class "<prefix>"
-- whose content is the concatenation of the already-evaluated items in
-- `lst`, and whose data-raw attribute holds the reconstructed source text.
--
-- open  : opening delimiter text
-- space : whitespace following the opening delimiter
-- lst   : list of evaluated items (strings)
-- close : closing text (whitespace plus closing delimiter)
local function md_shortcode(open, space, lst, close)
  -- rebuild the original source: each item is mapped back through
  -- `unshortcode`, falling back to the item itself when that yields nothing
  local raw_pieces = { open .. space }
  for i = 1, #lst do
    local item = lst[i]
    raw_pieces[#raw_pieces + 1] = unshortcode:match(item) or item
  end
  raw_pieces[#raw_pieces + 1] = close
  local raw = table.concat(raw_pieces)

  local span = { "[" }
  for i = 1, #lst do
    span[#span + 1] = lst[i]
  end
  span[#span + 1] = "]{."
  span[#span + 1] = quarto_shortcode_class_prefix
  span[#span + 1] = " data-is-shortcode=\"1\""
  span[#span + 1] = " data-raw=\""
  span[#span + 1] = escape(raw)
  span[#span + 1] = "\""
  span[#span + 1] = "}"
  return table.concat(span, "")
end
122
123
-- Build a pattern for text delimited by `quote_char` on both sides.
-- The single capture is the whole text including both quote characters.
-- The body stops at the first closing quote; escapes are not interpreted.
local function quoted_string_pattern(quote_char)
  local q = lpeg.P(quote_char)
  return into_string(lpeg.C(q) * lpeg.C((1 - q)^0) * lpeg.C(q))
end

local double_quoted_string = quoted_string_pattern("\"")
local single_quoted_string = quoted_string_pattern("'")

-- An unquoted word: must not start with a quote, "}" or ">", and runs up to
-- the next space/newline/tab.
local sc_bare_word = -lpeg.S("'\"}>") * lpeg.C((1 - lpeg.S(" \n\t"))^1)

-- A string argument followed by optional whitespace (whitespace not captured).
local sc_string = (
  double_quoted_string * Space +
  single_quoted_string * Space +
  sc_bare_word * Space
) / id

-- Same as sc_string, but trailing whitespace is left unconsumed.
local sc_string_no_space = (
  double_quoted_string +
  single_quoted_string +
  sc_bare_word
) / id
136
137
-- Build an LPeg grammar that scans text for shortcodes and rewrites them
-- using the handlers in `evaluator_table`; all results are concatenated
-- into a single string capture.
--
-- evaluator_table fields read here:
--   escaped(s)                         handler for "{{{< ... >}}}" and "{{</* ... */>}}"
--   string(s)                          handler for a plain argument
--   keyvalue(...)                      handler for key = value arguments
--   shortcode(open, space, lst, close) handler for a whole "{{< ... >}}"
--   ignore_pattern (optional)          pattern whose matches are copied through verbatim
local function make_shortcode_parser(evaluator_table)
  local P, S, C, V = lpeg.P, lpeg.S, lpeg.C, lpeg.V

  local on_escaped = evaluator_table.escaped
  local on_string = evaluator_table.string
  local on_keyvalue = evaluator_table.keyvalue
  local on_shortcode = evaluator_table.shortcode

  -- the two escaped forms; the capture is the text between the delimiters
  local escaped_sc1 = (P("{{{<") * untilS(">}}}")) / on_escaped
  local escaped_sc2 = (P("{{</*") * untilS("*/>}}")) / on_escaped

  -- A quoted string, or an unquoted word that stops at whitespace or at
  -- anything matching `skip`. The result goes through `capture`
  -- (default: the string handler).
  local function sc_string_skipping(skip, capture)
    if type(skip) == "string" then
      skip = P(skip)
    end
    local unquoted = -S("'\"}>") * C(((1 - skip) - S(" \n\t"))^1)
    local any_string = into_string(double_quoted_string)
      + into_string(single_quoted_string)
      + unquoted
    return any_string / (capture or on_string)
  end

  -- skip :/? as well so that URLs with = in them are not treated as key/value pairs
  local sc_keyvalue = (sc_string_skipping(S(":/?="), id)
    * C(Space * P("=") * Space)
    * sc_string_no_space) / on_keyvalue

  -- top-level alternatives; the ignore pattern, when given, is tried first
  local chunk = V("Nonshortcode") + V("Shortcode")
  if evaluator_table.ignore_pattern then
    chunk = evaluator_table.ignore_pattern / id + chunk
  end
  local text = chunk^1

  -- one item inside "{{< ... >}}", tried in this order
  local shortcode_item = V("Shortcode")
    + V("KeyShortcodeValue")
    + sc_keyvalue
    + (Space1 / id)
    + (sc_string_skipping(">}}") * (Space / id))

  local plain_shortcode = (C(P("{{<"))
    * C(Space)
    * into_list(shortcode_item^1)
    * C(Space * P(">}}"))) / on_shortcode

  local sc = P({
    "Text",
    Text = into_string(text),
    Nonshortcode = (1 - P("{{{<") - P("{{<")) / id,
    KeyShortcodeValue = (sc_string_skipping(S(":/?="), id) * Space * P("=") * Space * V("Shortcode")) / on_keyvalue,
    -- the trailing whitespace capture belongs to the plain form only
    Shortcode = escaped_sc1 + escaped_sc2 + (plain_shortcode * (Space / id)),
  })

  return sc
end
190
191
-- Attribute block "{.hidden .quarto-markdown-envelope-contents render-id="..."}";
-- the parser copies it through verbatim instead of scanning it for shortcodes.
local md_shortcode_ignore = lpeg.P("{.hidden .quarto-markdown-envelope-contents render-id=\"")
  * (lpeg.P(1) - lpeg.P("\"}"))^1
  * lpeg.P("\"}")

-- Rebinds the name `md_shortcode` from the evaluator function to the parser
-- built from it; the table below is constructed before the rebinding, so
-- `shortcode` still receives the evaluator function.
md_shortcode = make_shortcode_parser({
  ignore_pattern = md_shortcode_ignore,
  escaped = md_escaped_shortcode,
  string = md_string_param,
  keyvalue = md_keyvalue_param,
  shortcode = md_shortcode,
})
199
200
-- Receives a double-quoted string literal (quotes included) and returns its
-- contents with backslash-quote and doubled backslashes unescaped.
local function unquote_escaped_string(quoted)
  local unescaped = quoted:gsub("\\\"", "\""):gsub("\\\\", "\\")
  return unescaped:sub(2, -2)
end

-- Matches a double-quoted string in which backslash escapes a quote or
-- another backslash; the single capture is the unescaped contents.
local escaped_string = into_string(
  (lpeg.P("\"")
    * (lpeg.P("\\\\") + lpeg.P("\\\"") + (1 - lpeg.P("\"")))^0
    * lpeg.P("\"")) / unquote_escaped_string)
207
208
-- local unshortcode = lpeg.P("[]{.quarto-shortcode__-param data-raw=\"") * (lpeg.P("value") / id) * lpeg.P("\"}")
209
-- Grammar that maps the spans produced by the markdown evaluators back to
-- the shortcode source text they were generated from. Text that is not part
-- of such a span is copied through one character at a time.
do
  local P, V = lpeg.P, lpeg.V

  -- key/value param captures are (raw, key, value): keep raw
  local function keep_raw_first(r, k, v)
    return r
  end

  -- string param captures are (value, raw): keep raw
  local function keep_raw_second(v, r)
    return r
  end

  -- escaped shortcodes store "{{<...>}}"; add back the outer braces
  local function rewrap_escaped(s)
    return "{" .. unescape(s) .. "}"
  end

  -- a full shortcode span yields its nested results followed by its own
  -- data-raw: only the last capture (the outermost raw text) is kept
  local function keep_last(...)
    local count = select("#", ...)
    return (select(count, ...))
  end

  unshortcode = P({
    "Text",
    Text = into_string((V("Shortcodespan") + P(1) / id)^1),
    Nonshortcode = (1 - P("["))^1 / id,
    Shortcodekeyvalue = (P("[]{.quarto-shortcode__-param data-is-shortcode=\"1\" data-raw=")
      * escaped_string * Space
      * P("data-key=") * escaped_string * Space
      * P("data-value=") * escaped_string
      * P("}")) / keep_raw_first,
    Shortcodestring = (P("[]{.quarto-shortcode__-param data-is-shortcode=\"1\" data-value=")
      * escaped_string * Space
      * P("data-raw=") * escaped_string
      * P("}")) / keep_raw_second,
    Shortcodeescaped = P("[]{.quarto-shortcode__-escaped data-is-shortcode=\"1\" data-value=")
      * (escaped_string / rewrap_escaped)
      * P("}"),
    Shortcodespan = V("Shortcodeescaped") + V("Shortcodekeyvalue") + V("Shortcodestring")
      + (P("[")
          * (V("Shortcodespan") * Space)^0
          * (P("]{.quarto-shortcode__ data-is-shortcode=\"1\"") * Space
              * P("data-raw=") * escaped_string * Space * P("}"))) / keep_last,
  })
end
227
228
-- Print `msg` prefixed with "<source>:<line>: " and exit with status 1.
-- The location is taken from stack level 3, i.e. the caller of the function
-- that called fail_at_line (here: the code invoking an expect_* helper).
local function fail_at_line(msg)
  local caller = debug.getinfo(3, "Sl")
  local location = caller.source .. ":" .. tostring(caller.currentline)
  print(location .. ": " .. msg)
  os.exit(1)
end
233
234
-- Test helper: fail (via fail_at_line) unless v1 == v2.
--
-- Fix: operands are passed through tostring() when building the message.
-- Previously a nil or other non-string operand (for example a failed LPeg
-- match) raised "attempt to concatenate" instead of reporting the failed
-- expectation.
local function expect_equals(v1, v2)
  if v1 ~= v2 then
    -- not a tail call: fail_at_line relies on this frame staying on the stack
    fail_at_line("Expected " .. tostring(v1) .. " to equal " .. tostring(v2))
  end
end
239
-- Test helper: fail (via fail_at_line) unless `pattern` matches `str`.
local function expect_match(pattern, str)
  if pattern:match(str) then
    return
  end
  local message = "Expected " .. str .. " to match " .. tostring(pattern)
  -- not a tail call: fail_at_line relies on this frame staying on the stack
  fail_at_line(message)
end
244
-- Test helper: fail (via fail_at_line) if `pattern` matches `str`.
local function expect_no_match(pattern, str)
  if not pattern:match(str) then
    return
  end
  local message = "Expected " .. str .. " to not match " .. tostring(pattern)
  -- not a tail call: fail_at_line relies on this frame staying on the stack
  fail_at_line(message)
end
249
250
-- Self-tests; they run only when the LUA_TESTING environment variable is set.
-- The expect_* helpers are called directly from this chunk so that a failure
-- reports the line of the failing check.
if os.getenv("LUA_TESTING") then
  -- each quoted-string pattern only accepts its own quote character
  expect_match(single_quoted_string, "'asdf'")
  expect_no_match(single_quoted_string, "\"asdf\"")
  expect_match(double_quoted_string, "\"asdf\"")
  expect_no_match(double_quoted_string, "'asdf'")

  -- sc_string accepts both quoted forms and bare words
  expect_match(sc_string, "\"asdf\"")
  expect_match(sc_string, "'asdf'")
  expect_match(sc_string, "asdf }}>")
  expect_equals(sc_string:match("asdf }}>"), "asdf")

  -- converting to spans and back must reproduce the input exactly
  local round_trip_cases = {
    '{{{< meta >}}}',
    "{{< meta 'foo' >}}",
    "{{< meta \"foo\" >}}",
    "{{< meta bar >}}",
    "{{< meta bar >}} {{< meta bar >}}",
    "{{< meta bar >}}",
    "{{< meta foo = bar >}}",
    "{{< meta\n foo = bar >}}",
    "{{< meta foo = 'bar' >}}",
    '{{< meta foo = "bar" >}}',
    "{{< kbd Shift-Ctrl-Q mac=Shift-Command-Q win=Shift-Control-Q linux=Shift-Ctrl-Q >}}",
    "{{< meta k1=v1 k2=v2 >}}",
    "{{< kbd Shift-Ctrl-Q mac=Shift-Command-Q win=Shift-Control-Q >}}",
    '{{< video https://youtu.be/wo9vZccmqwc width="400" height="300" >}}',
  }
  for _, source in ipairs(round_trip_cases) do
    expect_equals(unshortcode:match(md_shortcode:match(source)), source)
  end

  print("Tests passed")
end
282
283
-- Replace every code point above 127 with an escaped form: a fixed UUID
-- followed by "&#x<hex>;". The UUID lets the original be restored later
-- without worrying about collisions with user text that happens to use the
-- same "&#x...;" escape syntax. Code points up to 127 are copied unchanged.
local function escape_unicode(txt)
  local pieces = {}
  for _, codepoint in utf8.codes(txt) do
    local piece
    if codepoint <= 127 then
      piece = utf8.char(codepoint)
    else
      piece = string.format("cf5733e5-0370-4aae-8689-61bad1dd9ec0&#x%x;", codepoint)
    end
    pieces[#pieces + 1] = piece
  end
  return table.concat(pieces, "")
end
298
299
-- Replace each UUID-guarded "&#x<hex>;" sequence in `txt` with the UTF-8
-- encoding of the code point it names. Other text is left untouched.
--
-- Fix: returns only the resulting string. Previously the gsub result was
-- returned directly, which also handed gsub's substitution count to the
-- caller as an unintended second return value.
local function unescape_unicode(txt)
  local restored = txt:gsub(
    "cf5733e5%-0370%-4aae%-8689%-61bad1dd9ec0&#x([0-9a-fA-F]+);",
    function(hex)
      return utf8.char(tonumber(hex, 16))
    end)
  return restored
end
305
306
-- Match `pattern` against `txt` with code points above 127 escaped first,
-- then restore them in the result. Returns nil when the pattern does not
-- match; otherwise only the first value returned by the match is kept.
local function wrap_lpeg_match(pattern, txt)
  local escaped_input = escape_unicode(txt)
  local matched = pattern:match(escaped_input)
  if matched == nil then
    return nil
  end
  local restored = unescape_unicode(matched)
  return restored
end
315
316
-- Convert a string to its hexadecimal representation: every byte becomes
-- two uppercase hex digits.
local function string_to_hex(str)
  local digits = {}
  for i = 1, #str do
    digits[i] = string.format('%02X', str:byte(i))
  end
  return table.concat(digits)
end
322
323
-- UUID that prefixes every hex-encoded shortcode emitted by the *_2 evaluators.
local md_shortcode_2_uuid = "b58fc729-690b-4000-b19f-365a4093b2ff"
-- Lua-pattern form of the prefix: the UUID with "-" escaped, followed by the
-- ";" separator.
local md_shortcode_2_uuid_pattern = (md_shortcode_2_uuid:gsub("%-", "%%-")) .. ";"
325
-- Evaluator for escaped shortcodes in the UUID-guarded encoding:
-- returns "<uuid>;<hex of '{{{<' .. s .. '>}}}'>;".
local function md_escaped_shortcode_2_fun(s)
  local hex = string_to_hex("{{{<" .. s .. ">}}}")
  return md_shortcode_2_uuid .. ";" .. hex .. ";"
end
333
334
-- Evaluator for a complete shortcode in the UUID-guarded encoding.
-- Rebuilds the shortcode source from its parts (each item in `lst` is mapped
-- back through `unshortcode`, falling back to the item itself) and returns
-- "<uuid>;<hex of source>;".
local function md_shortcode_2_fun(open, space, lst, close)
  local pieces = { open .. space }
  for i = 1, #lst do
    local item = lst[i]
    pieces[#pieces + 1] = unshortcode:match(item) or item
  end
  pieces[#pieces + 1] = close
  local raw = table.concat(pieces)
  return md_shortcode_2_uuid .. ";" .. string_to_hex(raw) .. ";"
end
348
349
-- This newer transformation turns shortcodes into a plain UUID-guarded
-- string. It is designed to survive the pandoc markdown reader barrier under
-- Pandoc 3.7 and later. The first shortcode transformation is still needed
-- to actually convert to a span when it is safe to do so, but this one is
-- safe to use in all contexts (including link and image targets).
local md_shortcode_2_ignore = lpeg.P("{.hidden .quarto-markdown-envelope-contents render-id=\"")
  * (lpeg.P(1) - lpeg.P("\"}"))^1
  * lpeg.P("\"}")

local md_shortcode_2 = make_shortcode_parser({
  ignore_pattern = md_shortcode_2_ignore,
  escaped = md_escaped_shortcode_2_fun,
  string = md_string_param,
  keyvalue = md_keyvalue_param,
  shortcode = md_shortcode_2_fun,
})
361
362
-- Module table.
local exports = {
  -- raw LPeg patterns
  lpegs = {
    md_shortcode = md_shortcode,
    md_shortcode_2 = md_shortcode_2,
    md_shortcode_2_uuid = md_shortcode_2_uuid_pattern,
    unshortcode = unshortcode -- for undoing shortcodes in non-markdown contexts
  },

  make_shortcode_parser = make_shortcode_parser,

  -- use this to safely call an lpeg pattern with a string
  -- that contains multi-byte code points
  wrap_lpeg_match = wrap_lpeg_match
}

-- Rewrite shortcodes in `txt` into UUID-guarded hex strings.
function exports.parse_md_shortcode_2(txt)
  return wrap_lpeg_match(md_shortcode_2, txt)
end

-- Rewrite shortcodes in `txt` into markdown spans.
function exports.parse_md_shortcode(txt)
  return wrap_lpeg_match(md_shortcode, txt)
end

-- use this to undo shortcode parsing in non-markdown contexts
function exports.unparse_md_shortcode(txt)
  return wrap_lpeg_match(unshortcode, txt)
end

return exports
389
390