Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/resources/pandoc/datadir/_utils.lua
12922 views
1
-- _utils.lua
2
-- Copyright (C) 2020-2022 Posit Software, PBC
3
4
-- improved formatting for dumping tables and quarto's emulated pandoc nodes
--
-- Builds an indented, key-sorted rendering of `tbl` and prints it to stdout
-- with a single `print` call.
--
-- @param tbl the table (or emulated-AST userdata) to dump
-- @param raw when truthy, keys starting with "-" are printed too, and the
--            metatable of every visited table is shown
function tdump (tbl, raw)

  -- Keys starting with "-" are hidden unless `raw` is set. Only string keys
  -- can carry that prefix, so every other key type is always printed
  -- (calling string.sub on e.g. a boolean key would raise an error).
  local shouldPrint = function(k)
    if raw or type(k) ~= "string" then
      return true
    end
    return string.sub(k, 1, 1) ~= "-"
  end

  -- addresses of tables that have already been expanded
  local refs = {}
  -- output fragments, concatenated once at the end
  local resultTable = {}

  -- https://www.lua.org/pil/19.3.html
  local pairsByKeys = function (t, f)
    local a = {}
    for n in pairs(t) do table.insert(a, n) end
    table.sort(a, f)
    local i = 0 -- iterator variable
    return function () -- iterator function
      i = i + 1
      if a[i] == nil then
        return nil
      end
      return a[i], t[a[i]]
    end
  end

  local printInner = function(str)
    table.insert(resultTable, str)
  end

  -- uses `pairs` (not `next`) so that a `__pairs` metamethod is honoured
  local isEmptyTable = function(t)
    for _ in pairs(t) do
      return false
    end
    return true
  end

  -- Sort keys by type name first, then by value. Only numbers and strings
  -- support `<`; any other key type is ordered by its string rendering so
  -- the comparator never raises.
  local typesThenValues = function(a, b)
    local ta = type(a)
    local tb = type(b)
    if ta ~= tb then
      return ta < tb
    end
    if ta == "number" or ta == "string" then
      return a < b
    end
    return tostring(a) < tostring(b)
  end

  -- sigh.
  -- https://stackoverflow.com/questions/48209461/global-and-local-recursive-functions-in-lua
  local inner
  inner = function(tbl, indent, doNotIndentType)
    local address = string.format("%p", tbl)
    local indentStr = string.rep(" ", indent)
    if refs[address] ~= nil then
      printInner(indentStr .. "(circular reference to " .. address .. ")\n")
      return
    end

    local isEmpty = isEmptyTable(tbl)
    -- set when the opening line already contains the closing bracket
    local closedInline = false

    if type(tbl) == "table" or type(tbl) == "userdata" and tbl.is_emulated then
      local typeIndent = indentStr
      if doNotIndentType then
        typeIndent = ""
      end
      local endOfOpen = "\n"
      if isEmpty then
        endOfOpen = " <empty> }\n"
        closedInline = true
      end

      if tbl.is_emulated then
        printInner(typeIndent .. string.format("{ [quarto-emulated-ast:%s:%s]%s", tbl.t, address, endOfOpen))
      elseif tisarray(tbl) then
        printInner(typeIndent .. string.format("{ [array:%s]%s", address, endOfOpen))
      else
        printInner(typeIndent .. string.format("{ [table:%s]%s", address, endOfOpen))
      end
      if raw then
        printInner(indentStr .. " [metatable: " .. tostring(getmetatable(tbl)) .. "]\n")
      end
      if tbl.attr then
        printInner(indentStr .. " attr: " .. tostring(tbl.attr) .. "\n")
      end
    end

    for k, v in pairsByKeys(tbl, typesThenValues) do
      if shouldPrint(k) then
        -- tostring renders numbers exactly like `..` does and also copes
        -- with key types that cannot be concatenated
        local keyStr = tostring(k)
        local formatting = indentStr .. " " .. keyStr .. ": "
        v = asLua(v)
        if type(v) == "table" or type(v) == "userdata" and v.is_emulated then
          printInner(formatting)
          -- mark this table as expanded before descending into the child
          refs[address] = true
          local indentBump = 2
          if string.len(keyStr) < 3 then
            indentBump = string.len(keyStr) + 1
          end
          inner(v, indent + indentBump, true)
        else
          printInner(formatting .. tostring(v) .. "\n")
        end
      end
    end

    -- an empty table was already closed on its opening line
    if not closedInline then
      printInner(indentStr .. "}\n")
    end
  end

  inner(tbl, 0)
  print(table.concat(resultTable, ""))
end
125
126
-- Converts a few known pandoc userdata objects into plain Lua values so that
-- they can be dumped.
--
-- Non-userdata values are returned unchanged. The global reader/writer
-- option objects and objects whose string form starts with
-- "pandoc CommonState" are copied field by field into a new table; objects
-- whose string form starts with "pandoc LogMessage" are returned as that
-- string. Any other userdata is returned as is.
--
-- @param o any value
-- @return `o` itself, a table of its fields, or its string form
function asLua(o)
  if type(o) ~= 'userdata' then
    return o
  end

  if rawequal(o, PANDOC_READER_OPTIONS) then
    return {
      abbreviations = o.abbreviations,
      columns = o.columns,
      default_image_extension = o.default_image_extension,
      extensions = o.extensions,
      indented_code_classes = o.indented_code_classes,
      standalone = o.standalone,
      strip_comments = o.strip_comments,
      tab_stop = o.tab_stop,
      track_changes = o.track_changes,
    }
  elseif rawequal(o, PANDOC_WRITER_OPTIONS) then
    return {
      cite_method = o.cite_method,
      columns = o.columns,
      dpi = o.dpi,
      email_obfuscation = o.email_obfuscation,
      epub_chapter_level = o.epub_chapter_level,
      epub_fonts = o.epub_fonts,
      epub_metadata = o.epub_metadata,
      epub_subdirectory = o.epub_subdirectory,
      extensions = o.extensions,
      highlight_style = o.highlight_style,
      html_math_method = o.html_math_method,
      html_q_tags = o.html_q_tags,
      identifier_prefix = o.identifier_prefix,
      incremental = o.incremental,
      listings = o.listings,
      number_offset = o.number_offset,
      number_sections = o.number_sections,
      prefer_ascii = o.prefer_ascii,
      reference_doc = o.reference_doc,
      reference_links = o.reference_links,
      reference_location = o.reference_location,
      section_divs = o.section_divs,
      setext_headers = o.setext_headers,
      slide_level = o.slide_level,
      tab_stop = o.tab_stop,
      table_of_contents = o.table_of_contents,
      template = o.template,
      toc_depth = o.toc_depth,
      top_level_division = o.top_level_division,
      variables = o.variables,
      wrap_text = o.wrap_text
    }
  end

  -- must be local: a bare assignment here would create/overwrite a global
  local description = tostring(o)
  if string.find(description, "^pandoc CommonState") then
    return {
      input_files = o.input_files,
      output_file = o.output_file,
      log = o.log,
      request_headers = o.request_headers,
      resource_path = o.resource_path,
      source_url = o.source_url,
      user_data_dir = o.user_data_dir,
      trace = o.trace,
      verbosity = o.verbosity
    }
  elseif string.find(description, "^pandoc LogMessage") then
    return description
  end
  return o
end
196
197
-- dump an object to stdout
--
-- The object is first passed through `asLua`. Tables and emulated-AST
-- userdata are rendered by `tdump`; anything else is printed via `tostring`.
--
-- @param o   value to dump
-- @param raw forwarded to `tdump`
function dump(o, raw)
  local value = asLua(o)
  local is_table_like = type(value) == 'table'
    or (type(value) == 'userdata' and value.is_emulated)

  if not is_table_like then
    print(tostring(value) .. "\n")
    return
  end

  tdump(value, raw)
end
207
208
209
-- is the table a simple array?
-- see: https://web.archive.org/web/20140227143701/http://ericjmritz.name/2014/02/26/lua-is_array/
--
-- A table qualifies when, for its total number of entries n, every index
-- 1..n holds a non-nil value. Non-table values are never arrays.
function tisarray(t)
  if type(t) ~= "table" then
    return false
  end

  -- count every entry, whatever its key
  local count = 0
  for _ in pairs(t) do
    count = count + 1
  end

  -- each of the first `count` integer slots must be populated
  for idx = 1, count do
    if t[idx] == nil then
      return false
    end
  end
  return true
end
224
225
-- does the table contain a value
--
-- Scans the sequence part of `t` (index 1 upwards until the first nil) for
-- an element equal to `value`. Returns false when `t` is not a table or
-- when `value` is nil or false.
local function tcontains(t, value)
  if not (t and type(t) == "table" and value) then
    return false
  end

  local idx = 1
  local item = t[idx]
  while item ~= nil do
    if item == value then
      return true
    end
    idx = idx + 1
    item = t[idx]
  end
  return false
end
237
238
239
-- Iterate over `t` in key order.
--
-- @param t table to traverse
-- @param f optional comparison function handed to `table.sort`
-- @return an iterator function yielding `key, value` pairs
local function sortedPairs(t, f)
  -- snapshot and sort the keys up front
  local keys = {}
  for key in pairs(t) do
    keys[#keys + 1] = key
  end
  table.sort(keys, f)

  local pos = 0
  return function()
    pos = pos + 1
    local key = keys[pos]
    if key == nil then
      return nil
    end
    return key, t[key]
  end
end
252
253
254
-- Like `pandoc.utils.type`, but additionally reports "CustomInline" for a
-- Span and "CustomBlock" for a Div whose `__quarto_custom` attribute is the
-- string "true".
local function get_type(v)
  -- per pandoc type: the element tag that may carry the custom marker, and
  -- the name reported when it does
  local custom_kinds = {
    Inline = { tag = "Span", name = "CustomInline" },
    Block = { tag = "Div", name = "CustomBlock" },
  }

  local pandoc_type = pandoc.utils.type(v)
  local custom = custom_kinds[pandoc_type]
  if custom and v.t == custom.tag and v.attributes.__quarto_custom == "true" then
    return custom.name
  end
  return pandoc_type
end
267
268
--- Metatable of `pandoc.Blocks` lists, taken from an empty instance.
local BlocksMT = getmetatable(pandoc.Blocks({}))
--- Metatable of `pandoc.Inlines` lists, taken from an empty instance.
local InlinesMT = getmetatable(pandoc.Inlines({}))
272
273
--- Coerces the given object into an `Inlines` list.
--
-- Behaves mostly like `pandoc.Inlines`, but avoids a full
-- unmarshal/marshal roundtrip by assigning the list metatable directly
-- where possible. This buys performance, at the cost of less thorough
-- type checks.
--
-- NOTE: The input object might be modified *destructively*!
local function as_inlines(obj)
  local kind = pandoc.utils.type(obj)

  if kind == 'Inlines' then
    return obj
  end

  if kind == "Inline" then
    -- Faster than calling pandoc.Inlines
    return setmetatable({obj}, InlinesMT)
  end

  if kind == 'List' or kind == 'table' then
    local head = obj[1]
    if head and pandoc.utils.type(head) == 'Block' then
      return pandoc.utils.blocks_to_inlines(obj)
    end
    -- Faster than calling pandoc.Inlines
    return setmetatable(obj, InlinesMT)
  end

  if kind == "Block" then
    return pandoc.utils.blocks_to_inlines({obj})
  end

  if kind == "Blocks" then
    return pandoc.utils.blocks_to_inlines(obj)
  end

  return pandoc.Inlines(obj or {})
end
301
302
--- Coerces the given object into a `Blocks` list.
--
-- Behaves mostly like `pandoc.Blocks`, but avoids a full
-- unmarshal/marshal roundtrip by assigning the list metatable directly
-- where possible. This buys performance, at the cost of less thorough
-- type checks.
--
-- NOTE: The input object might be modified *destructively*!
--
-- This might need some benchmarking.
local function as_blocks(obj)
  local kind = pandoc.utils.type(obj)

  if kind == 'Blocks' then
    return obj
  end

  if kind == 'Block' then
    -- Assigning a metatable directly is faster than calling
    -- `pandoc.Blocks`.
    return setmetatable({obj}, BlocksMT)
  end

  if kind == 'Inline' then
    return setmetatable({pandoc.Plain{obj}}, BlocksMT)
  end

  if kind == 'Inlines' then
    -- a non-empty inline list becomes a single Plain block
    local wrapped = {}
    if next(obj) then
      wrapped = {pandoc.Plain(obj)}
    end
    return setmetatable(wrapped, BlocksMT)
  end

  if kind == 'List' or (kind == 'table' and obj[1]) then
    if pandoc.utils.type(obj[1]) == 'Inline' then
      obj = {pandoc.Plain(obj)}
    end
    return setmetatable(obj, BlocksMT)
  end

  if (kind == 'table' and obj.long) or kind == 'Caption' then
    -- Looks like a Caption
    return as_blocks(obj.long)
  end

  return pandoc.Blocks(obj or {})
end
338
339
-- Chains the given step functions into a single matcher.
--
-- The returned function first calls `reset()`, then feeds its argument
-- through each step in order:
--   * a step returning `false` or `nil` aborts, and that value is returned;
--   * a step returning `true` keeps the current value;
--   * any other result becomes the value passed to the next step.
-- The value left after the last step is returned.
local function match_fun(reset, ...)
  local steps = {...}
  return function(v)
    reset()
    local idx = 1
    local step = steps[idx]
    while step ~= nil do
      local outcome = step(v)
      if not outcome then
        return outcome
      end
      if outcome ~= true then
        v = outcome
      end
      idx = idx + 1
      step = steps[idx]
    end
    return v
  end
end
355
356
357
-- ## syntax examples
358
--
359
-- match("Div")
360
-- returns the node if it's a Div, otherwise false
361
-- match("Div/[1]")
362
-- returns the first child of a Div, otherwise false
363
-- match(".class")
364
-- returns the node if it has the class "class", otherwise false
365
-- match("#id")
366
-- returns the node if it has the id "id", otherwise false
367
--
368
-- match("Div/:child/Para") (in analogy to "div > p" in CSS)
369
-- returns the div if it has a direct child Para, otherwise false
370
--
371
-- match("Div/:descendant/Para") (in analogy to "div p" in CSS)
372
-- returns the div if it has a Para anywhere among its descendants, otherwise false
373
--
374
-- ## Node captures
375
--
376
-- match("{Div}/[1]/Para") (capture)
377
-- returns a list with the div if the first child is a Para, otherwise false
378
-- match("{Div}/[1]/{Para}/[1]/Img") (capture)
379
-- returns a list with the div and Para if the first child is a Para whose
380
-- first child is an Image, otherwise false
381
--
382
-- ## custom matchers
383
--
384
-- match("Div", function(node) return node.content[1] end)
385
-- is equivalent to match("Div/[1]")
386
-- match("Div", function(node) return node.content[1] end, "Para")
387
-- is equivalent to match("Div/[1]/Para")
388
--
389
--
390
391
-- Performance notes: :descendant is implemented with a walk,
392
-- so it's not very efficient.
393
--
394
-- eg :descendant/#id walks the node set
395
--
396
-- repeated calls to :descendant in the same match are likely
397
-- to be quite slow
398
399
-- TODO we probably need to consider recursive reentrancy here
--
-- Builds a matcher function from a sequence of path tokens.
--
-- Each argument is a string (split on "/" into tokens), a number (shorthand
-- for an nth-child step) or a function (used as a custom step). The
-- returned function takes a node and runs it through the steps via
-- `match_fun`; when any token was written as a `{capture}`, a successful
-- match yields the list of captured nodes instead of the final node.
--
-- Invalid tokens or parameters are reported through `fail` (defined
-- elsewhere).
local function match(...)
  local result = {}
  local captured = false
  local captures = {}
  local capture_id = function(v) return v end
  local capture_add = function(v)
    table.insert(captures, v)
    return v
  end
  local function reset()
    result = {}
    captures = {}
  end

  -- canonicalize the arguments into split_args
  local args = {...}
  local split_args = {}
  for _, v in ipairs(args) do
    if type(v) == "string" then
      local vs = split(v, "/", true)
      tappend(split_args, vs)
    else
      table.insert(split_args, v)
    end
  end

  -- readable rendering of the whole pattern, used only in error messages
  local function describe_pattern()
    local parts = {}
    for _, part in ipairs(split_args) do
      parts[#parts + 1] = tostring(part)
    end
    return table.concat(parts, "/")
  end

  -- step: move to the n-th child of the node (or of the list itself when
  -- the node is a Blocks/Inlines list)
  local function process_nth_child(n, capture_fun)
    table.insert(result, function(node)
      if node == nil then
        return false
      end
      local pt = pandoc.utils.type(node)
      local content
      if pt == "Blocks" or pt == "Inlines" then
        content = node
      else
        content = node.content
      end
      return content ~= nil and
        content[n] and
        capture_fun(content[n])
    end)
  end

  local function report_inner_result(r)
    if r == nil or r == false or not captured then
      return r
    end
    -- a table result indicates the child was captured
    -- and we might need to return the parent
    -- if we're also capturing
    if type(r) == "table" then
      for _, v in ipairs(r) do
        table.insert(captures, v)
      end
    end
    return captures
  end

  -- step: the remaining tokens must match one of the direct children
  local function process_child(index)
    -- call match recursively, slicing the remaining args
    local inner_match = match(table.unpack(split_args, index))
    table.insert(result, function(node)
      if node.content == nil then
        return nil
      end
      local r
      for _, v in ipairs(node.content) do
        r = inner_match(v)
        if r ~= nil and r ~= false then
          break
        end
      end

      return report_inner_result(r)
    end)
  end

  -- step: the remaining tokens must match some node visited by a walk
  local function process_descendant(index)
    local inner_match = match(table.unpack(split_args, index))
    table.insert(result, function(node)
      local r
      local function inner_process(inner_node)
        if r ~= nil and r ~= false then
          -- we've already found a match, so we can stop
          return
        end

        r = inner_match(inner_node)
      end
      _quarto.ast.walk(node, {
        Inline = inner_process,
        Block = inner_process
      })
      return report_inner_result(r)
    end)
  end

  for i, arg in ipairs(split_args) do
    if type(arg) == "string" then
      local token = arg
      local first = token:sub(1, 1)
      local last = token:sub(-1)
      local capture_fun = capture_id
      if first == "{" then -- capture
        if last ~= "}" then
          fail("invalid match token: " .. arg .. "(in " .. describe_pattern() .. ")")
          return match_fun(reset, {})
        end
        token = token:sub(2, -2)
        first = token:sub(1, 1)
        capture_fun = capture_add
        captured = true
      end
      -- `capture_fun` and the locals below are fresh on every iteration,
      -- so the step closures can capture them directly
      if token == "" then
        -- empty case exists to support {} as a valid parameter,
        -- which is useful to capture the result of the previous match when it's a function
        table.insert(result, function(node)
          return capture_fun(node)
        end)
      elseif token == ":child" then
        process_child(i + 1)
        break
      elseif token == ":descendant" then
        process_descendant(i + 1)
        break
      elseif first == "." then
        local class = token:sub(2)
        table.insert(result, function(node)
          return node.classes ~= nil and tcontains(node.classes, class) and capture_fun(node)
        end)
      elseif first == "#" then
        local id = token:sub(2)
        table.insert(result, function(node)
          return node.identifier ~= nil and node.identifier == id and capture_fun(node)
        end)
      elseif first == "[" then -- [1]
        local n = tonumber(token:sub(2, -2))
        process_nth_child(n, capture_fun)
      elseif first:upper() == first then -- Plain
        local node_type = token
        table.insert(result, function(node)
          return (is_regular_node(node, node_type) or is_custom_node(node, node_type)) and capture_fun(node)
        end)
      else
        fail("invalid match token: " .. token .. "(in " .. describe_pattern() .. ")")
        return match_fun(reset, {})
      end
    elseif type(arg) == "number" then
      process_nth_child(arg, capture_id)
    elseif type(arg) == "function" then
      table.insert(result, arg)
    else
      fail("invalid match parameter: " .. tostring(arg))
      return match_fun(reset, {})
    end
  end

  if captured then
    -- final step: swap a successful result for the list of captures
    local function send_capture(v)
      if v then
        return captures
      end
      return v
    end
    table.insert(result, send_capture)
  end
  return match_fun(reset, table.unpack(result))
end
575
576
--- Returns `true` iff the given AST node is empty.
-- A node is considered "empty" if it's an empty list, table, or a node
-- without any text or nested AST nodes.
--
-- @param node value to inspect; `nil` and `false` count as empty
-- @return boolean
local function is_empty_node (node)
  if not node then
    return true
  elseif type(node) == 'table' then
    -- tables are considered empty if they don't have any fields.
    -- (compare with nil explicitly: a `false` key is still a field)
    return next(node) == nil
  elseif node.content then
    return next(node.content) == nil
  elseif node.caption then
    -- looks like an image, figure, or table
    if node.caption.long then
      return next(node.caption.long) == nil
    end
    return next(node.caption) == nil
  elseif node.text then
    -- looks like a code node or text node: empty means no text at all
    return node.text == ''
  else
    -- Not sure what this is, but it's probably not empty.
    return false
  end
end
601
602
--- Apply a filter to a node through the node's own `walk` method.
-- Asserts (via `quarto_assert`) that the node exists and has a `walk`
-- field before calling it.
-- @param node a pandoc AST node
-- @param filter table with filter functions
-- @return whatever `node:walk(filter)` returns
local function walk(node, filter)
  local walkable = node and node.walk
  quarto_assert(walkable)
  return node:walk(filter)
end
609
610
-- Appends `block` to the `Blocks` list `blocks`, in place.
--
-- `blocks` must have pandoc type "Blocks". A nil `block` is ignored; a
-- "Blocks" or "Inlines" list is appended element-wise via `extend`; a
-- single "Block" is inserted. Any other type is reported through `fatal`.
local function add_to_blocks(blocks, block)
  local blocks_type = pandoc.utils.type(blocks)
  if blocks_type ~= "Blocks" then
    fatal("add_to_blocks: invalid type " .. blocks_type)
  end
  if block == nil then
    return
  end

  local t = pandoc.utils.type(block)
  if t == "Block" then
    table.insert(blocks, block)
  elseif t == "Blocks" or t == "Inlines" then
    blocks:extend(block)
  else
    fatal("add_to_blocks: invalid type " .. t)
  end
end

-- module exports
return {
  dump = dump,
  type = get_type,
  table = {
    isarray = tisarray,
    contains = tcontains,
    sortedPairs = sortedPairs
  },
  as_inlines = as_inlines,
  as_blocks = as_blocks,
  is_empty_node = is_empty_node,
  match = match,
  walk = walk,
  add_to_blocks = add_to_blocks,
}
640
641
642