-- Path: blob/main/src/resources/pandoc/datadir/_utils.lua
-- 12922 views
-- _utils.lua
-- Copyright (C) 2020-2022 Posit Software, PBC

--- Improved formatting for dumping tables and quarto's emulated pandoc nodes.
-- Builds an indented rendering of `tbl` (keys visited in sorted order) and
-- prints it to stdout in a single `print` call.
-- @param tbl table, or userdata flagged with `is_emulated`, to dump
-- @param raw when truthy, keys starting with "-" are printed too and every
--   table's metatable is shown
function tdump (tbl, raw)

  -- Decides whether the entry stored under key `k` is printed.
  local shouldPrint = function(k, _, innerTbl)
    -- when raw, print everything
    if raw then
      return true
    end
    if type(k) == "number" then
      return true
    end
    -- keys starting with "-" are hidden unless raw output is requested
    if string.sub(k, 1, 1) == "-" then
      return false
    end
    return true
  end

  -- addresses of tables currently/previously descended into, used to
  -- report circular references instead of recursing forever
  local refs = {}
  -- output fragments, concatenated once at the end
  local resultTable = {}

  -- https://www.lua.org/pil/19.3.html
  local pairsByKeys = function (t, f)
    local keys = {}
    for key in pairs(t) do
      table.insert(keys, key)
    end
    table.sort(keys, f)
    local i = 0 -- iterator variable
    local iter = function () -- iterator function
      i = i + 1
      if keys[i] == nil then
        return nil
      else
        return keys[i], t[keys[i]]
      end
    end
    return iter
  end

  local printInner = function(str)
    table.insert(resultTable, str)
  end

  -- true when `pairs` yields no entry at all
  local empty = function(t)
    for _ in pairs(t) do
      return false
    end
    return true
  end

  -- order keys by type name first, then by value within a type
  local typesThenValues = function(a, b)
    local ta = type(a)
    local tb = type(b)
    if ta < tb then return true end
    if ta > tb then return false end
    return a < b
  end

  -- sigh.
  -- https://stackoverflow.com/questions/48209461/global-and-local-recursive-functions-in-lua
  local inner
  inner = function(tbl, indent, doNotIndentType)
    -- "%p" formats the value's address (needs a Lua version supporting it)
    local address = string.format("%p", tbl)
    local indentStr = string.rep(" ", indent)
    local closeBracket = indentStr .. "}\n"
    if refs[address] ~= nil then
      printInner(indentStr .. "(circular reference to " .. address .. ")\n")
      return
    end

    local isArray = tisarray(tbl)
    local isEmpty = empty(tbl)

    if type(tbl) == "table" or type(tbl) == "userdata" and tbl.is_emulated then
      local typeIndent = indentStr
      if doNotIndentType then
        -- the caller already emitted "key: " on this output line
        typeIndent = ""
      end
      local endOfOpen = "\n"
      if isEmpty then
        endOfOpen = " <empty> }\n"
      end

      if tbl.is_emulated then
        printInner(typeIndent .. string.format("{ [quarto-emulated-ast:%s:%s]%s", tbl.t, address, endOfOpen))
      elseif isArray then
        printInner(typeIndent .. string.format("{ [array:%s]%s", address, endOfOpen))
      else
        printInner(typeIndent .. string.format("{ [table:%s]%s", address, endOfOpen))
      end
      if raw then
        printInner(indentStr .. " [metatable: " .. tostring(getmetatable(tbl)) .. "]\n")
      end
      if tbl.attr then
        printInner(indentStr .. " attr: " .. tostring(tbl.attr) .. "\n")
      end
    end

    for k, v in pairsByKeys(tbl, typesThenValues) do
      if shouldPrint(k, v, tbl) then
        local formatting = indentStr .. " " .. k .. ": "
        local value = asLua(v)
        if type(value) == "table" or type(value) == "userdata" and value.is_emulated then
          printInner(formatting)
          -- remember the parent before descending so that a child pointing
          -- back at it is reported as circular
          refs[address] = true
          local indentBump = 2
          if string.len(k) < 3 then -- this does work when k is number
            indentBump = string.len(k) + 1
          end
          inner(value, indent + indentBump, true)
        elseif value ~= nil then
          -- booleans and every other scalar are rendered through tostring
          printInner(formatting .. tostring(value) .. "\n")
        else
          printInner(formatting .. 'nil\n')
        end
      end
    end
    printInner(closeBracket)
  end

  inner(tbl, 0)
  print(table.concat(resultTable, ""))
end

--- Convert selected pandoc userdata objects into plain Lua values so they
-- can be dumped.
-- Non-userdata values are returned unchanged. PANDOC_READER_OPTIONS,
-- PANDOC_WRITER_OPTIONS and objects whose `tostring` starts with
-- "pandoc CommonState" become tables of their fields; objects whose
-- `tostring` starts with "pandoc LogMessage" become that string. Any other
-- userdata is returned as is.
-- @param o value to convert
-- @return the converted value
function asLua(o)
  if type(o) ~= 'userdata' then
    return o
  end

  if rawequal(o, PANDOC_READER_OPTIONS) then
    return {
      abbreviations = o.abbreviations,
      columns = o.columns,
      default_image_extension = o.default_image_extension,
      extensions = o.extensions,
      indented_code_classes = o.indented_code_classes,
      standalone = o.standalone,
      strip_comments = o.strip_comments,
      tab_stop = o.tab_stop,
      track_changes = o.track_changes,
    }
  elseif rawequal(o, PANDOC_WRITER_OPTIONS) then
    return {
      cite_method = o.cite_method,
      columns = o.columns,
      dpi = o.dpi,
      email_obfuscation = o.email_obfuscation,
      epub_chapter_level = o.epub_chapter_level,
      epub_fonts = o.epub_fonts,
      epub_metadata = o.epub_metadata,
      epub_subdirectory = o.epub_subdirectory,
      extensions = o.extensions,
      highlight_style = o.highlight_style,
      html_math_method = o.html_math_method,
      html_q_tags = o.html_q_tags,
      identifier_prefix = o.identifier_prefix,
      incremental = o.incremental,
      listings = o.listings,
      number_offset = o.number_offset,
      number_sections = o.number_sections,
      prefer_ascii = o.prefer_ascii,
      reference_doc = o.reference_doc,
      reference_links = o.reference_links,
      reference_location = o.reference_location,
      section_divs = o.section_divs,
      setext_headers = o.setext_headers,
      slide_level = o.slide_level,
      tab_stop = o.tab_stop,
      table_of_contents = o.table_of_contents,
      template = o.template,
      toc_depth = o.toc_depth,
      top_level_division = o.top_level_division,
      variables = o.variables,
      wrap_text = o.wrap_text
    }
  end
  -- keep this local: a bare assignment here would create a global `v`
  local v = tostring(o)
  if string.find(v, "^pandoc CommonState") then
    return {
      input_files = o.input_files,
      output_file = o.output_file,
      log = o.log,
      request_headers = o.request_headers,
      resource_path = o.resource_path,
      source_url = o.source_url,
      user_data_dir = o.user_data_dir,
      trace = o.trace,
      verbosity = o.verbosity
    }
  elseif string.find(v, "^pandoc LogMessage") then
    return v
  end
  return o
end

--- Dump an object to stdout.
-- Tables and emulated nodes go through `tdump`; everything else is printed
-- via `tostring` followed by an extra newline.
-- @param o value to dump
-- @param raw forwarded to `tdump`
function dump(o, raw)

  o = asLua(o)
  if type(o) == 'table' or type(o) == 'userdata' and o.is_emulated then
    tdump(o, raw)
  else
    print(tostring(o) .. "\n")
  end
end


--- Is the table a simple array?
-- see: https://web.archive.org/web/20140227143701/http://ericjmritz.name/2014/02/26/lua-is_array/
-- @param t value to inspect
-- @return false for non-tables; otherwise true when, counting the entries
--   seen by `pairs`, each count is also a non-nil integer key of `t`
function tisarray(t)
  if type(t) ~= "table" then
    return false
  end
  local i = 0
  for _ in pairs(t) do
    i = i + 1
    if t[i] == nil then
      return false
    end
  end
  return true
end

--- Does the table contain a value?
-- Only the sequence part visited by `ipairs` is searched.
-- @param t table to search; anything else yields false
-- @param value value to look for; a nil or false value yields false
-- @return boolean
local function tcontains(t, value)
  if t and type(t) == "table" and value then
    for _, v in ipairs(t) do
      if v == value then
        return true
      end
    end
    return false
  end
  return false
end


--- Iterate over a table in sorted key order.
-- @param t table to traverse
-- @param f optional comparison function handed to `table.sort`
-- @return iterator function yielding key, value pairs
local function sortedPairs(t, f)
  local keys = {}
  for key in pairs(t) do
    table.insert(keys, key)
  end
  table.sort(keys, f)
  local i = 0 -- iterator variable
  local iter = function() -- iterator function
    i = i + 1
    if keys[i] == nil then
      return nil
    else
      return keys[i], t[keys[i]]
    end
  end
  return iter
end


--- Type name of a value, extended with quarto's custom node kinds.
-- @param v value to classify
-- @return "CustomInline" for a Span, or "CustomBlock" for a Div, whose
--   `__quarto_custom` attribute equals "true"; otherwise the result of
--   `pandoc.utils.type(v)`
local function get_type(v)
  local pandoc_type = pandoc.utils.type(v)
  if pandoc_type == "Inline" then
    if v.t == "Span" and v.attributes.__quarto_custom == "true" then
      return "CustomInline"
    end
  elseif pandoc_type == "Block" then
    if v.t == "Div" and v.attributes.__quarto_custom == "true" then
      return "CustomBlock"
    end
  end
  return pandoc_type
end

--- Blocks metatable
local BlocksMT = getmetatable(pandoc.Blocks{})
--- Inlines metatable
local InlinesMT = getmetatable(pandoc.Inlines{})

--- Turns the given object into a `Inlines` list.
--
-- Works mostly like `pandoc.Inlines`, but doesn't do a full
-- unmarshal/marshal roundtrip. This buys performance, at the cost of
-- less thorough type checks.
--
-- NOTE: The input object might be modified *destructively*!
local function as_inlines(obj)
  local pt = pandoc.utils.type(obj)
  if pt == 'Inlines' then
    return obj
  elseif pt == "Inline" then
    -- Faster than calling pandoc.Inlines
    return setmetatable({obj}, InlinesMT)
  elseif pt == 'List' or pt == 'table' then
    if obj[1] and pandoc.utils.type(obj[1]) == 'Block' then
      return pandoc.utils.blocks_to_inlines(obj)
    end
    -- Faster than calling pandoc.Inlines
    return setmetatable(obj, InlinesMT)
  elseif pt == "Block" then
    return pandoc.utils.blocks_to_inlines({obj})
  elseif pt == "Blocks" then
    return pandoc.utils.blocks_to_inlines(obj)
  else
    return pandoc.Inlines(obj or {})
  end
end

--- Turns the given object into a `Blocks` list.
--
-- Works mostly like `pandoc.Blocks`, but doesn't do a full
-- unmarshal/marshal roundtrip. This buys performance, at the cost of
-- less thorough type checks.
--
-- NOTE: The input object might be modified *destructively*!
--
-- This might need some benchmarking.
local function as_blocks(obj)
  local pt = pandoc.utils.type(obj)
  if pt == 'Blocks' then
    return obj
  elseif pt == 'Block' then
    -- Assigning a metatable directly is faster than calling
    -- `pandoc.Blocks`.
    return setmetatable({obj}, BlocksMT)
  elseif pt == 'Inline' then
    return setmetatable({pandoc.Plain{obj}}, BlocksMT)
  elseif pt == 'Inlines' then
    if next(obj) then
      return setmetatable({pandoc.Plain(obj)}, BlocksMT)
    end
    return setmetatable({}, BlocksMT)
  elseif pt == 'List' or (pt == 'table' and obj[1]) then
    if pandoc.utils.type(obj[1]) == 'Inline' then
      obj = {pandoc.Plain(obj)}
    end
    return setmetatable(obj, BlocksMT)
  elseif (pt == 'table' and obj.long) or pt == 'Caption' then
    -- Looks like a Caption
    return as_blocks(obj.long)
  else
    return pandoc.Blocks(obj or {})
  end
end

--- Chain matcher steps into a single matcher function.
-- The returned function calls `reset()` and then feeds its argument through
-- the steps in order. A step returning nil or false ends the chain with
-- that value; a step returning true keeps the current value; any other
-- result becomes the value handed to the next step.
-- @param reset function invoked at the start of every match
-- @param ... matcher steps
-- @return function(v) running the chain
local function match_fun(reset, ...)
  local steps = {...}
  return function(v)
    reset()
    for _, step in ipairs(steps) do
      local r = step(v)
      if r == false or r == nil then
        return r
      end
      if r ~= true then
        v = r
      end
    end
    return v
  end
end


-- ## syntax examples
--
-- match("Div")
--   returns the node if it's a Div, otherwise false
-- match("Div/[1]")
--   returns the first child of a Div, otherwise false
-- match(".class")
--   returns the node if it has the class "class", otherwise false
-- match("#id")
--   returns the node if it has the id "id", otherwise false
--
-- match("Div/:child/Para") (in analogy to "div > p" in CSS)
--   returns the div if it has a direct child Para, otherwise false
--
-- match("Div/:descendant/Para") (in analogy to "div p" in CSS)
--   returns the div if it has a descendant Para, otherwise false
--
-- ## Node captures
--
-- match("{Div}/[1]/Para") (capture)
--   returns a list with the div if the first child is a Para, otherwise false
-- match("{Div}/[1]/{Para}/[1]/Img") (capture)
--   returns a list with the div and Para if the first child is a Para whose
--   first child is an Image, otherwise false
--
-- ## custom matchers
--
-- match("Div", function(node) return node.content[1] end)
--   is equivalent to match("Div/[1]")
-- match("Div", function(node) return node.content[1] end, "Para")
--   is equivalent to match("Div/[1]/Para")
--
--

-- Performance notes: :descendant is implemented with a walk,
-- so it's not very efficient.
--
-- eg :descendant/#id walks the node set
--
-- repeated calls to :descendant in the same match are likely
-- to be quite slow

-- TODO we probably need to consider recursive reentrancy here
--- Build a matcher function from a pattern.
-- @param ... pattern pieces: strings (split on "/"), numbers (child index)
--   and functions (custom steps)
-- @return function(node) that runs the compiled steps
local function match(...)
  local result = {}
  local captured = false
  local captures = {}
  local capture_id = function(v) return v end
  local capture_add = function(v)
    table.insert(captures, v)
    return v
  end
  local function reset()
    result = {}
    captures = {}
  end

  -- canonicalize the arguments into split_args
  local args = {...}
  local split_args = {}
  for _, v in ipairs(args) do
    if type(v) == "string" then
      local vs = split(v, "/", true)
      tappend(split_args, vs)
    else
      table.insert(split_args, v)
    end
  end

  -- Renders the canonicalized pattern for error messages.
  local function pattern_text()
    local parts = {}
    for _, piece in ipairs(split_args) do
      parts[#parts + 1] = tostring(piece)
    end
    return table.concat(parts, "/")
  end

  -- Adds a step selecting the n-th child of a node (or of a list).
  local function process_nth_child(n, capture_fun)
    table.insert(result, function(node)
      if node == nil then
        return false
      end
      local pt = pandoc.utils.type(node)
      local content
      if pt == "Blocks" or pt == "Inlines" then
        content = node
      else
        content = node.content
      end
      return content ~= nil and
        content[n] and
        capture_fun(content[n])
    end)
  end

  local function report_inner_result(r)
    if r == nil or r == false or not captured then
      return r
    end
    -- a table result indicates the child was captured
    -- and we might need to return the parent
    -- if we're also capturing
    if type(r) == "table" then
      for _, v in ipairs(r) do
        table.insert(captures, v)
      end
    end
    return captures
  end

  -- Adds a step that succeeds when some direct child matches the rest of
  -- the pattern (split_args from `index` onwards).
  local function process_child(index)
    -- call match recursively, slicing the remaining args
    local inner_match = match(table.unpack(split_args, index))
    table.insert(result, function(node)
      if node.content == nil then
        return nil
      end
      local r
      for _, child in ipairs(node.content) do
        r = inner_match(child)
        if r ~= nil and r ~= false then
          break
        end
      end

      return report_inner_result(r)
    end)
  end

  -- Adds a step that succeeds when some node visited by a walk of `node`
  -- matches the rest of the pattern.
  local function process_descendant(index)
    local inner_match = match(table.unpack(split_args, index))
    table.insert(result, function(node)
      local r
      local function inner_process(inner_node)
        if r ~= nil and r ~= false then
          -- we've already found a match, so we can stop
          return
        end

        r = inner_match(inner_node)
      end
      _quarto.ast.walk(node, {
        Inline = inner_process,
        Block = inner_process
      })
      return report_inner_result(r)
    end)
  end

  for i, v in ipairs(split_args) do
    if type(v) == "string" then
      local token = v -- the piece as written, kept for error messages
      local first = v:sub(1, 1)
      local last = v:sub(-1)
      local capture_fun = capture_id
      if first == "{" then -- capture
        v = v:sub(2, -2)
        if last ~= "}" then
          fail("invalid match token: " .. token .. "(in " .. pattern_text() .. ")")
          -- NOTE(review): only reached if fail() returns — confirm
          return match_fun(reset, {})
        end
        first = v:sub(1, 1)
        capture_fun = capture_add
        captured = true
      end
      -- each step closes over this iteration's capture_fun
      if v == "" then
        -- empty case exists to support {} as a valid parameter,
        -- which is useful to capture the result of the previous match when it's a function
        table.insert(result, function(node)
          return capture_fun(node)
        end)
      elseif v == ":child" then
        -- the remaining pieces are consumed by the recursive matcher
        process_child(i + 1)
        break
      elseif v == ":descendant" then
        process_descendant(i + 1)
        break
      elseif first == "." then
        local class = v:sub(2)
        table.insert(result, function(node)
          return node.classes ~= nil and tcontains(node.classes, class) and capture_fun(node)
        end)
      elseif first == "#" then
        local id = v:sub(2)
        table.insert(result, function(node)
          return node.identifier ~= nil and node.identifier == id and capture_fun(node)
        end)
      elseif first == "[" then -- [1]
        local n = tonumber(v:sub(2, -2))
        process_nth_child(n, capture_fun)
      elseif first:upper() == first then -- Plain
        local node_type = v
        table.insert(result, function(node)
          return (is_regular_node(node, node_type) or is_custom_node(node, node_type)) and capture_fun(node)
        end)
      else
        fail("invalid match token: " .. token .. "(in " .. pattern_text() .. ")")
        -- NOTE(review): only reached if fail() returns — confirm
        return match_fun(reset, {})
      end
    elseif type(v) == "number" then
      process_nth_child(v, capture_id)
    elseif type(v) == "function" then
      table.insert(result, v)
    else
      fail("invalid match parameter: " .. tostring(v))
      return match_fun(reset, {})
    end
  end

  if captured then
    -- final step: a successful capturing match yields the capture list
    local function send_capture(v)
      if v then
        return captures
      end
      return v
    end
    table.insert(result, send_capture)
  end
  return match_fun(reset, table.unpack(result))
end

--- Returns `true` iff the given AST node is empty.
-- A node is considered "empty" if it's an empty list, table, or a node
-- without any text or nested AST nodes.
local function is_empty_node (node)
  if not node then
    return true
  elseif type(node) == 'table' then
    -- tables are considered empty if they don't have any fields.
    return not next(node)
  elseif node.content then
    return not next(node.content)
  elseif node.caption then
    -- looks like an image, figure, or table
    if node.caption.long then
      return not next(node.caption.long)
    end
    return not next(node.caption)
  elseif node.text then
    -- looks like a code node or text node; empty means no text at all
    return node.text == ''
  else
    -- Not sure what this is, but it's probably not empty.
    return false
  end
end

--- Call the node's walk method with the given filters.
-- @param node a pandoc AST node
-- @param filter table with filter functions
local function walk(node, filter)
  quarto_assert(node and node.walk)
  return node:walk(filter)
end

return {
  dump = dump,
  type = get_type,
  table = {
    isarray = tisarray,
    contains = tcontains,
    sortedPairs = sortedPairs
  },
  as_inlines = as_inlines,
  as_blocks = as_blocks,
  is_empty_node = is_empty_node,
  match = match,
  walk = walk,
  -- Appends `block` to the Blocks list `blocks` in place: Blocks/Inlines
  -- values are spliced in, a single Block is inserted, nil is ignored.
  add_to_blocks = function(blocks, block)
    if pandoc.utils.type(blocks) ~= "Blocks" then
      fatal("add_to_blocks: invalid type " .. pandoc.utils.type(blocks))
    end
    if block == nil then
      return
    end
    local t = pandoc.utils.type(block)
    if t == "Blocks" or t == "Inlines" then
      blocks:extend(block)
    elseif t == "Block" then
      table.insert(blocks, block)
    else
      fatal("add_to_blocks: invalid type " .. t)
    end
  end,
}