-- Path: blob/main/src/resources/pandoc/datadir/lpegshortcode.lua
-- 12922 views
-- LPEG parsing and handling for shortcodes
-- Copyright (C) 2020-2024 Posit Software, PBC

local lpeg = require('lpeg')

-- Forward declaration: the md_shortcode handlers below call unshortcode:match,
-- but the grammar itself is only built further down in this file.
local unshortcode

-- Backslash-escape every backslash and every occurrence of `quote` in `s`.
-- `quote` defaults to a double quote. It is handed to gsub as a pattern, so
-- callers are expected to pass a character without pattern-magic meaning.
local function escape(s, quote)
  quote = quote or '"'
  local result = s:gsub("\\", "\\\\"):gsub(quote, "\\" .. quote)
  return result
end

-- Reverse of `escape`: turn `\<quote>` into `<quote>`, then `\\` into `\`.
-- `quote` defaults to a double quote.
local function unescape(s, quote)
  quote = quote or '"'
  local result = s:gsub("\\" .. quote, quote):gsub("\\\\", "\\")
  return result
end

-- Identity function, used as a function capture to pass a match through.
local id = function(s) return s end

-- Return `s` without its trailing whitespace.
local function trim_end(s)
  local result = string.gsub(s, "%s*$", "")
  return result
end

-- lpeg helpers
local Space = lpeg.S(" \n\t")^0   -- zero or more blanks
local Space1 = lpeg.S(" \n\t")^1  -- one or more blanks

-- Pattern that captures everything before the first occurrence of the
-- literal `s` and then consumes `s` itself (without capturing it).
local function untilS(s)
  return lpeg.C((1 - lpeg.P(s))^0) * lpeg.P(s)
end

-- Wrap `pattern` so that all of its captures are folded into a single fresh
-- table (the empty table produced by lpeg.Ct("") is the fold accumulator).
local function into_list(pattern)
  return lpeg.Cf(lpeg.Ct("") * pattern, function(list, value)
    table.insert(list, value)
    return list
  end)
end

-- Like `into_list`, but the collected captures are concatenated into one
-- string with table.concat.
local function into_string(pattern)
  return lpeg.Cf(lpeg.Ct("") * pattern, function(list, value)
    table.insert(list, value)
    return list
  end) / table.concat
end

-- constants
local quarto_shortcode_class_prefix = "quarto-shortcode__"

-- evaluators

-- Handler for escaped shortcodes: `s` is the text between the escape
-- delimiters. Emits an empty span whose data-value holds the escaped
-- `{{<...>}}` text.
local function md_escaped_shortcode(s)
  -- escaped shortcodes bring in whitespace
  return "[]{." .. quarto_shortcode_class_prefix .. "-escaped data-is-shortcode=\"1\" data-value=\"" .. escape("{{<" .. s .. ">}}") .. "\"}"
end

-- Convert a raw shortcode token into text suitable for a double-quoted
-- attribute value: a quoted token loses its outer quotes (characters 2..-2
-- are kept) and is re-escaped for double quotes; anything else is returned
-- unchanged.
local function into_dataset_value(s)
  -- `value` was an accidental global in the original; keep it local.
  local value
  if s:sub(1, 1) == "'" then
    value = escape(unescape(s:sub(2, -2), "'"), '"')
  elseif s:sub(1, 1) == "\"" then
    value = escape(unescape(s:sub(2, -2), '"'), '"')
  else
    value = s
  end
  return value
end

-- Handler for a positional string parameter. Emits an empty span carrying
-- the normalized value (data-value) and the raw, end-trimmed source text
-- (data-raw).
local function md_string_param(s)
  local value = into_dataset_value(s)
  local result = "[]{." .. quarto_shortcode_class_prefix .. "-param data-is-shortcode=\"1\" data-value=\"" .. value .. "\" data-raw=\"" .. escape(trim_end(s)) .. "\"}"
  return result
end

-- Handler for a `key = value` parameter.
--   k          key text
--   connective the text matched between key and value ("=" plus blanks)
--   v          value text
-- A key or value starting with "[" is treated as already-rendered span
-- markup and is nested inside the emitted span instead of being stored in
-- an attribute.
local function md_keyvalue_param(k, connective, v)
  local recursive_key = false
  local recursive_value = false

  if k:sub(1, 1) == "[" then
    recursive_key = true
  end
  if v:sub(1, 1) == "[" then
    recursive_value = true
  end
  if recursive_key then
    if recursive_value then
      return "[" .. k .. v .. "]{." .. quarto_shortcode_class_prefix .. "-param data-is-shortcode=\"1\"}"
    else
      return "[" .. k .. "]{." .. quarto_shortcode_class_prefix .. "-param data-is-shortcode=\"1\" data-value=\"" .. into_dataset_value(v) .. "\"}"
    end
  else
    if recursive_value then
      return "[" .. v .. "]{." .. quarto_shortcode_class_prefix .. "-param data-is-shortcode=\"1\" data-key=\"" .. into_dataset_value(k) .. "\"}"
    else
      -- `raw` was an accidental global in the original; keep it local.
      local raw = k .. connective .. v
      return "[]{." .. quarto_shortcode_class_prefix .. "-param data-is-shortcode=\"1\" data-raw=\"" .. escape(raw) .. "\" data-key=\"" .. into_dataset_value(k) .. "\"" .. " data-value=\"" .. into_dataset_value(v) .. "\"}"
    end
  end
end

-- Handler for a complete shortcode.
--   open   the opening delimiter text
--   space  blanks following the opening delimiter
--   lst    list of already-rendered children (params, blanks, nested codes)
--   close  blanks plus the closing delimiter
-- Emits a span wrapping the rendered children; data-raw holds the source
-- text, rebuilt by running `unshortcode` over every child (a child that
-- does not match is used verbatim).
local function md_shortcode(open, space, lst, close)
  local shortcode = {"["}

  for i = 1, #lst do
    table.insert(shortcode, lst[i])
  end
  table.insert(shortcode, "]{.")
  table.insert(shortcode, quarto_shortcode_class_prefix)
  table.insert(shortcode, " data-is-shortcode=\"1\"")
  local raw = open .. space
  for i = 1, #lst do
    local un = unshortcode:match(lst[i])
    raw = raw .. (un or lst[i])
  end
  raw = raw .. close
  table.insert(shortcode, " data-raw=\"")
  table.insert(shortcode, escape(raw))
  table.insert(shortcode, "\"")
  table.insert(shortcode, "}")
  return table.concat(shortcode, "")
end

-- Quoted strings: the capture is the full text including both quotes.
local double_quoted_string = into_string(lpeg.C("\"") * lpeg.C((1 - lpeg.P("\""))^0) * lpeg.C("\""))
local single_quoted_string = into_string(lpeg.C("'") * lpeg.C((1 - lpeg.P("'"))^0) * lpeg.C("'"))

-- A quoted string or a bare word (not starting with a quote, "}" or ">"),
-- followed by optional blanks that are consumed but not captured.
local sc_string = (
  double_quoted_string * Space +
  single_quoted_string * Space +
  (- lpeg.S("'\"}>") * lpeg.C((1 - lpeg.S(" \n\t"))^1) * Space)
) / id

-- Same as sc_string, but trailing blanks are left unconsumed.
local sc_string_no_space = (
  double_quoted_string +
  single_quoted_string +
  (- lpeg.S("'\"}>") * lpeg.C((1 - lpeg.S(" \n\t"))^1))
) / id

-- Build a shortcode grammar whose semantic actions come from
-- `evaluator_table`:
--   escaped(s)                       escaped shortcode body
--   string(s)                        positional parameter
--   keyvalue(k, connective, v)       key/value parameter
--   shortcode(open, space, lst, close)  complete shortcode
--   ignore_pattern (optional)        lpeg pattern whose matches are copied
--                                    through verbatim
-- Returns an lpeg pattern; matching yields the transformed text as a string.
local function make_shortcode_parser(evaluator_table)
  local escaped_handler = evaluator_table.escaped
  local string_handler = evaluator_table.string
  local keyvalue_handler = evaluator_table.keyvalue
  local shortcode_handler = evaluator_table.shortcode

  -- rules
  local escaped_sc1 = lpeg.P("{{{<") * untilS(">}}}") / escaped_handler
  local escaped_sc2 = lpeg.P("{{</*") * untilS("*/>}}") / escaped_handler

  -- A quoted string, or a bare word that stops at blanks or at `skip`
  -- (a literal string or an lpeg pattern). The match is passed to `capture`,
  -- or to string_handler when `capture` is omitted.
  local function sc_string_skipping(skip, capture)
    if type(skip) == "string" then
      skip = lpeg.P(skip)
    end
    return (into_string(double_quoted_string) +
      into_string(single_quoted_string) +
      (- lpeg.S("'\"}>") * lpeg.C(((1 - skip) - lpeg.S(" \n\t"))^1))) / (capture or string_handler) -- function(s) return { type = "string", value = s } end
  end

  -- skip :/? as well so that URLs with = in them are not treated as key/value pairs
  local sc_keyvalue = (sc_string_skipping(lpeg.S(":/?="), id) * lpeg.C(Space * lpeg.P("=") * Space) * sc_string_no_space) / keyvalue_handler

  local text
  if evaluator_table.ignore_pattern then
    text = (evaluator_table.ignore_pattern / id +
      lpeg.V("Nonshortcode") +
      lpeg.V("Shortcode"))^1
  else
    text = (lpeg.V("Nonshortcode") +
      lpeg.V("Shortcode"))^1
  end
  local sc = lpeg.P({
    "Text",
    Text = into_string(text),
    -- any single character that does not start a shortcode
    Nonshortcode = (1 - lpeg.P("{{{<") - lpeg.P("{{<")) / id,
    -- NOTE(review): unlike sc_keyvalue, the "=" connective is not captured
    -- here, so keyvalue_handler receives the key followed by whatever
    -- Shortcode captures. Confirm that matches the handler's
    -- (k, connective, v) parameter order.
    KeyShortcodeValue = (sc_string_skipping(lpeg.S(":/?="), id) * Space * lpeg.P("=") * Space * lpeg.V("Shortcode")) / keyvalue_handler,
    Shortcode = escaped_sc1 +
      escaped_sc2 +
      ((lpeg.C(lpeg.P("{{<")) *
      lpeg.C(Space) *
      into_list(
        (lpeg.V("Shortcode") +
        lpeg.V("KeyShortcodeValue") +
        sc_keyvalue +
        (Space1 / id) +
        (sc_string_skipping(">}}") * (Space / id))
        )^1
      ) *
      lpeg.C(Space * lpeg.P(">}}"))) / shortcode_handler) * (Space / id)
  })

  return sc
end

-- From here on the local `md_shortcode` names the compiled parser rather
-- than the handler function defined above. The table constructor is
-- evaluated before the assignment, so `shortcode` still receives the
-- handler function.
md_shortcode = make_shortcode_parser({
  escaped = md_escaped_shortcode,
  string = md_string_param,
  keyvalue = md_keyvalue_param,
  shortcode = md_shortcode,

  ignore_pattern = lpeg.P("{.hidden .quarto-markdown-envelope-contents render-id=\"") * (lpeg.P(1) - lpeg.P("\"}"))^1 * lpeg.P("\"}")
})

-- A double-quoted string with `\\` and `\"` escapes. The capture is the
-- content with escapes resolved and the surrounding quotes removed.
local escaped_string = into_string(
  (lpeg.P("\"") *
  ((lpeg.P("\\\\") +
  lpeg.P("\\\"") +
  (1 - lpeg.P("\""))) ^ 0) * lpeg.P("\"")) / function(s)
    return s:gsub("\\\"", "\""):gsub("\\\\", "\\"):sub(2, -2)
  end)

-- Grammar that maps the span markup emitted by the md_* handlers back to
-- the shortcode source text stored in its data-raw / data-value attributes.
-- Characters outside recognized spans are copied through unchanged.
-- local unshortcode = lpeg.P("[]{.quarto-shortcode__-param data-raw=\"") * (lpeg.P("value") / id) * lpeg.P("\"}")
unshortcode = lpeg.P({
  "Text",
  Text = into_string((lpeg.V("Shortcodespan") + lpeg.P(1) / id)^1),
  Nonshortcode = (1 - lpeg.P("["))^1 / id,
  -- key/value param span: yield its data-raw
  Shortcodekeyvalue = (lpeg.P("[]{.quarto-shortcode__-param data-is-shortcode=\"1\" data-raw=") * escaped_string * Space * lpeg.P("data-key=") * escaped_string * Space * lpeg.P("data-value=") * escaped_string * lpeg.P("}")) /
    function(r, k, v) return r end,
  -- string param span: yield its data-raw
  Shortcodestring = (lpeg.P("[]{.quarto-shortcode__-param data-is-shortcode=\"1\" data-value=") * escaped_string * Space * lpeg.P("data-raw=") * escaped_string * lpeg.P("}")) /
    function(v, r) return r end,
  -- Shortcodekeyvalue =
  -- escaped shortcode span: re-wrap the stored text in one extra brace pair
  Shortcodeescaped = lpeg.P("[]{.quarto-shortcode__-escaped data-is-shortcode=\"1\" data-value=") *
    (escaped_string / function(s) return "{" .. unescape(s) .. "}" end) *
    lpeg.P("}"),
  -- full shortcode span: nested spans are matched, but only the last
  -- capture (the outer data-raw) is returned
  Shortcodespan = lpeg.V"Shortcodeescaped" + lpeg.V"Shortcodekeyvalue" + lpeg.V"Shortcodestring" +
    (lpeg.P("[") * (lpeg.V("Shortcodespan") * Space)^0 * (lpeg.P("]{.quarto-shortcode__ data-is-shortcode=\"1\"") * Space * lpeg.P("data-raw=") * escaped_string * Space * lpeg.P("}"))) / function(...)
      local args = {...}
      return args[#args]
    end
})

-- Print `msg` prefixed with the source location found at stack level 3 (the
-- caller of the expect_* helper), then exit the process with status 1.
local function fail_at_line(msg)
  local info = debug.getinfo(3, "Sl")
  print(info.source .. ":" .. tostring(info.currentline) .. ": " .. msg)
  os.exit(1)
end

-- Fail unless v1 == v2. Values go through tostring so that a nil result
-- (e.g. a failed lpeg match) is reported instead of raising a concat error.
local function expect_equals(v1, v2)
  if v1 ~= v2 then
    fail_at_line("Expected " .. tostring(v1) .. " to equal " .. tostring(v2))
  end
end

-- Fail unless `pattern` matches `str`.
local function expect_match(pattern, str)
  if not pattern:match(str) then
    fail_at_line("Expected " .. str .. " to match " .. tostring(pattern))
  end
end

-- Fail if `pattern` matches `str`.
local function expect_no_match(pattern, str)
  if pattern:match(str) then
    fail_at_line("Expected " .. str .. " to not match " .. tostring(pattern))
  end
end

-- Self-test, run only when the LUA_TESTING environment variable is set.
if os.getenv("LUA_TESTING") ~= nil then
  expect_match(single_quoted_string, "'asdf'")
  expect_no_match(single_quoted_string, "\"asdf\"")
  expect_match(double_quoted_string, "\"asdf\"")
  expect_no_match(double_quoted_string, "'asdf'")
  expect_match(sc_string, "\"asdf\"")
  expect_match(sc_string, "'asdf'")
  expect_match(sc_string, "asdf }}>")
  expect_equals(sc_string:match("asdf }}>"), "asdf")

  -- each input must survive md_shortcode followed by unshortcode unchanged
  local unshortcode_tests = {
    '{{{< meta >}}}',
    "{{< meta 'foo' >}}",
    "{{< meta \"foo\" >}}",
    "{{< meta bar >}}",
    "{{< meta bar >}} {{< meta bar >}}",
    "{{< meta bar >}}",
    "{{< meta foo = bar >}}",
    "{{< meta\n foo = bar >}}",
    "{{< meta foo = 'bar' >}}",
    '{{< meta foo = "bar" >}}',
    "{{< kbd Shift-Ctrl-Q mac=Shift-Command-Q win=Shift-Control-Q linux=Shift-Ctrl-Q >}}",
    "{{< meta k1=v1 k2=v2 >}}",
    "{{< kbd Shift-Ctrl-Q mac=Shift-Command-Q win=Shift-Control-Q >}}",
    '{{< video https://youtu.be/wo9vZccmqwc width="400" height="300" >}}',
  }
  for _, v in ipairs(unshortcode_tests) do
    expect_equals(unshortcode:match(md_shortcode:match(v)), v)
  end

  print("Tests passed")
end

-- replace multi-character code points with an escaped version
-- that contains an UUID that we can use to restore the original
-- without worrying about collisions from user code that uses
-- the same escape syntax
local function escape_unicode(txt)
  local result = {}
  for _, c in utf8.codes(txt) do
    if c > 127 then
      table.insert(result, string.format("cf5733e5-0370-4aae-8689-61bad1dd9ec0&#x%x;", c))
    else
      table.insert(result, utf8.char(c))
    end
  end
  return table.concat(result, "")
end

-- replace escaped code points with their unescaped version
local function unescape_unicode(txt)
  -- the outer parentheses drop gsub's second result (the replacement count)
  return (txt:gsub("cf5733e5%-0370%-4aae%-8689%-61bad1dd9ec0&#x([0-9a-fA-F]+);", function (c)
    return utf8.char(tonumber(c, 16))
  end))
end

-- Match `pattern` against `txt` with every code point above 127 replaced by
-- a UUID-guarded placeholder, then restore those code points in the result.
-- Returns nil when the pattern does not match.
local function wrap_lpeg_match(pattern, txt)
  txt = escape_unicode(txt)
  txt = pattern:match(txt)
  if txt == nil then
    return nil
  end
  txt = unescape_unicode(txt)
  return txt
end

-- Convert a string to its hexadecimal representation
-- (two uppercase hex digits per character matched by '.')
local function string_to_hex(str)
  return (str:gsub('.', function(c)
    return string.format('%02X', string.byte(c))
  end))
end

local md_shortcode_2_uuid = "b58fc729-690b-4000-b19f-365a4093b2ff"
-- Lua-pattern form of the UUID above (dashes escaped) plus the trailing ";"
local md_shortcode_2_uuid_pattern = "b58fc729%-690b%-4000%-b19f%-365a4093b2ff;"

-- Escaped-shortcode handler for md_shortcode_2: emits
-- "<uuid>;<hex of the {{{<...>}}} text>;".
local function md_escaped_shortcode_2_fun(s)
  return table.concat({
    md_shortcode_2_uuid,
    ";",
    string_to_hex("{{{<" .. s .. ">}}}"),
    ";"
  })
end

-- Shortcode handler for md_shortcode_2: rebuilds the raw shortcode text the
-- same way md_shortcode's handler does and emits "<uuid>;<hex of raw>;".
local function md_shortcode_2_fun(open, space, lst, close)
  local raw = open .. space
  for i = 1, #lst do
    local un = unshortcode:match(lst[i])
    raw = raw .. (un or lst[i])
  end
  raw = raw .. close
  return table.concat({
    md_shortcode_2_uuid,
    ";",
    string_to_hex(raw),
    ";"
  });
end

-- This new transformation into a plain UUID-guarded string,
-- is designed to survive the pandoc markdown reader barrier under Pandoc 3.7 and later.
-- we still need the first shortcode transformation to actually convert
-- to a span when it's safe to do so, but this transformation
-- is safe to use in all contexts (including link and image targets).
local md_shortcode_2 = make_shortcode_parser({
  escaped = md_escaped_shortcode_2_fun,
  string = md_string_param,
  keyvalue = md_keyvalue_param,
  shortcode = md_shortcode_2_fun,
  ignore_pattern = lpeg.P("{.hidden .quarto-markdown-envelope-contents render-id=\"") * (lpeg.P(1) - lpeg.P("\"}"))^1 * lpeg.P("\"}")
})

return {
  lpegs = {
    md_shortcode = md_shortcode,
    md_shortcode_2 = md_shortcode_2,
    md_shortcode_2_uuid = md_shortcode_2_uuid_pattern,
    unshortcode = unshortcode -- for undoing shortcodes in non-markdown contexts
  },

  parse_md_shortcode_2 = function(txt)
    return wrap_lpeg_match(md_shortcode_2, txt)
  end,

  parse_md_shortcode = function(txt)
    return wrap_lpeg_match(md_shortcode, txt)
  end,

  -- use this to undo shortcode parsing in non-markdown contexts
  unparse_md_shortcode = function(txt)
    return wrap_lpeg_match(unshortcode, txt)
  end,

  make_shortcode_parser = make_shortcode_parser,

  -- use this to safely call an lpeg pattern with a string
  -- that contains multi-byte code points
  wrap_lpeg_match = wrap_lpeg_match
}