Skip to content

Commit

Permalink
Merge pull request #10592 from quarto-dev/feature/improve-fenced-divs
Browse files Browse the repository at this point in the history
Feature/improve fenced divs
  • Loading branch information
cscheid authored Aug 29, 2024
2 parents 9ce9fcc + fb60b45 commit 522b595
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 3 deletions.
4 changes: 3 additions & 1 deletion news/changelog-1.6.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,7 @@ All changes included in 1.6:
- ([#10295](https://github.com/quarto-dev/quarto-cli/issues/10235)): Fix regression to return error status to shell when `CommandError` is thrown.
- ([#10332](https://github.com/quarto-dev/quarto-cli/issues/10332)): Use `exitWithCleanup` whenever possible instead of `Deno.exit` to clean up temporary resources.
- ([#10334](https://github.com/quarto-dev/quarto-cli/issues/10334)): Fix `author` field rendered incorrectly in dashboards when multiple authors are present.
- ([#8383](https://github.com/quarto-dev/quarto-cli/issues/8383)), ([#10087](https://github.com/quarto-dev/quarto-cli/issues/10087)), ([#10369](https://github.com/quarto-dev/quarto-cli/issues/10369)): Track theme generation and file naming through content hashing to allow different themes to coexist in the same project.
- ([#10552](https://github.com/quarto-dev/quarto-cli/issues/10552)): Add `contents` shortcode.
- ([#8383](https://github.com/quarto-dev/quarto-cli/issues/8383)), ([#10087](https://github.com/quarto-dev/quarto-cli/issues/10087)), ([#10369](https://github.com/quarto-dev/quarto-cli/issues/10369)): Track theme generation and file naming through content hashing to allow different themes to coexist in the same project.
- ([#10581](https://github.com/quarto-dev/quarto-cli/issues/10581)): Add `.landscape` div processing to `typst`, `docx` and `pdf` formats to support pages in landscape orientation.
- ([#10591](https://github.com/quarto-dev/quarto-cli/issues/10591)): Make fenced div syntax slightly more robust by removing spaces around the `=` sign ahead of Pandoc's reader.
20 changes: 20 additions & 0 deletions src/resources/filters/normalize/astpipeline.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@
-- Copyright (C) 2023 Posit Software, PBC

function quarto_ast_pipeline()
-- Builds a filter table whose Para and Plain handlers walk the block and
-- emit a warning for every Str element whose text contains three or more
-- consecutive colons. The handlers return nothing, so the blocks themselves
-- are left untouched.
local function warn_on_stray_triple_colons()
  local function scan_block(block)
    local str_visitor = {
      Str = function(el)
        if not string.match(el.text, ":::(:*)") then
          return
        end
        local found =
          "\nThe following string was found in the document: " .. el.text
        local context =
          "\nThis string was found in a block element with the following content:\n\n" .. pandoc.utils.stringify(block)
        local advice =
          "\n\nThis usually indicates a problem with a fenced div in the document. Please check the document for errors."
        warn(found .. context .. advice)
      end
    }
    _quarto.ast.walk(block, str_visitor)
  end

  local handlers = {}
  handlers.Para = scan_block
  handlers.Plain = scan_block
  return handlers
end
return {
{ name = "normalize-table-merge-raw-html", filter = table_merge_raw_html() },

Expand All @@ -15,6 +34,7 @@ function quarto_ast_pipeline()
parse_extended_nodes(),
code_filename(),
normalize_fixup_data_uri_image_extension(),
warn_on_stray_triple_colons(),
})
},
{
Expand Down
135 changes: 135 additions & 0 deletions src/resources/pandoc/datadir/lpegfenceddiv.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
-- LPEG "parsing" and code for fenced div workarounds
-- Copyright (C) 2024 Posit Software, PBC

local lpeg = require('lpeg')
-- Primitive patterns used to recognise the opening line of a fenced div.

-- three or more colons
local colons = lpeg.P(':')^3
-- any run (possibly empty) of tabs and spaces
local maybe_spaces = lpeg.S("\t ")^0
local newline = lpeg.P("\n")

-- Builds a capturing pattern for a string delimited by `quote`, in which a
-- backslash-escaped delimiter does not terminate the string.
local function quoted_string(quote)
  local delimiter = lpeg.P(quote)
  local escaped_delimiter = lpeg.P("\\" .. quote)
  local body = (escaped_delimiter + (lpeg.P(1) - delimiter))^0
  return lpeg.C(delimiter * body * delimiter)
end

local single_quoted_string = quoted_string("'")
local double_quoted_string = quoted_string('"')

-- An unquoted token: a letter or one of `_#.=` followed by any number of
-- letters, digits or `.=-_`
local literal_head = lpeg.R("az", "AZ") + lpeg.S("_#.=")
local literal_tail = lpeg.R("az", "AZ", "09") + lpeg.S(".=-_")
local literal = lpeg.C(literal_head * literal_tail^0)

-- position capture
local Cp = lpeg.Cp()

-- Wraps `p` in a one-rule grammar that tries `p` at the current position
-- and, when that fails, skips a single character and tries again.
local function anywhere(p)
  local skip_one_and_retry = lpeg.P(1) * lpeg.V(1)
  return lpeg.P({ p + skip_one_and_retry })
end
-- Same search as a plain "try here, else skip one character" grammar, but
-- the successful match of `p` is bracketed by two position captures: one
-- taken right before `p` and one taken right after it.
local function anywhere_pos(p)
  local match_with_positions = Cp * p * Cp
  local skip_one_and_retry = lpeg.P(1) * lpeg.V(1)
  return lpeg.P({ match_with_positions + skip_one_and_retry })
end

-- One attribute token: a quoted string or a bare literal
local attr_token = single_quoted_string + double_quoted_string + literal
-- `{`, then zero or more attribute tokens separated by optional blanks, then `}`
local div_attr_block = lpeg.P("{") * maybe_spaces * (attr_token * maybe_spaces)^0 * lpeg.P("}")

-- The opening line must end right after the `}`: either a newline or the
-- end of the input
local line_end = newline + lpeg.P(-1)
local start_div = colons * maybe_spaces * div_attr_block * line_end
local start_div_search = anywhere_pos(start_div)

--- Return the first and the last of the values passed in.
-- The argument count comes from `select("#", ...)` instead of the length of
-- a packed table, because `#` is not well defined for tables with nil
-- holes; a nil among the arguments must not change which value is "last".
-- @param ... any number of values
-- @return the first argument, then the last argument (both nil when
--   called without arguments)
local function first_and_last(...)
  local n = select("#", ...)
  if n == 0 then
    return nil, nil
  end
  -- the extra parentheses truncate each `select` to exactly one value
  return (select(1, ...)), (select(n, ...))
end

-- Searchers for the next single / double quote character
local single_quote_p = anywhere(lpeg.P("'"))
local double_quote_p = anywhere(lpeg.P('"'))

-- An `=` with a space on at least one side: either `= `, or ` =` optionally
-- followed by one more space. The search yields the positions just before
-- and just after the matched text.
local equals_then_space = lpeg.P("= ")
local space_then_equals = lpeg.P(" =") * lpeg.P(" ")^-1
local bad_equals = anywhere_pos(equals_then_space + space_then_equals)

--- Remove the spaces around `=` in the attribute block of fenced div
-- opening lines (for example `::: {#id key = value}`), leaving `=` signs
-- that appear inside quoted values alone.
--
-- Every opening line found by `start_div_search` is processed. Within a
-- line, each `=` that has a space on either side is examined in turn, so a
-- quoted `=` early in the line does not prevent a later unquoted one from
-- being repaired.
--
-- The returned text can be shorter than the input: no padding is inserted
-- to compensate for the removed spaces.
--
-- @param txt string: the document source
-- @return string: the document with the offending `=` signs tightened
local function attempt_to_fix_fenced_div(txt)
  -- Number of times the searcher `p` matches in `str`, starting at `init`
  -- (default 1). `p` is expected to return the position following its match.
  local function count(str, p, init)
    local result = 0
    local pos = p:match(str, init or 1)
    while pos do
      result = result + 1
      pos = p:match(str, pos)
    end
    return result
  end

  -- The best heuristic we have for deciding that the text at `split` is not
  -- part of a quoted string: there must be an even number of single quotes
  -- and an even number of double quotes both to the left and to the right.
  -- If the total number of quotes is odd, this is a badly formatted
  -- key-value pair for a _different_ reason, and we report false so that
  -- nothing is changed.
  local function outside_quotes(line, split)
    local right_single = count(line, single_quote_p, split)
    local right_double = count(line, double_quote_p, split)
    local left_single = count(line, single_quote_p) - right_single
    local left_double = count(line, double_quote_p) - right_double
    return left_single % 2 == 0 and right_single % 2 == 0
      and left_double % 2 == 0 and right_double % 2 == 0
  end

  -- Replace every unquoted ` = `, `= ` or ` =` in `line` with a bare `=`.
  local function tighten_equals(line)
    local pos = 1
    while true do
      local bad_eq, bad_eq_end = bad_equals:match(line, pos)
      if not bad_eq then
        return line
      end
      if outside_quotes(line, bad_eq_end) then
        line = line:sub(1, bad_eq - 1) .. "=" .. line:sub(bad_eq_end)
        -- the `=` now sits at `bad_eq`; step back one character so that
        -- a further space on either side of it is picked up as well
        pos = math.max(bad_eq - 1, 1)
      else
        -- inside a quoted value: leave it and look further along the line
        pos = bad_eq_end
      end
    end
  end

  local b, e = first_and_last(start_div_search:match(txt))
  while b do
    local line = txt:sub(b, e - 1)
    local fixed = tighten_equals(line)
    if fixed ~= line then
      txt = txt:sub(1, b - 1) .. fixed .. txt:sub(e)
      -- the line shrank, so the position following it moved left
      e = b + #fixed
    end
    b, e = first_and_last(start_div_search:match(txt, e))
  end
  return txt
end

---------------------------------------------------

-- Inputs for `div_attr_block`: bare attribute blocks with an unquoted,
-- a single-quoted and a double-quoted value.
local div_attr_block_tests = {
"{#id .class key='value'}",
"{#id .class key=value}",
'{#id .class key="value with spaces"}',
}

-- Inputs for `start_div_search`: the same attribute blocks, each preceded
-- by a `:::` fence.
local div_block_tests = {
"::: {#id .class key='value'}",
"::: {#id .class key=value}",
'::: {#id .class key="value with spaces"}',
}
-- Inputs for `attempt_to_fix_fenced_div`: whole documents whose div
-- openers have spaces before and/or after `=`, including one with an `=`
-- inside a quoted value, one with two key-value pairs in the same block,
-- and one with several spaces on each side of `=`.
local end_to_end_tests = {
"::: {#id-1 .class key =value}\nfoo\n:::\n\n::: {#id-2 .class key='value'}\nfoo\n:::\n",
"::: {#id-1 .class key = value}\nfoo\n:::\n\n::: {#id-2 .class key='value'}\nfoo\n:::\n",
"::: {#id-1 .class key= value}\nfoo\n:::\n\n::: {#id-2 .class key='value'}\nfoo\n:::\n",
"::: {#id-1 .class key =value}\nfoo\n:::\n\n::: {#id-2 .class key= 'value'}\nfoo\n:::\n",
"::: {#id-1 .class key = value}\nfoo\n:::\n\n::: {#id-2 .class key = 'value'}\nfoo\n:::\n",
"::: {#id-1 .class key= value}\nfoo\n:::\n\n::: {#id-2 .class key ='value'}\nfoo\n:::\n",
"::: {#id-1 .class key= value please='do not touch = this one'}\nfoo\n:::",
"::: {#id-1 .class key= value key2 =value2}\nfoo\n:::",
"::: {#id-4 key = value}\nfoo\n:::",
}

-- Manual smoke test: runs every fixture through the matching pattern or
-- function and prints whatever comes back. Nothing is asserted; the output
-- is meant to be inspected by eye.
local function tests()
  -- print the result(s) of `run` for each case, optionally followed by a
  -- separator line
  local function show_all(cases, run, separator)
    for i = 1, #cases do
      print(run(cases[i]))
      if separator then
        print(separator)
      end
    end
  end

  show_all(div_attr_block_tests, function(case)
    return div_attr_block:match(case)
  end)
  show_all(div_block_tests, function(case)
    return start_div_search:match(case)
  end)
  show_all(end_to_end_tests, attempt_to_fix_fenced_div, "---")
end

-- Module exports: the fixer itself, plus the print-based smoke tests under
-- an underscore-prefixed name.
return {
_tests = tests,
attempt_to_fix_fenced_div = attempt_to_fix_fenced_div
}
2 changes: 1 addition & 1 deletion src/resources/pandoc/datadir/lpegshortcode.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- LPEG parsing and handling for shortcodes
-- Copyright (C) 2020-2023 Posit Software, PBC
-- Copyright (C) 2020-2024 Posit Software, PBC

local lpeg = require('lpeg')

Expand Down
4 changes: 3 additions & 1 deletion src/resources/pandoc/datadir/readqmd.lua
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
-- read qmd with quarto syntax extensions and produce quarto's extended AST
-- Copyright (C) 2023 Posit Software, PBC
-- Copyright (C) 2023-2024 Posit Software, PBC
--
-- Originally by Albert Krewinkel

local md_shortcode = require("lpegshortcode")
local md_fenced_div = require("lpegfenceddiv")

-- Support the same format extensions as pandoc's Markdown reader
Extensions = pandoc.format.extensions 'markdown'
Expand Down Expand Up @@ -125,6 +126,7 @@ local function urldecode(url)
end

local function readqmd(txt, opts)
txt = md_fenced_div.attempt_to_fix_fenced_div(txt)
txt, tags = escape_invalid_tags(txt)
txt = md_shortcode.parse_md_shortcode(txt)
local flavor = {
Expand Down
40 changes: 40 additions & 0 deletions tests/docs/smoke-all/fenced-div-reader-fixes/test-1.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
format: html
_quarto:
tests:
html:
ensureHtmlElements:
-
- "div#id"
- "div#id-2"
- "div#id-3"
- "div#id-4"
- "div#id-5"
- 'div#id-6[data-key2="key = value"]'
---

::: {#id key= value}
Some content inside a malformed Pandoc div that we decided to fix ourselves.
:::

::: {#id-2 key = value}
Some content inside a malformed Pandoc div that we decided to fix ourselves.
:::

::: {#id-3 key =value}
Some content inside a malformed Pandoc div that we decided to fix ourselves.
:::

::: {#id-4 key = value}
Some content inside a malformed Pandoc div that we decided to fix ourselves.
This case works but is obnoxiously inefficient, because our code eats one whitespace character at a time.
I'm not sure it's worth fixing.
:::

::: {#id-5 key = value key2 = value2}
Some content inside a malformed Pandoc div that we decided to fix ourselves.
:::

::: {#id-6 key2='key = value'}
An equals sign that we shouldn't touch.
:::

0 comments on commit 522b595

Please sign in to comment.