Skip to content

Commit

Permalink
handle unknown markdown
Browse files Browse the repository at this point in the history
I have two spots in the code that I wasn't able to produce cases for. I
suspect these can't happen, but just in case, I'm going to put basically
an overridable panic of sorts. By default, hitting either of these cases
will result in an error; but if you provide the hidden
`--allow-unknown-markdown` flag, we'll just ignore them.

I can't test this (if I knew how to hit these branches, I wouldn't need
it!), but I manually added a `lookups.unknown_markdown("testing")?;` in
`MdElem::read` to verify that (a) it correctly errors out without the
flag, and (b) it correctly succeeds with the flag.

As part of this, I added better handling for the Err case of
`MdElem::read`.

Also, replace a TODO with a reference to #84.

This PR removes all the TODOs for #11. Once it gets merged, I'll check
the last checkbox on that ticket and manually close it.
  • Loading branch information
yshavit authored Jul 17, 2024
1 parent 7ea559f commit 3ff304d
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 24 deletions.
4 changes: 4 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ pub struct Cli {
/// Quiet: do not print anything to stdout. The exit code will still be 0 if-and-only-iff any elements match.
#[arg(long, short)]
pub(crate) quiet: bool,

// See: tree.rs > Lookups::unknown_markdown.
#[arg(long, hide = true)]
pub(crate) allow_unknown_markdown: bool,
}

impl Cli {
Expand Down
12 changes: 11 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,17 @@ where
W: Write,
{
let ast = markdown::to_mdast(&contents, &markdown::ParseOptions::gfm()).unwrap();
let mdqs = MdElem::read(ast, &ReadOptions::default()).unwrap();
let read_options = ReadOptions {
validate_no_conflicting_links: false,
allow_unknown_markdown: cli.allow_unknown_markdown,
};
let mdqs = match MdElem::read(ast, &read_options) {
Ok(mdqs) => mdqs,
Err(err) => {
eprintln!("error: {}", err);
return false;
}
};

let selectors_str = cli.selector_string();
let selectors = match MdqRefSelector::parse(&selectors_str) {
Expand Down
2 changes: 1 addition & 1 deletion src/select/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ impl MdqRefSelector {
children.iter().map(|child| MdElemRef::Inline(child)).collect()
}
Inline::Footnote(footnote) => vec![MdElemRef::Doc(&footnote.text)],
Inline::Link(link) => vec![MdElemRef::Link(link)], // TODO find a test case that hits this to make sure it doesn't infinite-loop!
Inline::Link(link) => vec![MdElemRef::Link(link)], // issue #84: find a test case that hits this to make sure it doesn't infinite-loop!
Inline::Image(image) => vec![MdElemRef::Image(image)],
Inline::Text(Text { .. }) => Vec::new(),
},
Expand Down
118 changes: 96 additions & 22 deletions src/tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::backtrace::Backtrace;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::Debug;
use std::fmt::{Debug, Display, Formatter, Write};
use std::hash::{Hash, Hasher};
use std::vec::IntoIter;

Expand Down Expand Up @@ -88,14 +89,8 @@ pub struct ReadOptions {
/// [1]: https://example.com/one
/// ```
pub validate_no_conflicting_links: bool,
}

impl Default for ReadOptions {
fn default() -> Self {
Self {
validate_no_conflicting_links: false,
}
}
pub allow_unknown_markdown: bool,
}

pub type TableRow = Vec<Line>;
Expand Down Expand Up @@ -200,10 +195,57 @@ pub enum InvalidMd {
Unsupported(mdast::Node),
NonListItemDirectlyUnderList(mdast::Node),
NonRowDirectlyUnderTable(mdast::Node),
NonInlineWhereInlineExpected,
NonInlineWhereInlineExpected(MdElem),
MissingReferenceDefinition(String),
ConflictingReferenceDefinition(String),
InternalError,
InternalError(PartialEqBacktrace),
UnknownMarkdown(&'static str),
}

/// A wrapper for [Backtrace] that implements [PartialEq] to always return `true`. This lets us use it in a struct
/// while still letting us use `#[derive(PartialEq)]`
#[derive(Debug)]
pub struct PartialEqBacktrace(Backtrace);

impl PartialEq for PartialEqBacktrace {
fn eq(&self, _other: &Self) -> bool {
true
}
}

impl Display for InvalidMd {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
InvalidMd::Unsupported(node) => {
write!(f, "unsupported node: {:?}", node)
}
InvalidMd::NonListItemDirectlyUnderList(node) => {
write!(f, "expected a list item, but found: {:?}", node)
}
InvalidMd::NonRowDirectlyUnderTable(node) => {
write!(f, "expected a row, but found: {:?}", node)
}
InvalidMd::NonInlineWhereInlineExpected(node) => {
write!(f, "expected an inline element, but found: {:?}", node)
}
InvalidMd::MissingReferenceDefinition(id) => {
write!(f, "couldn't find definition for link/image/footnote: {}", id)
}
InvalidMd::ConflictingReferenceDefinition(id) => {
write!(f, "found multiple definitions for link/image/footnote: {}", id)
}
InvalidMd::InternalError(backtrace) => {
f.write_str("internal error\n")?;
std::fmt::Display::fmt(&backtrace.0, f)
}
InvalidMd::UnknownMarkdown(description) => {
write!(f, "encountered unknown markdown: {}\n\n", description)?;
f.write_str("* Please consider reporting this at https://github.com/yshavit/mdq/issues\n")?;
f.write_str("* You can suppress this error by using --allow-unknown-markdown.")
}
}?;
f.write_char('\n')
}
}

#[derive(Debug, PartialEq, Hash)]
Expand Down Expand Up @@ -316,7 +358,7 @@ impl MdElem {
})),
mdast::Node::ImageReference(node) => MdElem::Inline(Inline::Image(Image {
alt: node.alt,
link: lookups.resolve_link(node.identifier, node.label, node.reference_kind)?,
link: lookups.resolve_link(node.identifier, node.label, node.reference_kind, lookups)?,
})),
mdast::Node::Link(node) => MdElem::Inline(Inline::Link(Link {
text: MdElem::inlines(node.children, lookups)?,
Expand All @@ -328,10 +370,10 @@ impl MdElem {
})),
mdast::Node::LinkReference(node) => MdElem::Inline(Inline::Link(Link {
text: MdElem::inlines(node.children, lookups)?,
link_definition: lookups.resolve_link(node.identifier, node.label, node.reference_kind)?,
link_definition: lookups.resolve_link(node.identifier, node.label, node.reference_kind, lookups)?,
})),
mdast::Node::FootnoteReference(node) => {
let definition = lookups.resolve_footnote(&node.identifier, &node.label)?;
let definition = lookups.resolve_footnote(&node.identifier, &node.label, lookups)?;
MdElem::Inline(Inline::Footnote(Footnote {
label: node.label.unwrap_or(node.identifier),
text: MdElem::all(definition.children.clone(), lookups)?,
Expand Down Expand Up @@ -383,7 +425,7 @@ impl MdElem {
let mut column = Vec::with_capacity(cell_nodes.len());
for cell_node in cell_nodes {
let mdast::Node::TableCell(table_cell) = cell_node else {
return Err(InvalidMd::InternalError);
return Err(InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture())));
};
let cell_contents = Self::inlines(table_cell.children, lookups)?;
column.push(cell_contents);
Expand All @@ -397,7 +439,8 @@ impl MdElem {
}
mdast::Node::ThematicBreak(_) => m_node!(MdElem::ThematicBreak),
mdast::Node::TableRow(_) | mdast::Node::TableCell(_) | mdast::Node::ListItem(_) => {
return Err(InvalidMd::InternalError); // should have been handled by Node::Table
// should have been handled by Node::Table
return Err(InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture())));
}
mdast::Node::Definition(_) => return Ok(Vec::new()),
mdast::Node::Paragraph(node) => m_node!(MdElem::Paragraph {
Expand Down Expand Up @@ -532,7 +575,7 @@ impl MdElem {
let mut result = Vec::with_capacity(mdq_children.len());
for child in mdq_children {
let MdElem::Inline(inline) = child else {
return Err(InvalidMd::NonInlineWhereInlineExpected);
return Err(InvalidMd::NonInlineWhereInlineExpected(child));
};
// If both this and the previous were plain text, then just combine the texts. This can happen if there was
// a Node::Break between them.
Expand Down Expand Up @@ -595,6 +638,7 @@ where
struct Lookups {
link_definitions: HashMap<String, mdast::Definition>,
footnote_definitions: HashMap<String, mdast::FootnoteDefinition>,
allow_unknown_markdown: bool,
}

impl Lookups {
Expand All @@ -604,21 +648,31 @@ impl Lookups {
let mut result = Self {
link_definitions: HashMap::with_capacity(DEFAULT_CAPACITY),
footnote_definitions: HashMap::with_capacity(DEFAULT_CAPACITY),
allow_unknown_markdown: read_opts.allow_unknown_markdown,
};

result.build_lookups(node, &read_opts)?;

Ok(result)
}

fn unknown_markdown(&self, description: &'static str) -> Result<(), InvalidMd> {
if self.allow_unknown_markdown {
Ok(())
} else {
Err(InvalidMd::UnknownMarkdown(description))
}
}

fn resolve_link(
&self,
identifier: String,
label: Option<String>,
reference_kind: mdast::ReferenceKind,
lookups: &Lookups,
) -> Result<LinkDefinition, InvalidMd> {
if let None = label {
todo!("What is this case???");
lookups.unknown_markdown("link label was None")?;
}
let Some(definition) = self.link_definitions.get(&identifier) else {
let human_visible_identifier = label.unwrap_or(identifier);
Expand All @@ -641,9 +695,10 @@ impl Lookups {
&self,
identifier: &String,
label: &Option<String>,
lookups: &Lookups,
) -> Result<&mdast::FootnoteDefinition, InvalidMd> {
if label.is_none() {
todo!("What is this case???");
lookups.unknown_markdown("footnote label was None")?;
}
let Some(definition) = self.footnote_definitions.get(identifier) else {
let human_visible_identifier = label.to_owned().unwrap_or_else(|| identifier.to_string());
Expand Down Expand Up @@ -836,7 +891,7 @@ mod tests {

check!(&root.children[0], Node::List(ul), lookups => m_node!(MdElem::List{starting_index, items}) = {
for child in &ul.children {
check!(error: child, Node::ListItem(_), lookups => InvalidMd::InternalError);
check!(error: child, Node::ListItem(_), lookups => internal_error());
}
assert_eq!(starting_index, None);
assert_eq!(items, vec![
Expand All @@ -856,7 +911,7 @@ mod tests {
});
check!(&root.children[1], Node::List(ol), lookups => m_node!(MdElem::List{starting_index, items}) = {
for child in &ol.children {
check!(error: child, Node::ListItem(_), lookups => InvalidMd::InternalError);
check!(error: child, Node::ListItem(_), lookups => internal_error());
}
assert_eq!(starting_index, Some(4));
assert_eq!(items, vec![
Expand Down Expand Up @@ -1681,9 +1736,9 @@ mod tests {
);
// Do a spot check for the rows and cells; mainly just so that we'll have called check! on them.
assert_eq!(table_node.children.len(), 2); // two rows
check!(error: &table_node.children[0], Node::TableRow(tr), lookups => InvalidMd::InternalError, {
check!(error: &table_node.children[0], Node::TableRow(tr), lookups => internal_error(), {
assert_eq!(tr.children.len(), 4); // four columns
check!(error: &tr.children[0], Node::TableCell(_), lookups => InvalidMd::InternalError);
check!(error: &tr.children[0], Node::TableCell(_), lookups => internal_error());
})
});
}
Expand Down Expand Up @@ -1750,6 +1805,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
indoc! {r#"
Hello [world][1]
Expand All @@ -1774,6 +1830,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
indoc! {r#"
This [looks like a link][^1], but mdast parses it as a footnote.
Expand Down Expand Up @@ -1804,6 +1861,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
md,
);
Expand All @@ -1822,6 +1880,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
md,
);
Expand All @@ -1839,6 +1898,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
indoc! {r#"
This [link is duplicated][1].
Expand All @@ -1859,6 +1919,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links,
allow_unknown_markdown: false,
},
indoc! {r#"
This [link is duplicated][1].
Expand Down Expand Up @@ -2203,4 +2264,17 @@ mod tests {
nodes.iter().for_each(|n| build(&mut s, n));
s
}

fn internal_error() -> InvalidMd {
InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture()))
}

impl Default for ReadOptions {
fn default() -> Self {
Self {
validate_no_conflicting_links: false,
allow_unknown_markdown: false,
}
}
}
}

0 comments on commit 3ff304d

Please sign in to comment.