Skip to content

Commit

Permalink
Add a separate mode to parse footnotes the same way GitHub does
Browse files Browse the repository at this point in the history
Resolves pulldown-cmark#20

Resolves pulldown-cmark#530

This change is similar to, but a more limited change than,
 <pulldown-cmark#544>. It changes
the syntax, but does not touch the generated HTML or event API.

Motivation
----------

This commit is written with usage in mdBook, rustdoc, and docs.rs
in mind.

* Having a standard to follow, or at least a public test suite in
  [cmark-gfm] [^c], makes it easier to distinguish bugs from features.
* It makes sense to commit to following GitHub's behavior specifically,
  because mdBook chapters and docs.rs README files are often viewed in
  GitHub preview windows, so any divergence will be very annoying.
* If mdBook and docs.rs are going to use this syntax, then rustdoc
  should, too.
* Having both footnote syntaxes use the same API and rendering makes it
  more feasible for rustdoc to change the syntax over an [edition].
  To introduce a syntax change in a new edition of Rust, we must make
  rustdoc warn anyone who writes code that will have its meaning change.
  To do it, run the parser twice in lockstep (with `ENABLE_FOOTNOTES`
  on one parser, and `ENABLE_GFM_FOOTNOTES` on the other), and warn if
  they diverge.
  * Alternatively, run a Crater build with this same code to check if
    this actually causes widespread breakage.
* In particular, using tree rewriting to push the footnotes to the end
  is not as useful as it sounds, since that's not enough to exactly
  copy the way GitHub renders footnotes. To do that, you also need to
  sort the footnotes by the order in which they are *referenced*, not
  the order in which they are defined. This type of tree rewriting is
  also a waste of time if you want "margin note" rendering instead of
  putting them all at the end.

[cmark-gfm]: https://github.com/github/cmark-gfm/blob/1e230827a584ebc9938c3eadc5059c55ef3c9abf/test/extensions.txt#L702
[edition]: https://doc.rust-lang.org/edition-guide/editions/index.html

[^c]: cmark-gfm is under the MIT license, so incorporating parts of its
    test suite into pulldown-cmark should be fine.
  • Loading branch information
notriddle committed Jun 1, 2023
1 parent 4921c42 commit e3dc6cf
Show file tree
Hide file tree
Showing 20 changed files with 1,808 additions and 866 deletions.
12 changes: 8 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ fn generate_tests_from_spec() {

let spec_name = file_path.file_stem().unwrap().to_str().unwrap();

let spec = Spec::new(&raw_spec);
let spec = Spec::new(&raw_spec, spec_name.starts_with("gfm_"));
let mut n_tests = 0;

spec_rs
Expand All @@ -86,7 +86,7 @@ fn {}_test_{i}() {{
let original = r##"{original}"##;
let expected = r##"{expected}"##;
test_markdown_html(original, expected, {smart_punct}, {metadata_blocks});
test_markdown_html(original, expected, {smart_punct}, {metadata_blocks}, {is_gfm});
}}
"###,
spec_name,
Expand All @@ -95,6 +95,7 @@ fn {}_test_{i}() {{
expected = testcase.expected,
smart_punct = testcase.smart_punct,
metadata_blocks = testcase.metadata_blocks,
is_gfm = testcase.is_gfm,
))
.unwrap();

Expand Down Expand Up @@ -134,12 +135,13 @@ fn {}_test_{i}() {{
/// A spec file being turned into generated test cases (only built with the
/// `gen-tests` feature).
#[cfg(feature = "gen-tests")]
pub struct Spec<'a> {
// Borrowed spec text handed to `Spec::new`.
spec: &'a str,
// GFM flag for this spec; the build script passes
// `spec_name.starts_with("gfm_")`, and the value is copied into each
// `TestCase` produced from the spec.
is_gfm: bool,
}

#[cfg(feature = "gen-tests")]
impl<'a> Spec<'a> {
pub fn new(spec: &'a str) -> Self {
Spec { spec }
pub fn new(spec: &'a str, is_gfm: bool) -> Self {
Spec { spec, is_gfm }
}
}

Expand All @@ -149,6 +151,7 @@ pub struct TestCase {
pub expected: String,
pub smart_punct: bool,
pub metadata_blocks: bool,
pub is_gfm: bool,
}

#[cfg(feature = "gen-tests")]
Expand Down Expand Up @@ -190,6 +193,7 @@ impl<'a> Iterator for Spec<'a> {
let test_case = TestCase {
original: spec[i_start..i_end].to_string().replace("→", "\t"),
expected: spec[i_end + 2..e_end].to_string().replace("→", "\t"),
is_gfm: self.is_gfm,
smart_punct,
metadata_blocks,
};
Expand Down
153 changes: 153 additions & 0 deletions examples/footnote-rewrite.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
use std::io::Write as _;
use std::fmt::Write as _;
use std::collections::HashMap;

use pulldown_cmark::{html, Event, Options, Parser, Tag, TagEnd, CowStr};

/// Escapes the characters that are significant in HTML text and quoted
/// attribute values.
///
/// Footnote labels come straight from the Markdown source, so they must be
/// escaped before being spliced into hand-written `id="…"` / `href="…"`
/// attributes; otherwise a label containing `"`, `<` or `&` would break (or
/// inject into) the generated markup.
fn escape_label(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// This example shows how to do footnotes as bottom-notes, in the style of GitHub.
///
/// The event stream is processed in two passes:
///
/// 1. While the document body is rendered, every footnote definition is pulled
///    out of the stream and buffered, and every footnote reference is replaced
///    by a numbered `<sup>` link. Usages are counted per label.
/// 2. The buffered definitions that were actually referenced are sorted by the
///    order of first reference and rendered as an ordered list, with one
///    back-reference link per usage.
fn main() {
    let markdown_input: &str = "This is an [^a] footnote [^a].\n\n[^a]: footnote contents";
    println!("Parsing the following markdown string:\n{}", markdown_input);

    // To generate this style, you have to collect the footnotes at the end, while parsing.
    // You also need to count usages.
    //
    // `footnotes`: finished definitions, each one a complete Start..End event list.
    // `in_footnote`: stack of definitions currently being collected.
    // `footnote_numbers`: label -> (display number, usage count).
    let mut footnotes = Vec::new();
    let mut in_footnote = Vec::new();
    let mut footnote_numbers = HashMap::new();
    // ENABLE_GFM_FOOTNOTES is used in this example, but ENABLE_FOOTNOTES would work, too.
    let parser = Parser::new_ext(markdown_input, Options::ENABLE_GFM_FOOTNOTES)
        .filter_map(|event| {
            match event {
                Event::Start(Tag::FootnoteDefinition(_)) => {
                    in_footnote.push(vec![event]);
                    None
                }
                Event::End(TagEnd::FootnoteDefinition) => {
                    let mut definition = in_footnote
                        .pop()
                        .expect("footnote definition ended without a matching start");
                    definition.push(event);
                    footnotes.push(definition);
                    None
                }
                Event::FootnoteReference(name) => {
                    let label = escape_label(&name);
                    // A label seen for the first time gets the next free number;
                    // a repeated label keeps the number it was given originally.
                    let next_number = footnote_numbers.len() + 1;
                    let (n, nr) = footnote_numbers
                        .entry(name)
                        .or_insert((next_number, 0usize));
                    *nr += 1;
                    let html = Event::Html(format!(r##"<sup class="footnote-reference" id="fr-{label}-{nr}"><a href="#fn-{label}">[{n}]</a></sup>"##).into());
                    // A reference inside a definition belongs to that definition's buffer.
                    match in_footnote.last_mut() {
                        Some(definition) => {
                            definition.push(html);
                            None
                        }
                        None => Some(html),
                    }
                }
                // Everything else is buffered while inside a definition and
                // passed straight through otherwise.
                _ => match in_footnote.last_mut() {
                    Some(definition) => {
                        definition.push(event);
                        None
                    }
                    None => Some(event),
                },
            }
        });

    // Write to anything implementing the `Write` trait. This could also be a file
    // or network socket.
    let stdout = std::io::stdout();
    let mut handle = stdout.lock();
    handle.write_all(b"\nHTML output:\n").unwrap();
    html::write_html(&mut handle, parser).unwrap();

    // To make the footnotes look right, we need to sort them by their appearance order, not by
    // the in-tree order of their actual definitions. Unused items are omitted entirely.
    //
    // For example, this code:
    //
    //     test [^1] [^2]
    //     [^2]: second used, first defined
    //     [^1]: test
    //
    // Gets rendered like *this* if you copy it into a GitHub comment box:
    //
    //     <p>test <sup>[1]</sup> <sup>[2]</sup></p>
    //     <hr>
    //     <ol>
    //         <li>test ↩</li>
    //         <li>second used, first defined ↩</li>
    //     </ol>
    //
    // Unused definitions are dropped *before* deciding whether to print the list, so a
    // document whose definitions are all unreferenced does not get an empty `<hr><ol></ol>`.
    footnotes.retain(|f| match f.first() {
        Some(Event::Start(Tag::FootnoteDefinition(name))) => footnote_numbers
            .get(name)
            .map_or(false, |&(_, usage_count)| usage_count != 0),
        _ => false,
    });
    if !footnotes.is_empty() {
        footnotes.sort_by_cached_key(|f| match f.first() {
            Some(Event::Start(Tag::FootnoteDefinition(name))) => footnote_numbers
                .get(name)
                .map_or(0, |&(number, _)| number),
            _ => unreachable!(),
        });
        handle.write_all(b"<hr><ol class=\"footnotes-list\">\n").unwrap();
        html::write_html(&mut handle, footnotes.into_iter().flat_map(|fl| {
            // To write backrefs, the name needs to be kept until the end of the footnote definition.
            let mut name = CowStr::from("");
            // Backrefs are included in the final paragraph of the footnote, if it's normal text.
            // For example, this DOM can be produced:
            //
            // Markdown:
            //
            //     five [^feet].
            //
            //     [^feet]:
            //         A foot is defined, in this case, as 0.3048 m.
            //
            //         Historically, the foot has not been defined this way, corresponding to many
            //         subtly different units depending on the location.
            //
            // HTML:
            //
            //     <p>five <sup class="footnote-reference" id="fr-feet-1"><a href="#fn-feet">[1]</a></sup>.</p>
            //
            //     <ol class="footnotes-list">
            //     <li id="fn-feet">
            //       <p>A foot is defined, in this case, as 0.3048 m.</p>
            //       <p>Historically, the foot has not been defined this way, corresponding to many
            //       subtly different units depending on the location. <a href="#fr-feet-1">↩</a></p>
            //     </li>
            //     </ol>
            //
            // This is mostly a visual hack, so that footnotes use less vertical space.
            //
            // If there is no final paragraph, such as a tabular, list, or image footnote, it gets
            // pushed after the last tag instead.
            let mut has_written_backrefs = false;
            let fl_len = fl.len();
            let footnote_numbers = &footnote_numbers;
            fl.into_iter().enumerate().map(move |(i, f)| {
                match f {
                    Event::Start(Tag::FootnoteDefinition(current_name)) => {
                        let label = escape_label(&current_name);
                        name = current_name;
                        has_written_backrefs = false;
                        Event::Html(format!(r##"<li id="fn-{label}">"##).into())
                    }
                    // Only the last two events of a definition qualify: the closing tag of a
                    // trailing paragraph, or the end of the definition itself.
                    // `i + 2 >= fl_len` is the underflow-free form of `i >= fl_len - 2`.
                    Event::End(TagEnd::FootnoteDefinition) | Event::End(TagEnd::Paragraph)
                        if !has_written_backrefs && i + 2 >= fl_len =>
                    {
                        let usage_count = footnote_numbers
                            .get(&name)
                            .expect("only referenced footnotes are rendered")
                            .1;
                        let label = escape_label(&name);
                        let mut end = String::with_capacity(
                            (r##" <a href="#fr--1">↩</a>"##.len() + label.len()) * usage_count
                                + "</li>\n".len(),
                        );
                        for usage in 1..=usage_count {
                            if usage == 1 {
                                write!(&mut end, r##" <a href="#fr-{label}-{usage}">↩</a>"##).unwrap();
                            } else {
                                write!(&mut end, r##" <a href="#fr-{label}-{usage}">↩{usage}</a>"##).unwrap();
                            }
                        }
                        has_written_backrefs = true;
                        // Close whichever element this event was closing.
                        if matches!(f, Event::End(TagEnd::FootnoteDefinition)) {
                            end.push_str("</li>\n");
                        } else {
                            end.push_str("</p>\n");
                        }
                        Event::Html(end.into())
                    }
                    Event::End(TagEnd::FootnoteDefinition) => Event::Html("</li>\n".into()),
                    Event::FootnoteReference(_) => unreachable!("converted to HTML earlier"),
                    other => other,
                }
            })
        })).unwrap();
        handle.write_all(b"</ol>\n").unwrap();
    }
}
Loading

0 comments on commit e3dc6cf

Please sign in to comment.