Skip to content

Commit

Permalink
fix: correctly hide lines in Rust code blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
max-heller committed Jan 17, 2025
1 parent 9a95722 commit ef0facd
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 48 deletions.
21 changes: 21 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,14 @@ hello[^1]
# fn main() {
# // another hidden line
println!("Hello, world!");
#foo
# foo
##foo
## foo
# # foo
#[test]
#![test]
#
# }
```
"#;
Expand All @@ -967,6 +975,11 @@ println!("Hello, world!");
├─ markdown/book.md
│ ``` rust
│ println!("Hello, world!");
│ #foo
│ #foo
│ # foo
│ #[test]
│ #![test]
│ ```
"#);
let book = MDBook::init()
Expand All @@ -988,6 +1001,14 @@ println!("Hello, world!");
│ # fn main() {
│ # // another hidden line
│ println!("Hello, world!");
│ #foo
│ # foo
│ ##foo
│ ## foo
│ # # foo
│ #[test]
│ #![test]
│ #
│ # }
│ ```
"#);
Expand Down
2 changes: 2 additions & 0 deletions src/preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ use walkdir::WalkDir;

use crate::pandoc::{self, native::ColWidth, OutputFormat, RenderContext};

mod code;

pub mod tree;
use tree::{Element, MdElement, Node, QualNameExt, TreeBuilder};

Expand Down
133 changes: 133 additions & 0 deletions src/preprocess/code.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use std::{borrow::Cow, iter, str};

use pulldown_cmark::CodeBlockKind;

use crate::CodeConfig;

/// Classification of a code block by language, carrying what is needed to
/// decide which of its lines are hidden from the rendered output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodeBlock<'book> {
    /// A block whose language is `rust`; hidden lines follow the `#` rules.
    Rust,
    /// A block in any other language, or with no language at all.
    Other {
        /// The language name taken from the start of the info string, if any.
        language: Option<&'book str>,
        /// Lines starting with this prefix (after leading whitespace) are hidden.
        /// `None` means no lines are hidden.
        hidelines_prefix: Option<&'book str>,
    },
}

impl<'book> CodeBlock<'book> {
/// Classifies a code block from its info string.
///
/// MdBook supports custom attributes in code block info strings: the language
/// name comes first and attributes follow, separated by a comma, space, or tab.
/// See https://rust-lang.github.io/mdBook/format/mdbook.html#rust-code-block-attributes
///
/// * `kind` - the block kind; indented blocks are treated as having an empty
///   info string.
/// * `cfg` - the book's code configuration, consulted for a per-language
///   `hidelines` prefix when the block has no `hidelines=` attribute.
///
/// Returns [`Self::Rust`] when the language is exactly `rust`, otherwise
/// [`Self::Other`] with the language (if any) and the resolved hide-lines prefix.
pub fn new(kind: &'book CodeBlockKind<'_>, cfg: Option<&'book mdbook::config::Code>) -> Self {
    let info_string: &str = match kind {
        CodeBlockKind::Indented => "",
        CodeBlockKind::Fenced(info_string) => info_string,
    };
    let mut attributes = info_string
        .split([',', ' ', '\t'])
        .map(|part| part.trim());
    // `split` always yields at least one (possibly empty) item, so an absent
    // language shows up as `Some("")`; normalize that to `None` so callers can
    // tell "no language" apart and apply their own fallback.
    let language = attributes.next().filter(|language| !language.is_empty());

    if language == Some("rust") {
        return Self::Rust;
    }

    // An explicit `hidelines=<prefix>` attribute takes precedence over the
    // per-language prefix from [output.html.code.hidelines].
    let hidelines_prefix = attributes
        .find_map(|attr| attr.strip_prefix("hidelines="))
        .or_else(|| Some(cfg?.hidelines.get(language?)?.as_str()));

    Self::Other {
        language,
        hidelines_prefix,
    }
}
}

impl CodeBlock<'_> {
/// Returns the language name of this code block: `"rust"` for
/// [`Self::Rust`], otherwise whatever language was recorded (possibly none).
pub fn language(&self) -> Option<&str> {
    if let Self::Other { language, .. } = self {
        *language
    } else {
        Some("rust")
    }
}

/// Splits the given chunks of code into lines and drops the lines that
/// should be hidden for this kind of code block.
///
/// * `code` - chunks of source text; each chunk is split into lines, and an
///   empty chunk contributes a single empty line.
/// * `cfg` - when `cfg.show_hidden_lines` is set, every line is returned as is.
///
/// Returns the displayed lines in order. Rust lines may be rewritten (hence
/// the [`Cow`]); lines of other languages are always borrowed unchanged.
pub fn lines<'code>(
    &self,
    code: impl Iterator<Item = &'code str>,
    cfg: &CodeConfig,
) -> Vec<Cow<'code, str>> {
    let lines = code.flat_map(|chunk| {
        // `str::lines` yields nothing for "", so emit the empty chunk itself
        // as a single line in that case; otherwise this part yields nothing.
        let blank = iter::once(chunk).filter(|chunk| chunk.is_empty());
        let rest: str::Lines<'code> = chunk.lines();
        blank.chain(rest)
    });

    // https://rust-lang.github.io/mdBook/format/mdbook.html#hiding-code-lines
    if cfg.show_hidden_lines {
        return lines.map(Cow::Borrowed).collect();
    }

    match self {
        Self::Rust => lines.filter_map(Self::displayed_rust_line).collect(),
        Self::Other {
            hidelines_prefix: Some(prefix),
            ..
        } => lines
            .filter(|line| !line.trim_start().starts_with(*prefix))
            .map(Cow::Borrowed)
            .collect::<Vec<_>>(),
        Self::Other {
            hidelines_prefix: None,
            ..
        } => lines.map(Cow::Borrowed).collect(),
    }
}

/// Decides how a single line of a Rust code block is displayed.
///
/// Returns `None` when the line is hidden, and `Some` with the text to show
/// otherwise. Ignoring leading whitespace:
///
/// * `##…` is shown with the first `#` removed (two consecutive hashes
///   override line hiding);
/// * `# …` (hash followed by a space) and a lone `#`, optionally followed only
///   by whitespace, are hidden;
/// * anything else, including `#[attr]`, `#![attr]` and `#text`, is shown
///   unchanged.
///
/// See https://doc.rust-lang.org/rustdoc/write-documentation/documentation-tests.html#hiding-portions-of-the-example
fn displayed_rust_line(line: &str) -> Option<Cow<'_, str>> {
    let trimmed = line.trim_start();
    let Some(rest) = trimmed.strip_prefix('#') else {
        // Not a `#` line at all (this also covers blank lines).
        return Some(Cow::Borrowed(line));
    };

    if rest.starts_with('#') {
        // Keep the original indentation and drop only the first hash.
        let (indent, _) = line.split_at(line.len() - trimmed.len());
        return Some(Cow::Owned(format!("{indent}{rest}")));
    }

    // A bare `#` is hidden even when trailing whitespace (e.g. a tab) follows.
    if rest.starts_with(' ') || rest.trim().is_empty() {
        return None;
    }

    // `#[attr]`, `#![attr]`, `#text`, ... are regular code.
    Some(Cow::Borrowed(line))
}
}
65 changes: 17 additions & 48 deletions src/preprocess/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use html5ever::{
tendril::{fmt::UTF8, format_tendril, Tendril, TendrilSink},
LocalName,
};
use pulldown_cmark::{CodeBlockKind, CowStr, LinkType};
use pulldown_cmark::{CowStr, LinkType};

use crate::{html, latex, pandoc, preprocess::UnresolvableRemoteImage};

Expand All @@ -23,6 +23,8 @@ pub use node::{Attributes, Element, MdElement, Node, QualNameExt};
mod sink;
pub use sink::HtmlTreeSink;

use super::code;

#[derive(Debug)]
pub struct Tree<'book> {
errors: Vec<Cow<'static, str>>,
Expand Down Expand Up @@ -362,63 +364,29 @@ impl<'book> Emitter<'book> {
inlines.serialize_element()?.serialize_code((), s)
}),
MdElement::CodeBlock(kind) => {
// MdBook supports custom attributes in code block info strings.
// Attributes are separated by a comma, space, or tab from the language name.
// See https://rust-lang.github.io/mdBook/format/mdbook.html#rust-code-block-attributes
// This processes and strips out the attributes.
let (language, mut attributes) = {
let info_string = match kind {
CodeBlockKind::Indented => "",
CodeBlockKind::Fenced(info_string) => info_string,
};
let mut parts = info_string.split([',', ' ', '\t']).map(|part| part.trim());
(parts.next(), parts)
};
let ctx = &serializer.preprocessor().preprocessor.ctx;

// https://rust-lang.github.io/mdBook/format/mdbook.html?highlight=hide#hiding-code-lines
let hide_lines = !serializer
.preprocessor()
.preprocessor
.ctx
.code
.show_hidden_lines;
let hidden_line_prefix = hide_lines
.then(|| {
let hidelines_override =
attributes.find_map(|attr| attr.strip_prefix("hidelines="));
hidelines_override.or_else(|| {
let lang = language?;
// Respect [output.html.code.hidelines]
let html = serializer.preprocessor().preprocessor.ctx.html;
html.and_then(|html| Some(html.code.hidelines.get(lang)?.as_str()))
.or((lang == "rust").then_some("#"))
})
})
.flatten();
let code_block = code::CodeBlock::new(kind, ctx.html.map(|cfg| &cfg.code));

let texts = node.children().map(|node| {
let lines = node.children().map(|node| {
match node.value() {
Node::Element(Element::Markdown(MdElement::Text(text))) => text,
event => panic!("Code blocks should contain only literal text, but encountered {event:?}"),
}
});
let lines = texts
.flat_map(|text| text.lines())
.filter(|line| {
hidden_line_prefix
.map_or(true, |prefix| !line.trim_start().starts_with(prefix))
})
.collect::<Vec<_>>();
}).flat_map(|text| text.lines());
let lines = code_block.lines(lines, ctx.code);

// Pandoc+fvextra only wraps long lines in code blocks with info strings
// so fall back to "text"
let language = language.unwrap_or("text");
let mut language = code_block.language();

if let pandoc::OutputFormat::Latex { .. } =
serializer.preprocessor().preprocessor.ctx.output
{
const CODE_BLOCK_LINE_LENGTH_LIMIT: usize = 1000;

// Pandoc+fvextra only wraps long lines in code blocks with info strings
// so fall back to "text"
language = language.or(Some("text"));

let overly_long_line = lines
.iter()
.any(|line| line.len() > CODE_BLOCK_LINE_LENGTH_LIMIT);
Expand All @@ -438,7 +406,7 @@ impl<'book> Emitter<'book> {
let ac = AhoCorasick::new(patterns).unwrap();
lines
.into_iter()
.map(move |line| ac.replace_all(line, replace_with))
.map(move |line| ac.replace_all(&line, replace_with))
};
return serializer
.blocks()?
Expand All @@ -454,13 +422,14 @@ impl<'book> Emitter<'book> {
}
}

let classes = [CowStr::Borrowed(language)];
let language = language.map(CowStr::Borrowed);
let classes = language.as_slice();
serializer
.blocks()?
.serialize_element()?
.serialize_code_block((None, &classes, &[]), |code| {
for line in lines {
code.serialize_code(line)?;
code.serialize_code(&line)?;
code.serialize_code("\n")?;
}
Ok(())
Expand Down

0 comments on commit ef0facd

Please sign in to comment.