Skip to content

Commit

Permalink
Rollup merge of rust-lang#115947 - GuillaumeGomez:custom_code_classes…
Browse files Browse the repository at this point in the history
…_in_docs-warning, r=notriddle

Custom code classes in docs warning

Fixes rust-lang#115938.

This PR does two things:
 1. Unless the `custom_code_classes_in_docs` feature is enabled, it will use the old codeblock tag parser.
 2. If there is a codeblock tag that starts with a `.`, it will emit a behaviour change warning.

Hopefully this is the last missing part for this feature until stabilization.

Follow-up of rust-lang#110800.

r? `@notriddle`
  • Loading branch information
GuillaumeGomez authored Sep 19, 2023
2 parents 0060db7 + 295ec09 commit 52a0d13
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 143 deletions.
275 changes: 162 additions & 113 deletions src/librustdoc/html/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ impl<'tcx> ExtraInfo<'tcx> {

#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
original: String,
pub(crate) original: String,
pub(crate) should_panic: bool,
pub(crate) no_run: bool,
pub(crate) ignore: Ignore,
Expand All @@ -893,11 +893,13 @@ pub(crate) enum Ignore {
/// ```eBNF
/// lang-string = *(token-list / delimited-attribute-list / comment)
///
/// bareword = CHAR *(CHAR)
/// bareword = LEADINGCHAR *(CHAR)
/// bareword-without-leading-char = CHAR *(CHAR)
/// quoted-string = QUOTE *(NONQUOTE) QUOTE
/// token = bareword / quoted-string
/// token-without-leading-char = bareword-without-leading-char / quoted-string
/// sep = COMMA/WS *(COMMA/WS)
/// attribute = (DOT token)/(token EQUAL token)
/// attribute = (DOT token)/(token EQUAL token-without-leading-char)
/// attribute-list = [sep] attribute *(sep attribute) [sep]
/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
/// token-list = [sep] token *(sep token) [sep]
Expand All @@ -907,8 +909,15 @@ pub(crate) enum Ignore {
/// CLOSE_PARENT = ")"
/// OPEN-CURLY-BRACKET = "{"
/// CLOSE-CURLY-BRACKET = "}"
/// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
/// QUOTE = %x22
/// LEADINGCHAR = ALPHA | DIGIT | "_" | "-" | ":"
/// ; All ASCII punctuation except comma, quote, equals, backslash, grave (backquote) and braces.
/// ; Comma is used to separate language tokens, so it can't be used in one.
/// ; Quote is used to allow otherwise-disallowed characters in language tokens.
/// ; Equals is used to make key=value pairs in attribute blocks.
/// ; Backslash and grave are special Markdown characters.
/// ; Braces are used to start an attribute block.
/// CHAR = ALPHA | DIGIT | "_" | "-" | ":" | "." | "!" | "#" | "$" | "%" | "&" | "*" | "+" | "/" |
/// ";" | "<" | ">" | "?" | "@" | "^" | "|" | "~"
/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
/// COMMA = ","
/// DOT = "."
Expand All @@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
KeyValueAttribute(&'a str, &'a str),
}

fn is_bareword_char(c: char) -> bool {
fn is_leading_char(c: char) -> bool {
c == '_' || c == '-' || c == ':' || c.is_ascii_alphabetic() || c.is_ascii_digit()
}
fn is_bareword_char(c: char) -> bool {
is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
}
fn is_separator(c: char) -> bool {
c == ' ' || c == ',' || c == '\t'
}
Expand Down Expand Up @@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
return self.next();
} else if c == '.' {
return self.parse_class(pos);
} else if c == '"' || is_bareword_char(c) {
} else if c == '"' || is_leading_char(c) {
return self.parse_key_value(c, pos);
} else {
self.emit_error(format!("unexpected character `{c}`"));
Expand Down Expand Up @@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
return None;
}
let indices = self.parse_string(pos)?;
if let Some((_, c)) = self.inner.peek().copied() && c != '{' && !is_separator(c) && c != '(' {
if let Some((_, c)) = self.inner.peek().copied() &&
c != '{' &&
!is_separator(c) &&
c != '('
{
self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
return None;
}
return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
} else if c == '{' {
self.is_in_attribute_block = true;
return self.next();
} else if is_bareword_char(c) {
continue;
} else if is_separator(c) {
if pos != start {
return Some(LangStringToken::LangToken(&self.data[start..pos]));
Expand All @@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
return Some(LangStringToken::LangToken(&self.data[start..pos]));
}
return self.next();
} else if pos == start && is_leading_char(c) {
continue;
} else if pos != start && is_bareword_char(c) {
continue;
} else {
self.emit_error(format!("unexpected character `{c}`"));
return None;
Expand Down Expand Up @@ -1158,6 +1176,29 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
}
}

fn tokens(string: &str) -> impl Iterator<Item = LangStringToken<'_>> {
// Pandoc, which Rust once used for generating documentation,
// expects lang strings to be surrounded by `{}` and for each token
// to be proceeded by a `.`. Since some of these lang strings are still
// loose in the wild, we strip a pair of surrounding `{}` from the lang
// string and a leading `.` from each token.

let string = string.trim();

let first = string.chars().next();
let last = string.chars().last();

let string =
if first == Some('{') && last == Some('}') { &string[1..string.len() - 1] } else { string };

string
.split(|c| c == ',' || c == ' ' || c == '\t')
.map(str::trim)
.map(|token| token.strip_prefix('.').unwrap_or(token))
.filter(|token| !token.is_empty())
.map(|token| LangStringToken::LangToken(token))
}

impl Default for LangString {
fn default() -> Self {
Self {
Expand Down Expand Up @@ -1208,122 +1249,130 @@ impl LangString {

data.original = string.to_owned();

for token in TagIterator::new(string, extra) {
match token {
LangStringToken::LangToken("should_panic") => {
data.should_panic = true;
seen_rust_tags = !seen_other_tags;
}
LangStringToken::LangToken("no_run") => {
data.no_run = true;
seen_rust_tags = !seen_other_tags;
}
LangStringToken::LangToken("ignore") => {
data.ignore = Ignore::All;
seen_rust_tags = !seen_other_tags;
}
LangStringToken::LangToken(x) if x.starts_with("ignore-") => {
if enable_per_target_ignores {
ignores.push(x.trim_start_matches("ignore-").to_owned());
let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
for token in tokens {
match token {
LangStringToken::LangToken("should_panic") => {
data.should_panic = true;
seen_rust_tags = !seen_other_tags;
}
}
LangStringToken::LangToken("rust") => {
data.rust = true;
seen_rust_tags = true;
}
LangStringToken::LangToken("custom") => {
if custom_code_classes_in_docs {
seen_custom_tag = true;
} else {
seen_other_tags = true;
LangStringToken::LangToken("no_run") => {
data.no_run = true;
seen_rust_tags = !seen_other_tags;
}
}
LangStringToken::LangToken("test_harness") => {
data.test_harness = true;
seen_rust_tags = !seen_other_tags || seen_rust_tags;
}
LangStringToken::LangToken("compile_fail") => {
data.compile_fail = true;
seen_rust_tags = !seen_other_tags || seen_rust_tags;
data.no_run = true;
}
LangStringToken::LangToken(x) if x.starts_with("edition") => {
data.edition = x[7..].parse::<Edition>().ok();
}
LangStringToken::LangToken(x)
if allow_error_code_check && x.starts_with('E') && x.len() == 5 =>
{
if x[1..].parse::<u32>().is_ok() {
data.error_codes.push(x.to_owned());
LangStringToken::LangToken("ignore") => {
data.ignore = Ignore::All;
seen_rust_tags = !seen_other_tags;
}
LangStringToken::LangToken(x) if x.starts_with("ignore-") => {
if enable_per_target_ignores {
ignores.push(x.trim_start_matches("ignore-").to_owned());
seen_rust_tags = !seen_other_tags;
}
}
LangStringToken::LangToken("rust") => {
data.rust = true;
seen_rust_tags = true;
}
LangStringToken::LangToken("custom") => {
if custom_code_classes_in_docs {
seen_custom_tag = true;
} else {
seen_other_tags = true;
}
}
LangStringToken::LangToken("test_harness") => {
data.test_harness = true;
seen_rust_tags = !seen_other_tags || seen_rust_tags;
} else {
seen_other_tags = true;
}
}
LangStringToken::LangToken(x) if extra.is_some() => {
let s = x.to_lowercase();
if let Some((flag, help)) = if s == "compile-fail"
|| s == "compile_fail"
|| s == "compilefail"
LangStringToken::LangToken("compile_fail") => {
data.compile_fail = true;
seen_rust_tags = !seen_other_tags || seen_rust_tags;
data.no_run = true;
}
LangStringToken::LangToken(x) if x.starts_with("edition") => {
data.edition = x[7..].parse::<Edition>().ok();
}
LangStringToken::LangToken(x)
if allow_error_code_check && x.starts_with('E') && x.len() == 5 =>
{
Some((
"compile_fail",
"the code block will either not be tested if not marked as a rust one \
or won't fail if it compiles successfully",
))
} else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
Some((
"should_panic",
"the code block will either not be tested if not marked as a rust one \
or won't fail if it doesn't panic when running",
))
} else if s == "no-run" || s == "no_run" || s == "norun" {
Some((
"no_run",
"the code block will either not be tested if not marked as a rust one \
or will be run (which you might not want)",
))
} else if s == "test-harness" || s == "test_harness" || s == "testharness" {
Some((
"test_harness",
"the code block will either not be tested if not marked as a rust one \
or the code will be wrapped inside a main function",
))
} else {
None
} {
if let Some(extra) = extra {
extra.error_invalid_codeblock_attr_with_help(
format!("unknown attribute `{x}`. Did you mean `{flag}`?"),
help,
);
if x[1..].parse::<u32>().is_ok() {
data.error_codes.push(x.to_owned());
seen_rust_tags = !seen_other_tags || seen_rust_tags;
} else {
seen_other_tags = true;
}
}
seen_other_tags = true;
data.unknown.push(x.to_owned());
}
LangStringToken::LangToken(x) => {
seen_other_tags = true;
data.unknown.push(x.to_owned());
}
LangStringToken::KeyValueAttribute(key, value) => {
if custom_code_classes_in_docs {
if key == "class" {
data.added_classes.push(value.to_owned());
} else if let Some(extra) = extra {
extra.error_invalid_codeblock_attr(format!(
"unsupported attribute `{key}`"
));
LangStringToken::LangToken(x) if extra.is_some() => {
let s = x.to_lowercase();
if let Some((flag, help)) = if s == "compile-fail"
|| s == "compile_fail"
|| s == "compilefail"
{
Some((
"compile_fail",
"the code block will either not be tested if not marked as a rust one \
or won't fail if it compiles successfully",
))
} else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
Some((
"should_panic",
"the code block will either not be tested if not marked as a rust one \
or won't fail if it doesn't panic when running",
))
} else if s == "no-run" || s == "no_run" || s == "norun" {
Some((
"no_run",
"the code block will either not be tested if not marked as a rust one \
or will be run (which you might not want)",
))
} else if s == "test-harness" || s == "test_harness" || s == "testharness" {
Some((
"test_harness",
"the code block will either not be tested if not marked as a rust one \
or the code will be wrapped inside a main function",
))
} else {
None
} {
if let Some(extra) = extra {
extra.error_invalid_codeblock_attr_with_help(
format!("unknown attribute `{x}`. Did you mean `{flag}`?"),
help,
);
}
}
} else {
seen_other_tags = true;
data.unknown.push(x.to_owned());
}
LangStringToken::LangToken(x) => {
seen_other_tags = true;
data.unknown.push(x.to_owned());
}
LangStringToken::KeyValueAttribute(key, value) => {
if custom_code_classes_in_docs {
if key == "class" {
data.added_classes.push(value.to_owned());
} else if let Some(extra) = extra {
extra.error_invalid_codeblock_attr(format!(
"unsupported attribute `{key}`"
));
}
} else {
seen_other_tags = true;
}
}
LangStringToken::ClassAttribute(class) => {
data.added_classes.push(class.to_owned());
}
}
LangStringToken::ClassAttribute(class) => {
data.added_classes.push(class.to_owned());
}
}
};

if custom_code_classes_in_docs {
call(&mut TagIterator::new(string, extra).into_iter())
} else {
call(&mut tokens(string))
}

// ignore-foo overrides ignore
Expand Down
21 changes: 18 additions & 3 deletions src/librustdoc/html/markdown/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,28 @@ fn test_lang_string_parse() {
..Default::default()
});
// error
t(LangString { original: "{.first.second}".into(), rust: true, ..Default::default() });
t(LangString {
original: "{.first.second}".into(),
rust: true,
added_classes: vec!["first.second".into()],
..Default::default()
});
// error
t(LangString { original: "{class=first=second}".into(), rust: true, ..Default::default() });
// error
t(LangString { original: "{class=first.second}".into(), rust: true, ..Default::default() });
t(LangString {
original: "{class=first.second}".into(),
rust: true,
added_classes: vec!["first.second".into()],
..Default::default()
});
// error
t(LangString { original: "{class=.first}".into(), rust: true, ..Default::default() });
t(LangString {
original: "{class=.first}".into(),
added_classes: vec![".first".into()],
rust: true,
..Default::default()
});
t(LangString {
original: r#"{class="first"}"#.into(),
added_classes: vec!["first".into()],
Expand Down
Loading

0 comments on commit 52a0d13

Please sign in to comment.