rustdoc: overhaul code block lexing errors #56884

Merged 2 commits on Jan 20, 2019.
Changes from all commits
92 changes: 55 additions & 37 deletions src/librustdoc/html/highlight.rs
@@ -25,40 +25,51 @@ pub fn render_with_highlighting(
     tooltip: Option<(&str, &str)>,
 ) -> String {
     debug!("highlighting: ================\n{}\n==============", src);
-    let sess = parse::ParseSess::new(FilePathMapping::empty());
-    let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
-                                               src.to_string());
-
     let mut out = Vec::new();
     if let Some((tooltip, class)) = tooltip {
         write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
                      class='tooltiptext'>{}</span></div></div>",
                class, tooltip).unwrap();
     }
-    write_header(class, &mut out).unwrap();
 
-    let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
-        Ok(l) => l,
-        Err(_) => {
-            let first_line = src.lines().next().unwrap_or_else(|| "");
-            let mut err = sess.span_diagnostic
-                .struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
-                                       (Ignoring this codeblock)",
-                                      first_line));
-            err.emit();
-            return String::new();
+    let sess = parse::ParseSess::new(FilePathMapping::empty());
+    let fm = sess.source_map().new_source_file(
+        FileName::Custom(String::from("rustdoc-highlighting")),
+        src.to_owned(),
+    );
+    let highlight_result =
+        lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
+            let mut classifier = Classifier::new(lexer, sess.source_map());
+
+            let mut highlighted_source = vec![];
+            if classifier.write_source(&mut highlighted_source).is_err() {
+                Err(classifier.lexer.buffer_fatal_errors())
+            } else {
+                Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+            }
+        });
+
+    match highlight_result {
+        Ok(highlighted_source) => {
+            write_header(class, &mut out).unwrap();
+            write!(out, "{}", highlighted_source).unwrap();
+            if let Some(extension) = extension {
+                write!(out, "{}", extension).unwrap();
+            }
+            write_footer(&mut out).unwrap();
         }
-    };
-    let mut classifier = Classifier::new(lexer, sess.source_map());
-    if classifier.write_source(&mut out).is_err() {
-        classifier.lexer.emit_fatal_errors();
-        return format!("<pre>{}</pre>", src);
-    }
+        Err(errors) => {
+            // If errors are encountered while trying to highlight, cancel the errors and just emit
+            // the unhighlighted source. The errors will have already been reported in the
+            // `check-code-block-syntax` pass.
+            for mut error in errors {
+                error.cancel();
+            }
 
-    if let Some(extension) = extension {
-        write!(out, "{}", extension).unwrap();
+            write!(out, "<pre><code>{}</code></pre>", src).unwrap();
+        }
     }
-    write_footer(&mut out).unwrap();
 
     String::from_utf8_lossy(&out[..]).into_owned()
 }

@@ -151,6 +162,17 @@ impl<U: Write> Writer for U {
     }
 }
 
+enum HighlightError {
+    LexError,
+    IoError(io::Error),
+}
+
+impl From<io::Error> for HighlightError {
+    fn from(err: io::Error) -> Self {
+        HighlightError::IoError(err)
+    }
+}
+
 impl<'a> Classifier<'a> {
     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
         Classifier {
@@ -162,17 +184,11 @@ impl<'a> Classifier<'a> {
         }
     }
 
-    /// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
-    fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
+    /// Gets the next token out of the lexer.
+    fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
         match self.lexer.try_next_token() {
             Ok(tas) => Ok(tas),
-            Err(_) => {
-                let mut err = self.lexer.sess.span_diagnostic
-                    .struct_warn("Backing out of syntax highlighting");
-                err.note("You probably did not intend to render this as a rust code-block");
-                err.emit();
-                Err(io::Error::new(io::ErrorKind::Other, ""))
-            }
+            Err(_) => Err(HighlightError::LexError),
         }
     }
 
@@ -185,7 +201,7 @@ impl<'a> Classifier<'a> {
     /// source.
     fn write_source<W: Writer>(&mut self,
                                out: &mut W)
-                               -> io::Result<()> {
+                               -> Result<(), HighlightError> {
         loop {
             let next = self.try_next_token()?;
             if next.tok == token::Eof {
@@ -202,7 +218,7 @@ impl<'a> Classifier<'a> {
     fn write_token<W: Writer>(&mut self,
                               out: &mut W,
                               tas: TokenAndSpan)
-                              -> io::Result<()> {
+                              -> Result<(), HighlightError> {
         let klass = match tas.tok {
             token::Shebang(s) => {
                 out.string(Escape(&s.as_str()), Class::None)?;
@@ -341,7 +357,9 @@ impl<'a> Classifier<'a> {
 
         // Anything that didn't return above is the simple case where we the
         // class just spans a single token, so we can use the `string` method.
-        out.string(Escape(&self.snip(tas.sp)), klass)
+        out.string(Escape(&self.snip(tas.sp)), klass)?;
+
+        Ok(())
     }
 
     // Helper function to get a snippet from the source_map.
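The new error handling follows a standard Rust pattern: a single error enum for the highlighting loop, where lexer failures are mapped explicitly to `HighlightError::LexError` and I/O failures convert automatically through the `From<io::Error>` impl, so `?` works uniformly for both. Below is a minimal, self-contained sketch of that pattern; the enum and `From` impl mirror the additions above, while the `next_token` closure and the empty-token EOF convention are stand-ins invented for the example (the real code drives a `lexer::StringReader` and checks for `token::Eof`).

    use std::io::{self, Write};

    // One error type for the whole highlighting loop (same shape as in the diff).
    enum HighlightError {
        LexError,
        IoError(io::Error),
    }

    // Lets `?` turn io::Error into HighlightError automatically.
    impl From<io::Error> for HighlightError {
        fn from(err: io::Error) -> Self {
            HighlightError::IoError(err)
        }
    }

    // `next_token` stands in for the lexer; an empty token plays the role of EOF.
    fn write_source<W: Write>(
        out: &mut W,
        mut next_token: impl FnMut() -> Result<String, ()>,
    ) -> Result<(), HighlightError> {
        loop {
            // Lexer failures are mapped explicitly...
            let tok = next_token().map_err(|_| HighlightError::LexError)?;
            if tok.is_empty() {
                return Ok(());
            }
            // ...while write! errors go through the From impl above.
            write!(out, "{}", tok)?;
        }
    }

    fn main() {
        let mut toks = vec!["fn", " ", "main", "()", " ", "{}", ""].into_iter();
        let mut out = Vec::new();
        let result = write_source(&mut out, || toks.next().map(str::to_owned).ok_or(()));
        assert!(result.is_ok());
        println!("{}", String::from_utf8_lossy(&out)); // prints: fn main() {}
    }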
109 changes: 109 additions & 0 deletions src/librustdoc/html/markdown.rs
@@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
    links
}

#[derive(Debug)]
crate struct RustCodeBlock {
    /// The range in the markdown that the code block occupies. Note that this includes the fences
    /// for fenced code blocks.
    pub range: Range<usize>,
    /// The range in the markdown that the code within the code block occupies.
    pub code: Range<usize>,
    pub is_fenced: bool,
    pub syntax: Option<String>,
}

/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
/// untagged (and assumed to be rust).
crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
    let mut code_blocks = vec![];

    if md.is_empty() {
        return code_blocks;
    }

    let mut opts = Options::empty();
    opts.insert(OPTION_ENABLE_TABLES);
    opts.insert(OPTION_ENABLE_FOOTNOTES);
    let mut p = Parser::new_ext(md, opts);

    let mut code_block_start = 0;
    let mut code_start = 0;
    let mut is_fenced = false;
    let mut previous_offset = 0;
    let mut in_rust_code_block = false;
    while let Some(event) = p.next() {
        let offset = p.get_offset();

        match event {
            Event::Start(Tag::CodeBlock(syntax)) => {
                let lang_string = if syntax.is_empty() {
                    LangString::all_false()
                } else {
                    LangString::parse(&*syntax, ErrorCodes::Yes)
                };

                if lang_string.rust {
                    in_rust_code_block = true;

                    code_start = offset;
                    code_block_start = match md[previous_offset..offset].find("```") {
                        Some(fence_idx) => {
                            is_fenced = true;
                            previous_offset + fence_idx
                        }
                        None => offset,
                    };
                }
            }
            Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
                in_rust_code_block = false;

                let code_block_end = if is_fenced {
                    let fence_str = &md[previous_offset..offset]
                        .chars()
                        .rev()
                        .collect::<String>();
                    fence_str
                        .find("```")
                        .map(|fence_idx| offset - fence_idx)
                        .unwrap_or_else(|| offset)
                } else if md
                    .as_bytes()
                    .get(offset)
                    .map(|b| *b == b'\n')
                    .unwrap_or_default()
                {
                    offset - 1
                } else {
                    offset
                };

                let code_end = if is_fenced {
                    previous_offset
                } else {
                    code_block_end
                };

                code_blocks.push(RustCodeBlock {
                    is_fenced,
                    range: Range {
                        start: code_block_start,
                        end: code_block_end,
                    },
                    code: Range {
                        start: code_start,
                        end: code_end,
                    },
                    syntax: if !syntax.is_empty() {
                        Some(syntax.into_owned())
                    } else {
                        None
                    },
                });
            }
            _ => (),
        }

        previous_offset = offset;
    }

    code_blocks
}

#[derive(Clone, Default, Debug)]
pub struct IdMap {
    map: FxHashMap<String, usize>,
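A hypothetical usage sketch of the new helper (it is crate-visible inside rustdoc, so this is illustrative only; the markdown string and printed fields are inventions for the example). For a fenced, untagged block — which the doc comment above says is assumed to be Rust — `code` covers only the source between the fences, `range` also includes the fences themselves, and `syntax` is `None`:

    let md = "Some docs.\n\n```\nlet x = 1;\n```\n";
    for block in rust_code_blocks(md) {
        // `block.range` would also include the ``` fences; `block.code` does not.
        println!(
            "fenced: {}, syntax: {:?}, code: {:?}",
            block.is_fenced,
            block.syntax,
            &md[block.code.clone()],
        );
    }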
1 change: 1 addition & 0 deletions src/librustdoc/lib.rs
@@ -3,6 +3,7 @@
html_root_url = "https://doc.rust-lang.org/nightly/",
html_playground_url = "https://play.rust-lang.org/")]

#![feature(bind_by_move_pattern_guards)]
#![feature(rustc_private)]
#![feature(box_patterns)]
#![feature(box_syntax)]
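This feature gate is presumably needed because `rust_code_blocks` (added above in markdown.rs) matches `Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block`, an arm that binds `syntax` by move while also carrying a guard; that combination was still nightly-only at the time and was later stabilized in Rust 1.39. A standalone sketch of the pattern the gate unlocks — illustrative only, not rustdoc code:

    fn main() {
        let value = Some(String::from("code block"));
        let keep = true;
        match value {
            // `s` is bound by move and the arm has a guard; before Rust 1.39 this
            // combination required #![feature(bind_by_move_pattern_guards)].
            Some(s) if keep => println!("kept: {}", s),
            _ => println!("dropped"),
        }
    }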
109 changes: 109 additions & 0 deletions src/librustdoc/passes/check_code_block_syntax.rs
@@ -0,0 +1,109 @@
use errors::Applicability;
use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
use syntax::parse::{ParseSess, token};
use syntax::source_map::FilePathMapping;
use syntax_pos::FileName;

use clean;
use core::DocContext;
use fold::DocFolder;
use html::markdown::{self, RustCodeBlock};
use passes::Pass;

pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
    Pass::early("check-code-block-syntax", check_code_block_syntax,
                "validates syntax inside Rust code blocks");

pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
    SyntaxChecker { cx }.fold_crate(krate)
}

struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
    cx: &'a DocContext<'a, 'tcx, 'rcx>,
}

impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
    fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
        let sess = ParseSess::new(FilePathMapping::empty());
        let source_file = sess.source_map().new_source_file(
            FileName::Custom(String::from("doctest")),
            dox[code_block.code].to_owned(),
        );

        let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
            while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() {
                if tok == token::Eof {
                    break;
                }
            }

            let errors = lexer.buffer_fatal_errors();

            if !errors.is_empty() {
                Err(errors)
            } else {
                Ok(())
            }
        });

        if let Err(errors) = errors {
            let mut diag = if let Some(sp) =
                super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
            {
                let mut diag = self
                    .cx
                    .sess()
                    .struct_span_warn(sp, "could not parse code block as Rust code");

                for mut err in errors {
                    diag.note(&format!("error from rustc: {}", err.message()));
                    err.cancel();
                }

                if code_block.syntax.is_none() && code_block.is_fenced {
                    let sp = sp.from_inner_byte_pos(0, 3);
                    diag.span_suggestion_with_applicability(
                        sp,
                        "mark blocks that do not contain Rust code as text",
                        String::from("```text"),
                        Applicability::MachineApplicable,
                    );
                }

                diag
            } else {
                // We couldn't calculate the span of the markdown block that had the error, so our
                // diagnostics are going to be a bit lacking.
                let mut diag = self.cx.sess().struct_span_warn(
                    super::span_of_attrs(&item.attrs),
                    "doc comment contains an invalid Rust code block",
                );

                for mut err in errors {
                    // Don't bother reporting the error, because we can't show where it happened.
                    err.cancel();
                }

                if code_block.syntax.is_none() && code_block.is_fenced {
                    diag.help("mark blocks that do not contain Rust code as text: ```text");
                }

                diag
            };

            diag.emit();
        }
    }
}

impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
    fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
        if let Some(dox) = &item.attrs.collapsed_doc_value() {
            for code_block in markdown::rust_code_blocks(&dox) {
                self.check_rust_syntax(&item, &dox, code_block);
            }
        }

        self.fold_item_recur(item)
    }
}
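To see what the new pass reports, consider an item whose doc comment carries a fenced block that fails to lex — an unterminated string literal, say. The item below is an invented illustration, not one of the PR's test cases:

    /// A fenced block that is not valid Rust:
    ///
    /// ```
    /// let s = "unterminated
    /// ```
    pub struct Example;

Because the block is fenced and has no language tag, the pass warns "could not parse code block as Rust code" at the block's span, attaches the underlying lexer error as an "error from rustc: ..." note, and offers a machine-applicable suggestion to rewrite the opening fence as ```text. A block explicitly tagged `rust` would still get the warning and note, but not the ```text suggestion.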