From 840a5769b0879f39e21d973a60ac0b0db172f050 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Fri, 27 Mar 2020 22:02:18 -0400 Subject: [PATCH 1/6] Move raw string tests into the raw directory --- src/test/ui/parser/{ => raw}/raw-byte-string-eof.rs | 0 src/test/ui/parser/{ => raw}/raw-byte-string-eof.stderr | 0 src/test/ui/parser/{ => raw}/raw-byte-string-literals.rs | 0 src/test/ui/parser/{ => raw}/raw-byte-string-literals.stderr | 0 src/test/ui/parser/{ => raw}/raw-str-delim.rs | 0 src/test/ui/parser/{ => raw}/raw-str-delim.stderr | 0 src/test/ui/parser/{ => raw}/raw-str-unbalanced.rs | 0 src/test/ui/parser/{ => raw}/raw-str-unbalanced.stderr | 0 src/test/ui/parser/{ => raw}/raw-str-unterminated.rs | 0 src/test/ui/parser/{ => raw}/raw-str-unterminated.stderr | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename src/test/ui/parser/{ => raw}/raw-byte-string-eof.rs (100%) rename src/test/ui/parser/{ => raw}/raw-byte-string-eof.stderr (100%) rename src/test/ui/parser/{ => raw}/raw-byte-string-literals.rs (100%) rename src/test/ui/parser/{ => raw}/raw-byte-string-literals.stderr (100%) rename src/test/ui/parser/{ => raw}/raw-str-delim.rs (100%) rename src/test/ui/parser/{ => raw}/raw-str-delim.stderr (100%) rename src/test/ui/parser/{ => raw}/raw-str-unbalanced.rs (100%) rename src/test/ui/parser/{ => raw}/raw-str-unbalanced.stderr (100%) rename src/test/ui/parser/{ => raw}/raw-str-unterminated.rs (100%) rename src/test/ui/parser/{ => raw}/raw-str-unterminated.stderr (100%) diff --git a/src/test/ui/parser/raw-byte-string-eof.rs b/src/test/ui/parser/raw/raw-byte-string-eof.rs similarity index 100% rename from src/test/ui/parser/raw-byte-string-eof.rs rename to src/test/ui/parser/raw/raw-byte-string-eof.rs diff --git a/src/test/ui/parser/raw-byte-string-eof.stderr b/src/test/ui/parser/raw/raw-byte-string-eof.stderr similarity index 100% rename from src/test/ui/parser/raw-byte-string-eof.stderr rename to src/test/ui/parser/raw/raw-byte-string-eof.stderr diff 
--git a/src/test/ui/parser/raw-byte-string-literals.rs b/src/test/ui/parser/raw/raw-byte-string-literals.rs similarity index 100% rename from src/test/ui/parser/raw-byte-string-literals.rs rename to src/test/ui/parser/raw/raw-byte-string-literals.rs diff --git a/src/test/ui/parser/raw-byte-string-literals.stderr b/src/test/ui/parser/raw/raw-byte-string-literals.stderr similarity index 100% rename from src/test/ui/parser/raw-byte-string-literals.stderr rename to src/test/ui/parser/raw/raw-byte-string-literals.stderr diff --git a/src/test/ui/parser/raw-str-delim.rs b/src/test/ui/parser/raw/raw-str-delim.rs similarity index 100% rename from src/test/ui/parser/raw-str-delim.rs rename to src/test/ui/parser/raw/raw-str-delim.rs diff --git a/src/test/ui/parser/raw-str-delim.stderr b/src/test/ui/parser/raw/raw-str-delim.stderr similarity index 100% rename from src/test/ui/parser/raw-str-delim.stderr rename to src/test/ui/parser/raw/raw-str-delim.stderr diff --git a/src/test/ui/parser/raw-str-unbalanced.rs b/src/test/ui/parser/raw/raw-str-unbalanced.rs similarity index 100% rename from src/test/ui/parser/raw-str-unbalanced.rs rename to src/test/ui/parser/raw/raw-str-unbalanced.rs diff --git a/src/test/ui/parser/raw-str-unbalanced.stderr b/src/test/ui/parser/raw/raw-str-unbalanced.stderr similarity index 100% rename from src/test/ui/parser/raw-str-unbalanced.stderr rename to src/test/ui/parser/raw/raw-str-unbalanced.stderr diff --git a/src/test/ui/parser/raw-str-unterminated.rs b/src/test/ui/parser/raw/raw-str-unterminated.rs similarity index 100% rename from src/test/ui/parser/raw-str-unterminated.rs rename to src/test/ui/parser/raw/raw-str-unterminated.rs diff --git a/src/test/ui/parser/raw-str-unterminated.stderr b/src/test/ui/parser/raw/raw-str-unterminated.stderr similarity index 100% rename from src/test/ui/parser/raw-str-unterminated.stderr rename to src/test/ui/parser/raw/raw-str-unterminated.stderr From 629e97a5a02edb3d8dc63c5157962c093217d441 Mon Sep 17 00:00:00 
2001 From: Russell Cohen Date: Sat, 28 Mar 2020 01:46:20 -0400 Subject: [PATCH 2/6] Improve error messages for raw strings (#60762) This diff improves error messages around raw strings in a few ways: - Catch extra trailing `#` in the parser. This can't be handled in the lexer because we could be in a macro that actually expects another # (see test) - Refactor & unify error handling in the lexer between ByteStrings and RawByteStrings - Detect potentially intended terminators (longest sequence of "#*" is suggested) --- src/librustc_lexer/src/cursor.rs | 2 +- src/librustc_lexer/src/lib.rs | 131 +++++++++++++++--- src/librustc_lexer/src/tests.rs | 119 ++++++++++++++++ src/librustc_parse/lexer/mod.rs | 94 ++++++++----- src/librustc_parse/parser/diagnostics.rs | 31 ++++- .../ui/parser/raw/raw-byte-string-eof.stderr | 4 +- .../ui/parser/raw/raw-str-in-macro-call.rs | 14 ++ src/test/ui/parser/raw/raw-str-unbalanced.rs | 2 +- .../ui/parser/raw/raw-str-unbalanced.stderr | 6 +- src/test/ui/parser/raw/raw_string.stderr | 4 +- 10 files changed, 344 insertions(+), 63 deletions(-) create mode 100644 src/librustc_lexer/src/tests.rs create mode 100644 src/test/ui/parser/raw/raw-str-in-macro-call.rs diff --git a/src/librustc_lexer/src/cursor.rs b/src/librustc_lexer/src/cursor.rs index ed0911379c4b3..13d0b07d98bae 100644 --- a/src/librustc_lexer/src/cursor.rs +++ b/src/librustc_lexer/src/cursor.rs @@ -41,7 +41,7 @@ impl<'a> Cursor<'a> { /// If requested position doesn't exist, `EOF_CHAR` is returned. /// However, getting `EOF_CHAR` doesn't always mean actual end of file, /// it should be checked with `is_eof` method. 
- fn nth_char(&self, n: usize) -> char { + pub(crate) fn nth_char(&self, n: usize) -> char { self.chars().nth(n).unwrap_or(EOF_CHAR) } diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index d3ac58a49c8d5..70df6d210f4a1 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -17,9 +17,13 @@ mod cursor; pub mod unescape; +#[cfg(test)] +mod tests; + use self::LiteralKind::*; use self::TokenKind::*; use crate::cursor::{Cursor, EOF_CHAR}; +use std::convert::TryInto; /// Parsed token. /// It doesn't contain information about data that has been parsed, @@ -132,9 +136,65 @@ pub enum LiteralKind { /// "b"abc"", "b"abc" ByteStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a" - RawStr { n_hashes: usize, started: bool, terminated: bool }, + RawStr(UnvalidatedRawStr), /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a" - RawByteStr { n_hashes: usize, started: bool, terminated: bool }, + RawByteStr(UnvalidatedRawStr), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct UnvalidatedRawStr { + valid_start: bool, + n_start_hashes: usize, + n_end_hashes: usize, + possible_terminator_offset: Option, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum LexRawStrError { + /// Non # characters between `r` and `"` eg. `r#~"..` + InvalidStarter, + /// The string was never terminated. `possible_terminator_offset` is the best guess of where they + /// may have intended to terminate it. 
+ NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option }, + /// More than 65536 # signs + TooManyDelimiters, +} + +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub struct ValidatedRawStr { + n_hashes: u16, +} + +impl ValidatedRawStr { + pub fn num_hashes(&self) -> u16 { + self.n_hashes + } +} + +impl UnvalidatedRawStr { + pub fn started(&self) -> bool { + self.valid_start + } + + pub fn validate(self) -> Result { + if !self.valid_start { + return Err(LexRawStrError::InvalidStarter); + } + + let n_start_safe: u16 = + self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?; + match (self.n_start_hashes, self.n_end_hashes) { + (n_start, n_end) if n_start > n_end => Err(LexRawStrError::NoTerminator { + expected: n_start, + found: self.n_end_hashes, + possible_terminator_offset: self.possible_terminator_offset, + }), + (n_start, n_end) => { + debug_assert_eq!(n_start, n_end); + Ok(ValidatedRawStr { n_hashes: n_start_safe }) + } + } + } } /// Base of numeric literal encoding according to its prefix. 
@@ -209,7 +269,7 @@ pub fn is_whitespace(c: char) -> bool { // Dedicated whitespace characters from Unicode | '\u{2028}' // LINE SEPARATOR | '\u{2029}' // PARAGRAPH SEPARATOR - => true, + => true, _ => false, } } @@ -258,12 +318,12 @@ impl Cursor<'_> { 'r' => match (self.first(), self.second()) { ('#', c1) if is_id_start(c1) => self.raw_ident(), ('#', _) | ('"', _) => { - let (n_hashes, started, terminated) = self.raw_double_quoted_string(); + let raw_str_i = self.raw_double_quoted_string(1); let suffix_start = self.len_consumed(); - if terminated { + if raw_str_i.n_end_hashes == raw_str_i.n_start_hashes { self.eat_literal_suffix(); } - let kind = RawStr { n_hashes, started, terminated }; + let kind = RawStr(raw_str_i); Literal { kind, suffix_start } } _ => self.ident(), @@ -293,12 +353,14 @@ impl Cursor<'_> { } ('r', '"') | ('r', '#') => { self.bump(); - let (n_hashes, started, terminated) = self.raw_double_quoted_string(); + let raw_str_i = self.raw_double_quoted_string(2); let suffix_start = self.len_consumed(); + let terminated = raw_str_i.n_start_hashes == raw_str_i.n_end_hashes; if terminated { self.eat_literal_suffix(); } - let kind = RawByteStr { n_hashes, started, terminated }; + + let kind = RawByteStr(raw_str_i); Literal { kind, suffix_start } } _ => self.ident(), @@ -594,29 +656,41 @@ impl Cursor<'_> { false } - /// Eats the double-quoted string and returns a tuple of - /// (amount of the '#' symbols, raw string started, raw string terminated) - fn raw_double_quoted_string(&mut self) -> (usize, bool, bool) { + /// Eats the double-quoted string an UnvalidatedRawStr + fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr { debug_assert!(self.prev() == 'r'); - let mut started: bool = false; - let mut finished: bool = false; + let mut valid_start: bool = false; + let start_pos = self.len_consumed(); + let (mut possible_terminator_offset, mut max_hashes) = (None, 0); // Count opening '#' symbols. 
- let n_hashes = self.eat_while(|c| c == '#'); + let n_start_hashes = self.eat_while(|c| c == '#'); // Check that string is started. match self.bump() { - Some('"') => started = true, - _ => return (n_hashes, started, finished), + Some('"') => valid_start = true, + _ => { + return UnvalidatedRawStr { + valid_start, + n_start_hashes, + n_end_hashes: 0, + possible_terminator_offset, + }; + } } // Skip the string contents and on each '#' character met, check if this is // a raw string termination. - while !finished { + loop { self.eat_while(|c| c != '"'); if self.is_eof() { - return (n_hashes, started, finished); + return UnvalidatedRawStr { + valid_start, + n_start_hashes, + n_end_hashes: max_hashes, + possible_terminator_offset, + }; } // Eat closing double quote. @@ -624,7 +698,7 @@ impl Cursor<'_> { // Check that amount of closing '#' symbols // is equal to the amount of opening ones. - let mut hashes_left = n_hashes; + let mut hashes_left = n_start_hashes; let is_closing_hash = |c| { if c == '#' && hashes_left != 0 { hashes_left -= 1; @@ -633,10 +707,23 @@ impl Cursor<'_> { false } }; - finished = self.eat_while(is_closing_hash) == n_hashes; + let n_end_hashes = self.eat_while(is_closing_hash); + + if n_end_hashes == n_start_hashes { + return UnvalidatedRawStr { + valid_start, + n_start_hashes, + n_end_hashes, + possible_terminator_offset: None, + }; + } else if n_end_hashes > 0 && n_end_hashes > max_hashes { + // Keep track of possible terminators to give a hint about where there might be + // a missing terminator + possible_terminator_offset = + Some(self.len_consumed() - start_pos - n_end_hashes + prefix_len); + max_hashes = n_end_hashes; + } } - - (n_hashes, started, finished) } fn eat_decimal_digits(&mut self) -> bool { diff --git a/src/librustc_lexer/src/tests.rs b/src/librustc_lexer/src/tests.rs new file mode 100644 index 0000000000000..ba5897c5d4218 --- /dev/null +++ b/src/librustc_lexer/src/tests.rs @@ -0,0 +1,119 @@ +#[cfg(test)] +mod tests { + use 
crate::*; + + fn check_raw_str( + s: &str, + expected: UnvalidatedRawStr, + validated: Result, + ) { + let mut cursor = Cursor::new(s); + let tok = cursor.raw_double_quoted_string(0); + assert_eq!(tok, expected); + assert_eq!(tok.validate(), validated); + } + + #[test] + fn test_naked_raw_str() { + check_raw_str( + r#""abc""#, + UnvalidatedRawStr { + n_start_hashes: 0, + n_end_hashes: 0, + valid_start: true, + possible_terminator_offset: None, + }, + Ok(ValidatedRawStr { n_hashes: 0 }), + ); + } + + #[test] + fn test_raw_no_start() { + check_raw_str( + r##""abc"#"##, + UnvalidatedRawStr { + n_start_hashes: 0, + n_end_hashes: 0, + valid_start: true, + possible_terminator_offset: None, + }, + Ok(ValidatedRawStr { n_hashes: 0 }), + ); + } + + #[test] + fn test_too_many_terminators() { + // this error is handled in the parser later + check_raw_str( + r###"#"abc"##"###, + UnvalidatedRawStr { + n_start_hashes: 1, + n_end_hashes: 1, + valid_start: true, + possible_terminator_offset: None, + }, + Ok(ValidatedRawStr { n_hashes: 1 }), + ); + } + + #[test] + fn test_unterminated() { + check_raw_str( + r#"#"abc"#, + UnvalidatedRawStr { + n_start_hashes: 1, + n_end_hashes: 0, + valid_start: true, + possible_terminator_offset: None, + }, + Err(LexRawStrError::NoTerminator { + expected: 1, + found: 0, + possible_terminator_offset: None, + }), + ); + check_raw_str( + r###"##"abc"#"###, + UnvalidatedRawStr { + n_start_hashes: 2, + n_end_hashes: 1, + valid_start: true, + possible_terminator_offset: Some(7), + }, + Err(LexRawStrError::NoTerminator { + expected: 2, + found: 1, + possible_terminator_offset: Some(7), + }), + ); + // We're looking for "# not just any # + check_raw_str( + r###"##"abc#"###, + UnvalidatedRawStr { + n_start_hashes: 2, + n_end_hashes: 0, + valid_start: true, + possible_terminator_offset: None, + }, + Err(LexRawStrError::NoTerminator { + expected: 2, + found: 0, + possible_terminator_offset: None, + }), + ) + } + + #[test] + fn test_invalid_start() { + 
check_raw_str( + r##"#~"abc"#"##, + UnvalidatedRawStr { + n_start_hashes: 1, + n_end_hashes: 0, + valid_start: false, + possible_terminator_offset: None, + }, + Err(LexRawStrError::InvalidStarter), + ); + } +} diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index ac58cbb9e8dae..2f720d95c6d2f 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -1,20 +1,20 @@ use rustc_ast::token::{self, Token, TokenKind}; use rustc_ast::util::comments; use rustc_data_structures::sync::Lrc; -use rustc_errors::{error_code, DiagnosticBuilder, FatalError}; -use rustc_lexer::unescape; +use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError}; use rustc_lexer::Base; +use rustc_lexer::{unescape, LexRawStrError, UnvalidatedRawStr, ValidatedRawStr}; use rustc_session::parse::ParseSess; use rustc_span::symbol::{sym, Symbol}; use rustc_span::{BytePos, Pos, Span}; use log::debug; use std::char; -use std::convert::TryInto; mod tokentrees; mod unescape_error_reporting; mod unicode_chars; + use unescape_error_reporting::{emit_unescape_error, push_escaped_char}; #[derive(Clone, Debug)] @@ -376,30 +376,22 @@ impl<'a> StringReader<'a> { let id = self.symbol_from_to(content_start, content_end); (token::ByteStr, id) } - rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => { - if !started { - self.report_non_started_raw_string(start); - } - if !terminated { - self.report_unterminated_raw_string(start, n_hashes) - } - let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); + rustc_lexer::LiteralKind::RawStr(unvalidated_raw_str) => { + let valid_raw_str = self.validate_and_report_errors(start, unvalidated_raw_str); + let n_hashes = valid_raw_str.num_hashes(); let n = u32::from(n_hashes); + let content_start = start + BytePos(2 + n); let content_end = suffix_start - BytePos(1 + n); self.validate_raw_str_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); 
(token::StrRaw(n_hashes), id) } - rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => { - if !started { - self.report_non_started_raw_string(start); - } - if !terminated { - self.report_unterminated_raw_string(start, n_hashes) - } - let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); + rustc_lexer::LiteralKind::RawByteStr(unvalidated_raw_str) => { + let validated_raw_str = self.validate_and_report_errors(start, unvalidated_raw_str); + let n_hashes = validated_raw_str.num_hashes(); let n = u32::from(n_hashes); + let content_start = start + BytePos(3 + n); let content_end = suffix_start - BytePos(1 + n); self.validate_raw_byte_str_escape(content_start, content_end); @@ -485,6 +477,26 @@ impl<'a> StringReader<'a> { } } + fn validate_and_report_errors( + &self, + start: BytePos, + unvalidated_raw_str: UnvalidatedRawStr, + ) -> ValidatedRawStr { + match unvalidated_raw_str.validate() { + Err(LexRawStrError::InvalidStarter) => self.report_non_started_raw_string(start), + Err(LexRawStrError::NoTerminator { expected, found, possible_terminator_offset }) => { + self.report_unterminated_raw_string( + start, + expected, + possible_terminator_offset, + found, + ) + } + Err(LexRawStrError::TooManyDelimiters) => self.report_too_many_hashes(start), + Ok(valid) => valid, + } + } + fn report_non_started_raw_string(&self, start: BytePos) -> ! { let bad_char = self.str_from(start).chars().last().unwrap(); self.struct_fatal_span_char( @@ -498,38 +510,52 @@ impl<'a> StringReader<'a> { FatalError.raise() } - fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! { + fn report_unterminated_raw_string( + &self, + start: BytePos, + n_hashes: usize, + possible_offset: Option, + found_terminators: usize, + ) -> ! 
{ let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code( self.mk_sp(start, start), "unterminated raw string", error_code!(E0748), ); + err.span_label(self.mk_sp(start, start), "unterminated raw string"); if n_hashes > 0 { err.note(&format!( "this raw string should be terminated with `\"{}`", - "#".repeat(n_hashes as usize) + "#".repeat(n_hashes) )); } + if let Some(possible_offset) = possible_offset { + let span = self.mk_sp( + start + BytePos(possible_offset as u32), + start + BytePos(possible_offset as u32) + BytePos(found_terminators as u32), + ); + err.span_suggestion( + span, + "you might have intended to terminate the string here", + "#".repeat(n_hashes), + Applicability::MaybeIncorrect, + ); + } + err.emit(); FatalError.raise() } - fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 { - match n_hashes.try_into() { - Ok(n_hashes) => n_hashes, - Err(_) => { - self.fatal_span_( - start, - self.pos, - "too many `#` symbols: raw strings may be \ - delimited by up to 65535 `#` symbols", - ) - .raise(); - } - } + fn report_too_many_hashes(&self, start: BytePos) -> ! 
{ + self.fatal_span_( + start, + self.pos, + "too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols", + ) + .raise(); } fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) { diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index c4546dedfcdd4..7b6840307cb42 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -6,7 +6,7 @@ use rustc_ast::ast::{ }; use rustc_ast::ast::{AttrVec, ItemKind, Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind}; use rustc_ast::ptr::P; -use rustc_ast::token::{self, TokenKind}; +use rustc_ast::token::{self, Lit, LitKind, Token, TokenKind}; use rustc_ast::util::parser::AssocOp; use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashSet; @@ -255,6 +255,10 @@ impl<'a> Parser<'a> { } } + if self.check_too_many_raw_str_terminators(&mut err) { + return Err(err); + } + let sm = self.sess.source_map(); if self.prev_token.span == DUMMY_SP { // Account for macro context where the previous span might not be @@ -282,6 +286,31 @@ impl<'a> Parser<'a> { Err(err) } + fn check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool { + let prev_token_raw_str = match self.prev_token { + Token { kind: TokenKind::Literal(Lit { kind: LitKind::StrRaw(n), .. }), .. } => Some(n), + Token { + kind: TokenKind::Literal(Lit { kind: LitKind::ByteStrRaw(n), .. }), .. 
+ } => Some(n), + _ => None, + }; + + if let Some(n_hashes) = prev_token_raw_str { + if self.token.kind == TokenKind::Pound { + err.set_primary_message("too many `#` when terminating raw string"); + err.span_suggestion( + self.token.span, + "Remove the extra `#`", + String::new(), + Applicability::MachineApplicable, + ); + err.note(&format!("The raw string started with {} `#`s", n_hashes)); + return true; + } + } + false + } + pub fn maybe_annotate_with_ascription( &mut self, err: &mut DiagnosticBuilder<'_>, diff --git a/src/test/ui/parser/raw/raw-byte-string-eof.stderr b/src/test/ui/parser/raw/raw-byte-string-eof.stderr index d5f22e2a1a814..81344841c2700 100644 --- a/src/test/ui/parser/raw/raw-byte-string-eof.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-eof.stderr @@ -2,7 +2,9 @@ error[E0748]: unterminated raw string --> $DIR/raw-byte-string-eof.rs:2:5 | LL | br##"a"#; - | ^ unterminated raw string + | ^ - help: you might have intended to terminate the string here: `##` + | | + | unterminated raw string | = note: this raw string should be terminated with `"##` diff --git a/src/test/ui/parser/raw/raw-str-in-macro-call.rs b/src/test/ui/parser/raw/raw-str-in-macro-call.rs new file mode 100644 index 0000000000000..462c2279f5c1c --- /dev/null +++ b/src/test/ui/parser/raw/raw-str-in-macro-call.rs @@ -0,0 +1,14 @@ +// check-pass + +macro_rules! m1 { + ($tt:tt #) => () +} + +macro_rules! 
m2 { + ($tt:tt) => () +} + +fn main() { + m1!(r#"abc"##); + m2!(r#"abc"#); +} diff --git a/src/test/ui/parser/raw/raw-str-unbalanced.rs b/src/test/ui/parser/raw/raw-str-unbalanced.rs index 5a1d1be11b633..35f118f5ce6ee 100644 --- a/src/test/ui/parser/raw/raw-str-unbalanced.rs +++ b/src/test/ui/parser/raw/raw-str-unbalanced.rs @@ -1,4 +1,4 @@ static s: &'static str = r#" - "## //~ ERROR expected one of `.`, `;`, `?`, or an operator, found `#` + "## //~ too many `#` when terminating raw string ; diff --git a/src/test/ui/parser/raw/raw-str-unbalanced.stderr b/src/test/ui/parser/raw/raw-str-unbalanced.stderr index ddb75722bef9f..891f1d6337cd2 100644 --- a/src/test/ui/parser/raw/raw-str-unbalanced.stderr +++ b/src/test/ui/parser/raw/raw-str-unbalanced.stderr @@ -1,8 +1,10 @@ -error: expected one of `.`, `;`, `?`, or an operator, found `#` +error: too many `#` when terminating raw string --> $DIR/raw-str-unbalanced.rs:3:9 | LL | "## - | ^ expected one of `.`, `;`, `?`, or an operator + | ^ help: Remove the extra `#` + | + = note: The raw string started with 1 `#`s error: aborting due to previous error diff --git a/src/test/ui/parser/raw/raw_string.stderr b/src/test/ui/parser/raw/raw_string.stderr index 0f1d7e4651deb..e91a16bedc46e 100644 --- a/src/test/ui/parser/raw/raw_string.stderr +++ b/src/test/ui/parser/raw/raw_string.stderr @@ -2,7 +2,9 @@ error[E0748]: unterminated raw string --> $DIR/raw_string.rs:2:13 | LL | let x = r##"lol"#; - | ^ unterminated raw string + | ^ - help: you might have intended to terminate the string here: `##` + | | + | unterminated raw string | = note: this raw string should be terminated with `"##` From c15f86b4b35a260b105dc472fc6e3556af8a8db0 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Sun, 29 Mar 2020 11:12:48 -0400 Subject: [PATCH 3/6] Cleanup error messages, improve docstrings --- src/librustc_lexer/src/cursor.rs | 2 +- src/librustc_lexer/src/lib.rs | 47 ++++++++++++------- src/librustc_parse/lexer/mod.rs | 9 ++-- 
src/librustc_parse/lib.rs | 1 + src/librustc_parse/parser/diagnostics.rs | 11 +++-- .../ui/parser/raw/raw-byte-string-eof.stderr | 2 +- .../ui/parser/raw/raw-str-unbalanced.stderr | 4 +- src/test/ui/parser/raw/raw_string.stderr | 2 +- 8 files changed, 48 insertions(+), 30 deletions(-) diff --git a/src/librustc_lexer/src/cursor.rs b/src/librustc_lexer/src/cursor.rs index 13d0b07d98bae..ed0911379c4b3 100644 --- a/src/librustc_lexer/src/cursor.rs +++ b/src/librustc_lexer/src/cursor.rs @@ -41,7 +41,7 @@ impl<'a> Cursor<'a> { /// If requested position doesn't exist, `EOF_CHAR` is returned. /// However, getting `EOF_CHAR` doesn't always mean actual end of file, /// it should be checked with `is_eof` method. - pub(crate) fn nth_char(&self, n: usize) -> char { + fn nth_char(&self, n: usize) -> char { self.chars().nth(n).unwrap_or(EOF_CHAR) } diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index 70df6d210f4a1..132607031ce65 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -141,25 +141,41 @@ pub enum LiteralKind { RawByteStr(UnvalidatedRawStr), } +/// Represents something that looks like a raw string, but may have some +/// problems. Use `.validate()` to convert it into something +/// usable. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct UnvalidatedRawStr { + /// The prefix (`r###"`) is valid valid_start: bool, + /// The number of leading `#` n_start_hashes: usize, + /// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes` n_end_hashes: usize, + /// The offset starting at `r` or `br` where the user may have intended to end the string. + /// Currently, it is the longest sequence of pattern `"#+"`. possible_terminator_offset: Option, } +/// Error produced validating a raw string. 
Represents cases like: +/// - `r##~"abcde"##`: `LexRawStrError::InvalidStarter` +/// - `r###"abcde"##`: `LexRawStrError::NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)` +/// - Too many `#`s (>65536): `TooManyDelimiters` #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum LexRawStrError { - /// Non # characters between `r` and `"` eg. `r#~"..` + /// Non `#` characters exist between `r` and `"` eg. `r#~"..` InvalidStarter, - /// The string was never terminated. `possible_terminator_offset` is the best guess of where they + /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they /// may have intended to terminate it. NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option }, - /// More than 65536 # signs + /// More than 65536 `#`s exist. TooManyDelimiters, } +/// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where +/// there are a matching number of `#` characters in both. Note that this will +/// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a +/// `ValidatedRawString { n_hashes: 3 }` followed by a `#` token. 
#[derive(Debug, Eq, PartialEq, Copy, Clone)] pub struct ValidatedRawStr { n_hashes: u16, @@ -172,27 +188,26 @@ impl ValidatedRawStr { } impl UnvalidatedRawStr { - pub fn started(&self) -> bool { - self.valid_start - } - pub fn validate(self) -> Result { if !self.valid_start { return Err(LexRawStrError::InvalidStarter); } + // Only up to 65535 `#`s are allowed in raw strings let n_start_safe: u16 = self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?; - match (self.n_start_hashes, self.n_end_hashes) { - (n_start, n_end) if n_start > n_end => Err(LexRawStrError::NoTerminator { - expected: n_start, + + if self.n_start_hashes > self.n_end_hashes { + Err(LexRawStrError::NoTerminator { + expected: self.n_start_hashes, found: self.n_end_hashes, possible_terminator_offset: self.possible_terminator_offset, - }), - (n_start, n_end) => { - debug_assert_eq!(n_start, n_end); - Ok(ValidatedRawStr { n_hashes: n_start_safe }) - } + }) + } else { + // Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end, + // they must be equal. + debug_assert_eq!(self.n_start_hashes, self.n_end_hashes); + Ok(ValidatedRawStr { n_hashes: n_start_safe }) } } } @@ -656,7 +671,7 @@ impl Cursor<'_> { false } - /// Eats the double-quoted string an UnvalidatedRawStr + /// Eats the double-quoted string and returns an `UnvalidatedRawStr`. 
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr { debug_assert!(self.prev() == 'r'); let mut valid_start: bool = false; diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index 2f720d95c6d2f..a367131b3f309 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -533,13 +533,12 @@ impl<'a> StringReader<'a> { } if let Some(possible_offset) = possible_offset { - let span = self.mk_sp( - start + BytePos(possible_offset as u32), - start + BytePos(possible_offset as u32) + BytePos(found_terminators as u32), - ); + let lo = start + BytePos(possible_offset as u32); + let hi = lo + BytePos(found_terminators as u32); + let span = self.mk_sp(lo, hi); err.span_suggestion( span, - "you might have intended to terminate the string here", + "consider terminating the string here", "#".repeat(n_hashes), Applicability::MaybeIncorrect, ); diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index 13fb85db84779..8e2a9513d6b82 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -4,6 +4,7 @@ #![feature(crate_visibility_modifier)] #![feature(bindings_after_at)] #![feature(try_blocks)] +#![feature(or_patterns)] use rustc_ast::ast; use rustc_ast::token::{self, Nonterminal}; diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index 7b6840307cb42..2fc20e15c5aca 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -288,9 +288,12 @@ impl<'a> Parser<'a> { fn check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool { let prev_token_raw_str = match self.prev_token { - Token { kind: TokenKind::Literal(Lit { kind: LitKind::StrRaw(n), .. }), .. } => Some(n), Token { - kind: TokenKind::Literal(Lit { kind: LitKind::ByteStrRaw(n), .. }), .. + kind: + TokenKind::Literal(Lit { + kind: LitKind::StrRaw(n) | LitKind::ByteStrRaw(n), .. + }), + .. 
} => Some(n), _ => None, }; @@ -300,11 +303,11 @@ impl<'a> Parser<'a> { err.set_primary_message("too many `#` when terminating raw string"); err.span_suggestion( self.token.span, - "Remove the extra `#`", + "remove the extra `#`", String::new(), Applicability::MachineApplicable, ); - err.note(&format!("The raw string started with {} `#`s", n_hashes)); + err.note(&format!("the raw string started with {} `#`s", n_hashes)); return true; } } diff --git a/src/test/ui/parser/raw/raw-byte-string-eof.stderr b/src/test/ui/parser/raw/raw-byte-string-eof.stderr index 81344841c2700..a76668e8051b5 100644 --- a/src/test/ui/parser/raw/raw-byte-string-eof.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-eof.stderr @@ -2,7 +2,7 @@ error[E0748]: unterminated raw string --> $DIR/raw-byte-string-eof.rs:2:5 | LL | br##"a"#; - | ^ - help: you might have intended to terminate the string here: `##` + | ^ - help: consider terminating the string here: `##` | | | unterminated raw string | diff --git a/src/test/ui/parser/raw/raw-str-unbalanced.stderr b/src/test/ui/parser/raw/raw-str-unbalanced.stderr index 891f1d6337cd2..bf8f3a7a5a4bd 100644 --- a/src/test/ui/parser/raw/raw-str-unbalanced.stderr +++ b/src/test/ui/parser/raw/raw-str-unbalanced.stderr @@ -2,9 +2,9 @@ error: too many `#` when terminating raw string --> $DIR/raw-str-unbalanced.rs:3:9 | LL | "## - | ^ help: Remove the extra `#` + | ^ help: remove the extra `#` | - = note: The raw string started with 1 `#`s + = note: the raw string started with 1 `#`s error: aborting due to previous error diff --git a/src/test/ui/parser/raw/raw_string.stderr b/src/test/ui/parser/raw/raw_string.stderr index e91a16bedc46e..cc0eb4927003d 100644 --- a/src/test/ui/parser/raw/raw_string.stderr +++ b/src/test/ui/parser/raw/raw_string.stderr @@ -2,7 +2,7 @@ error[E0748]: unterminated raw string --> $DIR/raw_string.rs:2:13 | LL | let x = r##"lol"#; - | ^ - help: you might have intended to terminate the string here: `##` + | ^ - help: consider terminating 
the string here: `##` | | | unterminated raw string | From 82b2989ae0dbb1166289a360620e07865135a5e8 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Sun, 29 Mar 2020 11:34:15 -0400 Subject: [PATCH 4/6] More raw string tests --- src/test/ui/parser/raw/raw-string-2.rs | 4 ++++ src/test/ui/parser/raw/raw-string-2.stderr | 11 +++++++++++ .../ui/parser/raw/{raw_string.rs => raw-string.rs} | 0 .../raw/{raw_string.stderr => raw-string.stderr} | 2 +- 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 src/test/ui/parser/raw/raw-string-2.rs create mode 100644 src/test/ui/parser/raw/raw-string-2.stderr rename src/test/ui/parser/raw/{raw_string.rs => raw-string.rs} (100%) rename src/test/ui/parser/raw/{raw_string.stderr => raw-string.stderr} (92%) diff --git a/src/test/ui/parser/raw/raw-string-2.rs b/src/test/ui/parser/raw/raw-string-2.rs new file mode 100644 index 0000000000000..067332d2819bd --- /dev/null +++ b/src/test/ui/parser/raw/raw-string-2.rs @@ -0,0 +1,4 @@ +fn main() { + let x = r###"here's a long string"# "# "##; + //~^ ERROR unterminated raw string +} diff --git a/src/test/ui/parser/raw/raw-string-2.stderr b/src/test/ui/parser/raw/raw-string-2.stderr new file mode 100644 index 0000000000000..8bbac9d7bd0bd --- /dev/null +++ b/src/test/ui/parser/raw/raw-string-2.stderr @@ -0,0 +1,11 @@ +error[E0748]: unterminated raw string + --> $DIR/raw-string-2.rs:2:13 + | +LL | let x = r###"here's a long string"# "# "##; + | ^ unterminated raw string -- help: consider terminating the string here: `###` + | + = note: this raw string should be terminated with `"###` + +error: aborting due to previous error + +For more information about this error, try `rustc --explain E0748`. 
diff --git a/src/test/ui/parser/raw/raw_string.rs b/src/test/ui/parser/raw/raw-string.rs similarity index 100% rename from src/test/ui/parser/raw/raw_string.rs rename to src/test/ui/parser/raw/raw-string.rs diff --git a/src/test/ui/parser/raw/raw_string.stderr b/src/test/ui/parser/raw/raw-string.stderr similarity index 92% rename from src/test/ui/parser/raw/raw_string.stderr rename to src/test/ui/parser/raw/raw-string.stderr index cc0eb4927003d..b2b853a89e751 100644 --- a/src/test/ui/parser/raw/raw_string.stderr +++ b/src/test/ui/parser/raw/raw-string.stderr @@ -1,5 +1,5 @@ error[E0748]: unterminated raw string - --> $DIR/raw_string.rs:2:13 + --> $DIR/raw-string.rs:2:13 | LL | let x = r##"lol"#; | ^ - help: consider terminating the string here: `##` From bceab25d6c206a7b92716e0c9e9a89b97d131e8e Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Sun, 29 Mar 2020 12:02:28 -0400 Subject: [PATCH 5/6] Cleanup match expression --- src/librustc_parse/parser/diagnostics.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index 2fc20e15c5aca..e542588d8b5cc 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -6,7 +6,7 @@ use rustc_ast::ast::{ }; use rustc_ast::ast::{AttrVec, ItemKind, Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind}; use rustc_ast::ptr::P; -use rustc_ast::token::{self, Lit, LitKind, Token, TokenKind}; +use rustc_ast::token::{self, Lit, LitKind, TokenKind}; use rustc_ast::util::parser::AssocOp; use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashSet; @@ -287,14 +287,10 @@ impl<'a> Parser<'a> { } fn check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool { - let prev_token_raw_str = match self.prev_token { - Token { - kind: - TokenKind::Literal(Lit { - kind: LitKind::StrRaw(n) | LitKind::ByteStrRaw(n), .. - }), - .. 
- } => Some(n), + let prev_token_raw_str = match self.prev_token.kind { + TokenKind::Literal(Lit { + kind: LitKind::StrRaw(n) | LitKind::ByteStrRaw(n), .. + }) => Some(n), _ => None, }; @@ -523,7 +519,7 @@ impl<'a> Parser<'a> { .unwrap_or_else(|_| pprust::expr_to_string(&e)) }; err.span_suggestion_verbose( - inner_op.span.shrink_to_hi(), + inner_op.span.shrink_to_hi(), "split the comparison into two", format!(" && {}", expr_to_str(&r1)), Applicability::MaybeIncorrect, @@ -1118,7 +1114,7 @@ impl<'a> Parser<'a> { self.look_ahead(2, |t| t.is_ident()) || self.look_ahead(1, |t| t == &token::ModSep) && (self.look_ahead(2, |t| t.is_ident()) || // `foo:bar::baz` - self.look_ahead(2, |t| t == &token::Lt)) // `foo:bar::<baz>` + self.look_ahead(2, |t| t == &token::Lt)) // `foo:bar::<baz>` } pub(super) fn recover_seq_parse_error( From 20e21902bb993487c1486f2dd33d3bd65101f00c Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Mon, 30 Mar 2020 12:39:40 -0400 Subject: [PATCH 6/6] Clean up redundant conditions and match exprs --- src/librustc_lexer/src/lib.rs | 2 +- src/librustc_parse/parser/diagnostics.rs | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index 132607031ce65..fcb7475cc2e89 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -731,7 +731,7 @@ impl Cursor<'_> { n_end_hashes, possible_terminator_offset: None, }; - } else if n_end_hashes > 0 && n_end_hashes > max_hashes { + } else if n_end_hashes > max_hashes { // Keep track of possible terminators to give a hint about where there might be // a missing terminator possible_terminator_offset = diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index e542588d8b5cc..12b9b68268248 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -287,15 +287,14 @@ impl<'a> Parser<'a> { } fn
check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool { - let prev_token_raw_str = match self.prev_token.kind { - TokenKind::Literal(Lit { - kind: LitKind::StrRaw(n) | LitKind::ByteStrRaw(n), .. - }) => Some(n), - _ => None, - }; - - if let Some(n_hashes) = prev_token_raw_str { - if self.token.kind == TokenKind::Pound { + match (&self.prev_token.kind, &self.token.kind) { + ( + TokenKind::Literal(Lit { + kind: LitKind::StrRaw(n_hashes) | LitKind::ByteStrRaw(n_hashes), + .. + }), + TokenKind::Pound, + ) => { err.set_primary_message("too many `#` when terminating raw string"); err.span_suggestion( self.token.span, @@ -304,10 +303,10 @@ impl<'a> Parser<'a> { Applicability::MachineApplicable, ); err.note(&format!("the raw string started with {} `#`s", n_hashes)); - return true; + true } + _ => false, } - false } pub fn maybe_annotate_with_ascription(