From 64abe8be33326643bcc32e25e4c6aaf3555e145b Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 13 Dec 2024 20:54:36 +1100 Subject: [PATCH 1/4] Simplify `AllKeywords`. It's a verbose reinvention of a range type, and can be cut down a lot. --- .../rustc_parse/src/parser/diagnostics.rs | 4 +- compiler/rustc_span/src/symbol.rs | 52 ++++++------------- 2 files changed, 17 insertions(+), 39 deletions(-) diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index d1a725e729aa2..9b335df6c0f3f 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -22,7 +22,7 @@ use rustc_errors::{ use rustc_session::errors::ExprParenthesesNeeded; use rustc_span::edit_distance::find_best_match_for_name; use rustc_span::source_map::Spanned; -use rustc_span::symbol::AllKeywords; +use rustc_span::symbol::used_keywords; use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym}; use thin_vec::{ThinVec, thin_vec}; use tracing::{debug, trace}; @@ -811,7 +811,7 @@ impl<'a> Parser<'a> { // so that it gets generated only when the diagnostic needs it. // Also, it is unlikely that this list is generated multiple times because the // parser halts after execution hits this path. - let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition()); + let all_keywords = used_keywords(|| prev_ident.span.edition()); // Otherwise, check the previous token with all the keywords as possible candidates. // This handles code like `Struct Human;` and `While a < b {}`. diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 7d99ca5a31e2d..64ee9007df089 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -20,8 +20,8 @@ mod tests; // The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`. symbols! { - // If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword` - // and `AllKeywords`. + // If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` functions and + // `used_keywords`. // But this should rarely be necessary if the keywords are kept in alphabetic order. Keywords { // Special reserved identifiers used internally for elided lifetimes, @@ -2683,41 +2683,19 @@ impl Ident { } } -/// An iterator over all the keywords in Rust. -#[derive(Copy, Clone)] -pub struct AllKeywords { - curr_idx: u32, - end_idx: u32, -} - -impl AllKeywords { - /// Initialize a new iterator over all the keywords. - /// - /// *Note:* Please update this if a new keyword is added beyond the current - /// range. - pub fn new() -> Self { - AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() } - } - - /// Collect all the keywords in a given edition into a vector. - pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec { - self.filter(|&keyword| { - keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition) +/// Collect all the keywords in a given edition into a vector. +/// +/// *Note:* Please update this if a new keyword is added beyond the current +/// range. +pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec { + (kw::Empty.as_u32()..kw::Yeet.as_u32()) + .filter_map(|kw| { + let kw = Symbol::new(kw); + if kw.is_used_keyword_always() || kw.is_used_keyword_conditional(edition) { + Some(kw) + } else { + None + } }) .collect() - } -} - -impl Iterator for AllKeywords { - type Item = Symbol; - - fn next(&mut self) -> Option { - if self.curr_idx <= self.end_idx { - let keyword = Symbol::new(self.curr_idx); - self.curr_idx += 1; - Some(keyword) - } else { - None - } - } } From 1564318482e80152d43150fdfc3ade2c868f2677 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 13 Dec 2024 21:07:58 +1100 Subject: [PATCH 2/4] Only have one source of truth for keywords. `rustc_symbol` is the source of truth for keywords. rustdoc has its own implicit definition of keywords, via the `is_doc_keyword`. It (presumably) intends to include all keywords, but it omits `yeet`. rustfmt has its own explicit list of Rust keywords. It also (presumably) intends to include all keywords, but it omits `await`, `builtin`, `gen`, `macro_rules`, `raw`, `reuse`, `safe`, and `yeet`. Also, it does linear searches through this list, which is inefficient. This commit fixes all of the above problems by introducing a new predicate `is_any_keyword` in rustc and using it in rustdoc and rustfmt. It documents that it's not the right predicate in most cases. --- compiler/rustc_ast/src/token.rs | 8 +- .../rustc_parse/src/parser/diagnostics.rs | 4 +- compiler/rustc_span/src/symbol.rs | 10 +++ src/tools/rustfmt/src/parse/macros/mod.rs | 87 +++---------------- 4 files changed, 31 insertions(+), 78 deletions(-) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index ab82f18133e78..f639e785bc4f4 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -903,7 +903,8 @@ impl Token { self.is_non_raw_ident_where(|id| id.name == kw) } - /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case. + /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this + /// token is an identifier equal to `kw` ignoring the case. pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool { self.is_keyword(kw) || (case == Case::Insensitive @@ -916,6 +917,11 @@ impl Token { self.is_non_raw_ident_where(Ident::is_path_segment_keyword) } + /// Don't use this unless you're doing something very loose and heuristic-y. + pub fn is_any_keyword(&self) -> bool { + self.is_non_raw_ident_where(Ident::is_any_keyword) + } + /// Returns true for reserved identifiers used internally for elided lifetimes, /// unnamed method parameters, crate root module, error recovery etc. pub fn is_special_ident(&self) -> bool { diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index 9b335df6c0f3f..8417701ac0cd5 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -815,8 +815,8 @@ impl<'a> Parser<'a> { // Otherwise, check the previous token with all the keywords as possible candidates. // This handles code like `Struct Human;` and `While a < b {}`. - // We check the previous token only when the current token is an identifier to avoid false - // positives like suggesting keyword `for` for `extern crate foo {}`. + // We check the previous token only when the current token is an identifier to avoid + // false positives like suggesting keyword `for` for `extern crate foo {}`. if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) { err.subdiagnostic(misspelled_kw); // We don't want other suggestions to be added as they are most likely meaningless diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 64ee9007df089..d2391f30168f9 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2589,6 +2589,11 @@ pub mod sym { } impl Symbol { + /// Don't use this unless you're doing something very loose and heuristic-y. + pub fn is_any_keyword(self) -> bool { + self >= kw::As && self <= kw::Yeet + } + fn is_special(self) -> bool { self <= kw::Underscore } @@ -2645,6 +2650,11 @@ impl Symbol { } impl Ident { + /// Don't use this unless you're doing something very loose and heuristic-y. + pub fn is_any_keyword(self) -> bool { + self.name.is_any_keyword() + } + /// Returns `true` for reserved identifiers used internally for elided lifetimes, /// unnamed method parameters, crate root module, error recovery etc. pub fn is_special(self) -> bool { diff --git a/src/tools/rustfmt/src/parse/macros/mod.rs b/src/tools/rustfmt/src/parse/macros/mod.rs index 7271e73db8dd4..680a35f7e03ad 100644 --- a/src/tools/rustfmt/src/parse/macros/mod.rs +++ b/src/tools/rustfmt/src/parse/macros/mod.rs @@ -4,8 +4,7 @@ use rustc_ast::{ast, ptr}; use rustc_parse::MACRO_ARGUMENTS; use rustc_parse::parser::{ForceCollect, Parser, Recovery}; use rustc_session::parse::ParseSess; -use rustc_span::Symbol; -use rustc_span::symbol::{self, kw}; +use rustc_span::symbol; use crate::macros::MacroArg; use crate::rewrite::RewriteContext; @@ -82,18 +81,18 @@ pub(crate) struct ParsedMacroArgs { } fn check_keyword<'a, 'b: 'a>(parser: &'a mut Parser<'b>) -> Option { - for &keyword in RUST_KW.iter() { - if parser.token.is_keyword(keyword) - && parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma) - { - parser.bump(); - return Some(MacroArg::Keyword( - symbol::Ident::with_dummy_span(keyword), - parser.prev_token.span, - )); - } + if parser.token.is_any_keyword() + && parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma) + { + let keyword = parser.token.ident().unwrap().0.name; + parser.bump(); + Some(MacroArg::Keyword( + symbol::Ident::with_dummy_span(keyword), + parser.prev_token.span, + )) + } else { + None } - None } pub(crate) fn parse_macro_args( @@ -169,65 +168,3 @@ pub(crate) fn parse_expr( let mut parser = build_parser(context, tokens); parser.parse_expr().ok() } - -const RUST_KW: [Symbol; 59] = [ - kw::PathRoot, - kw::DollarCrate, - kw::Underscore, - kw::As, - kw::Box, - kw::Break, - kw::Const, - kw::Continue, - kw::Crate, - kw::Else, - kw::Enum, - kw::Extern, - kw::False, - kw::Fn, - kw::For, - kw::If, - kw::Impl, - kw::In, - kw::Let, - kw::Loop, - kw::Match, - kw::Mod, - kw::Move, - kw::Mut, - kw::Pub, - kw::Ref, - kw::Return, - kw::SelfLower, - kw::SelfUpper, - kw::Static, - kw::Struct, - kw::Super, - kw::Trait, - kw::True, - kw::Type, - kw::Unsafe, - kw::Use, - kw::Where, - kw::While, - kw::Abstract, - kw::Become, - kw::Do, - kw::Final, - kw::Macro, - kw::Override, - kw::Priv, - kw::Typeof, - kw::Unsized, - kw::Virtual, - kw::Yield, - kw::Dyn, - kw::Async, - kw::Try, - kw::UnderscoreLifetime, - kw::StaticLifetime, - kw::Auto, - kw::Catch, - kw::Default, - kw::Union, -]; From ed5b91abc8591fbbc4337cc14b484eb3ed5a2955 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 13 Dec 2024 13:43:47 +1100 Subject: [PATCH 3/4] Move `gen` in the keyword list. `gen` is an edition-specific keyword used in unstable Rust, and so belongs with `try` (as `is_unused_keyword_conditional` indicates). Also, the cases in `is_unused_keyword_conditional` should be in alphabetical order, to match the keyword list. These changes don't affect the behaviour of any of the `Symbol::is_*` functions. --- compiler/rustc_span/src/symbol.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index d2391f30168f9..11441854a76c7 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -88,6 +88,7 @@ symbols! { Dyn: "dyn", // >= 2018 Edition only // Edition-specific keywords that are used in unstable Rust or reserved for future use. + Gen: "gen", // >= 2024 Edition only Try: "try", // >= 2018 Edition only // Special lifetime names @@ -99,7 +100,6 @@ symbols! { Builtin: "builtin", Catch: "catch", Default: "default", - Gen: "gen", MacroRules: "macro_rules", Raw: "raw", Reuse: "reuse", @@ -2611,8 +2611,8 @@ impl Symbol { } fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool { - self == kw::Try && edition().at_least_rust_2018() - || self == kw::Gen && edition().at_least_rust_2024() + self == kw::Gen && edition().at_least_rust_2024() + || self == kw::Try && edition().at_least_rust_2018() } pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool { From 6de550cc68d5eb7dc5013987c87859e2f589ac12 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 17 Dec 2024 11:46:23 +1100 Subject: [PATCH 4/4] Improve comments on `Keywords`. In particular, clarify which predicates apply to which keywords. --- compiler/rustc_span/src/symbol.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 11441854a76c7..a7ff0576f92d6 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -20,18 +20,26 @@ mod tests; // The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`. symbols! { - // If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` functions and + // This list includes things that are definitely keywords (e.g. `if`), + // a few things that are definitely not keywords (e.g. the empty symbol, + // `{{root}}`) and things where there is disagreement between people and/or + // documents (such as the Rust Reference) about whether it is a keyword + // (e.g. `_`). + // + // If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and // `used_keywords`. // But this should rarely be necessary if the keywords are kept in alphabetic order. Keywords { // Special reserved identifiers used internally for elided lifetimes, // unnamed method parameters, crate root module, error recovery etc. + // Matching predicates: `is_any_keyword`, `is_special`/`is_reserved` Empty: "", PathRoot: "{{root}}", DollarCrate: "$crate", Underscore: "_", // Keywords that are used in stable Rust. + // Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved` As: "as", Break: "break", Const: "const", @@ -69,6 +77,7 @@ symbols! { While: "while", // Keywords that are used in unstable Rust or reserved for future use. + // Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved` Abstract: "abstract", Become: "become", Box: "box", @@ -83,19 +92,25 @@ symbols! { Yield: "yield", // Edition-specific keywords that are used in stable Rust. + // Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if + // the edition suffices) Async: "async", // >= 2018 Edition only Await: "await", // >= 2018 Edition only Dyn: "dyn", // >= 2018 Edition only // Edition-specific keywords that are used in unstable Rust or reserved for future use. + // Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if + // the edition suffices) Gen: "gen", // >= 2024 Edition only Try: "try", // >= 2018 Edition only - // Special lifetime names + // "Lifetime keywords": regular keywords with a leading `'`. + // Matching predicates: `is_any_keyword` UnderscoreLifetime: "'_", StaticLifetime: "'static", // Weak keywords, have special meaning only in specific contexts. + // Matching predicates: `is_any_keyword` Auto: "auto", Builtin: "builtin", Catch: "catch",