Skip to content

Commit

Permalink
Rollup merge of rust-lang#134253 - nnethercote:overhaul-keywords, r=p…
Browse files Browse the repository at this point in the history
…etrochenkov

Overhaul keyword handling

The compiler's list of keywords has some problems.
- It contains several items that aren't keywords.
- The order isn't quite right in a couple of places.
- Some of the names of predicates relating to keywords are confusing.
- rustdoc and rustfmt have their own (incorrect) versions of the keyword list.
- `AllKeywords` is unnecessarily complex.

r? ``@jieyouxu``
  • Loading branch information
jieyouxu authored Dec 18, 2024
2 parents 78e269b + 6de550c commit e552264
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 121 deletions.
8 changes: 7 additions & 1 deletion compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,8 @@ impl Token {
self.is_non_raw_ident_where(|id| id.name == kw)
}

/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case.
/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
/// token is an identifier equal to `kw` ignoring the case.
pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
self.is_keyword(kw)
|| (case == Case::Insensitive
Expand All @@ -916,6 +917,11 @@ impl Token {
self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
}

/// Reports whether this token passes `is_non_raw_ident_where` with
/// `Ident::is_any_keyword` as the predicate.
///
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(&self) -> bool {
self.is_non_raw_ident_where(Ident::is_any_keyword)
}

/// Returns true for reserved identifiers used internally for elided lifetimes,
/// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special_ident(&self) -> bool {
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::AllKeywords;
use rustc_span::symbol::used_keywords;
use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym};
use thin_vec::{ThinVec, thin_vec};
use tracing::{debug, trace};
Expand Down Expand Up @@ -811,12 +811,12 @@ impl<'a> Parser<'a> {
// so that it gets generated only when the diagnostic needs it.
// Also, it is unlikely that this list is generated multiple times because the
// parser halts after execution hits this path.
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
let all_keywords = used_keywords(|| prev_ident.span.edition());

// Otherwise, check the previous token with all the keywords as possible candidates.
// This handles code like `Struct Human;` and `While a < b {}`.
// We check the previous token only when the current token is an identifier to avoid false
// positives like suggesting keyword `for` for `extern crate foo {}`.
// We check the previous token only when the current token is an identifier to avoid
// false positives like suggesting keyword `for` for `extern crate foo {}`.
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
err.subdiagnostic(misspelled_kw);
// We don't want other suggestions to be added as they are most likely meaningless
Expand Down
85 changes: 44 additions & 41 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,26 @@ mod tests;

// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
symbols! {
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
// and `AllKeywords`.
// This list includes things that are definitely keywords (e.g. `if`),
// a few things that are definitely not keywords (e.g. the empty symbol,
// `{{root}}`) and things where there is disagreement between people and/or
// documents (such as the Rust Reference) about whether it is a keyword
// (e.g. `_`).
//
// If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and
// `used_keywords`.
// But this should rarely be necessary if the keywords are kept in alphabetic order.
Keywords {
// Special reserved identifiers used internally for elided lifetimes,
// unnamed method parameters, crate root module, error recovery etc.
// Matching predicates: `is_any_keyword`, `is_special`/`is_reserved`
Empty: "",
PathRoot: "{{root}}",
DollarCrate: "$crate",
Underscore: "_",

// Keywords that are used in stable Rust.
// Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved`
As: "as",
Break: "break",
Const: "const",
Expand Down Expand Up @@ -69,6 +77,7 @@ symbols! {
While: "while",

// Keywords that are used in unstable Rust or reserved for future use.
// Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved`
Abstract: "abstract",
Become: "become",
Box: "box",
Expand All @@ -83,23 +92,29 @@ symbols! {
Yield: "yield",

// Edition-specific keywords that are used in stable Rust.
// Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if
// the edition suffices)
Async: "async", // >= 2018 Edition only
Await: "await", // >= 2018 Edition only
Dyn: "dyn", // >= 2018 Edition only

// Edition-specific keywords that are used in unstable Rust or reserved for future use.
// Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if
// the edition suffices)
Gen: "gen", // >= 2024 Edition only
Try: "try", // >= 2018 Edition only

// Special lifetime names
// "Lifetime keywords": regular keywords with a leading `'`.
// Matching predicates: `is_any_keyword`
UnderscoreLifetime: "'_",
StaticLifetime: "'static",

// Weak keywords, have special meaning only in specific contexts.
// Matching predicates: `is_any_keyword`
Auto: "auto",
Builtin: "builtin",
Catch: "catch",
Default: "default",
Gen: "gen",
MacroRules: "macro_rules",
Raw: "raw",
Reuse: "reuse",
Expand Down Expand Up @@ -2589,6 +2604,11 @@ pub mod sym {
}

impl Symbol {
/// Returns `true` if this symbol compares within the inclusive bounds
/// `kw::As` and `kw::Yeet`.
///
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(self) -> bool {
// NOTE(review): anything comparing below `kw::As` is rejected. The comment in the
// `Keywords` list names `is_any_keyword` as a matching predicate for the special
// identifiers listed before `As` (`Empty`, `PathRoot`, `DollarCrate`, `Underscore`);
// presumably those compare below `kw::As` and are therefore not matched here —
// confirm whether the lower bound or that comment is the intended behavior.
self >= kw::As && self <= kw::Yeet
}

fn is_special(self) -> bool {
self <= kw::Underscore
}
Expand All @@ -2606,8 +2626,8 @@ impl Symbol {
}

fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
self == kw::Try && edition().at_least_rust_2018()
|| self == kw::Gen && edition().at_least_rust_2024()
self == kw::Gen && edition().at_least_rust_2024()
|| self == kw::Try && edition().at_least_rust_2018()
}

pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
Expand Down Expand Up @@ -2645,6 +2665,11 @@ impl Symbol {
}

impl Ident {
/// Forwards to `is_any_keyword` on this identifier's `name`.
///
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(self) -> bool {
self.name.is_any_keyword()
}

/// Returns `true` for reserved identifiers used internally for elided lifetimes,
/// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special(self) -> bool {
Expand Down Expand Up @@ -2683,41 +2708,19 @@ impl Ident {
}
}

/// An iterator over all the keywords in Rust.
#[derive(Copy, Clone)]
pub struct AllKeywords {
curr_idx: u32,
end_idx: u32,
}

impl AllKeywords {
/// Initialize a new iterator over all the keywords.
///
/// *Note:* Please update this if a new keyword is added beyond the current
/// range.
pub fn new() -> Self {
AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() }
}

/// Collect all the keywords in a given edition into a vector.
pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
self.filter(|&keyword| {
keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
/// Builds a vector of the keywords that are in use for the given edition.
///
/// A symbol is kept when it satisfies `is_used_keyword_always`, or
/// `is_used_keyword_conditional` for the edition supplied by `edition`.
///
/// *Note:* The scan covers the half-open index range starting at `kw::Empty` and
/// stopping just before `kw::Yeet`; please update it if a new keyword is added
/// beyond that range.
pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
    let first = kw::Empty.as_u32();
    let last = kw::Yeet.as_u32();
    (first..last)
        .map(Symbol::new)
        .filter(|&sym| sym.is_used_keyword_always() || sym.is_used_keyword_conditional(edition))
        .collect()
}
}

impl Iterator for AllKeywords {
type Item = Symbol;

fn next(&mut self) -> Option<Self::Item> {
if self.curr_idx <= self.end_idx {
let keyword = Symbol::new(self.curr_idx);
self.curr_idx += 1;
Some(keyword)
} else {
None
}
}
}
87 changes: 12 additions & 75 deletions src/tools/rustfmt/src/parse/macros/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use rustc_ast::{ast, ptr};
use rustc_parse::MACRO_ARGUMENTS;
use rustc_parse::parser::{ForceCollect, Parser, Recovery};
use rustc_session::parse::ParseSess;
use rustc_span::Symbol;
use rustc_span::symbol::{self, kw};
use rustc_span::symbol;

use crate::macros::MacroArg;
use crate::rewrite::RewriteContext;
Expand Down Expand Up @@ -82,18 +81,18 @@ pub(crate) struct ParsedMacroArgs {
}

fn check_keyword<'a, 'b: 'a>(parser: &'a mut Parser<'b>) -> Option<MacroArg> {
for &keyword in RUST_KW.iter() {
if parser.token.is_keyword(keyword)
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
{
parser.bump();
return Some(MacroArg::Keyword(
symbol::Ident::with_dummy_span(keyword),
parser.prev_token.span,
));
}
if parser.token.is_any_keyword()
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
{
let keyword = parser.token.ident().unwrap().0.name;
parser.bump();
Some(MacroArg::Keyword(
symbol::Ident::with_dummy_span(keyword),
parser.prev_token.span,
))
} else {
None
}
None
}

pub(crate) fn parse_macro_args(
Expand Down Expand Up @@ -169,65 +168,3 @@ pub(crate) fn parse_expr(
let mut parser = build_parser(context, tokens);
parser.parse_expr().ok()
}

const RUST_KW: [Symbol; 59] = [
kw::PathRoot,
kw::DollarCrate,
kw::Underscore,
kw::As,
kw::Box,
kw::Break,
kw::Const,
kw::Continue,
kw::Crate,
kw::Else,
kw::Enum,
kw::Extern,
kw::False,
kw::Fn,
kw::For,
kw::If,
kw::Impl,
kw::In,
kw::Let,
kw::Loop,
kw::Match,
kw::Mod,
kw::Move,
kw::Mut,
kw::Pub,
kw::Ref,
kw::Return,
kw::SelfLower,
kw::SelfUpper,
kw::Static,
kw::Struct,
kw::Super,
kw::Trait,
kw::True,
kw::Type,
kw::Unsafe,
kw::Use,
kw::Where,
kw::While,
kw::Abstract,
kw::Become,
kw::Do,
kw::Final,
kw::Macro,
kw::Override,
kw::Priv,
kw::Typeof,
kw::Unsized,
kw::Virtual,
kw::Yield,
kw::Dyn,
kw::Async,
kw::Try,
kw::UnderscoreLifetime,
kw::StaticLifetime,
kw::Auto,
kw::Catch,
kw::Default,
kw::Union,
];

0 comments on commit e552264

Please sign in to comment.