Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overhaul keyword handling #134253

Merged
merged 4 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,8 @@ impl Token {
self.is_non_raw_ident_where(|id| id.name == kw)
}

/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case.
/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
/// token is an identifier equal to `kw` ignoring the case.
pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
self.is_keyword(kw)
|| (case == Case::Insensitive
Expand All @@ -916,6 +917,11 @@ impl Token {
self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
}

/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(&self) -> bool {
self.is_non_raw_ident_where(Ident::is_any_keyword)
}

/// Returns true for reserved identifiers used internally for elided lifetimes,
/// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special_ident(&self) -> bool {
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::AllKeywords;
use rustc_span::symbol::used_keywords;
use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym};
use thin_vec::{ThinVec, thin_vec};
use tracing::{debug, trace};
Expand Down Expand Up @@ -811,12 +811,12 @@ impl<'a> Parser<'a> {
// so that it gets generated only when the diagnostic needs it.
// Also, it is unlikely that this list is generated multiple times because the
// parser halts after execution hits this path.
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
let all_keywords = used_keywords(|| prev_ident.span.edition());

// Otherwise, check the previous token with all the keywords as possible candidates.
// This handles code like `Struct Human;` and `While a < b {}`.
// We check the previous token only when the current token is an identifier to avoid false
// positives like suggesting keyword `for` for `extern crate foo {}`.
// We check the previous token only when the current token is an identifier to avoid
// false positives like suggesting keyword `for` for `extern crate foo {}`.
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
err.subdiagnostic(misspelled_kw);
// We don't want other suggestions to be added as they are most likely meaningless
Expand Down
85 changes: 44 additions & 41 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,26 @@ mod tests;

// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
symbols! {
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
// and `AllKeywords`.
// This list includes things that are definitely keywords (e.g. `if`),
// a few things that are definitely not keywords (e.g. the empty symbol,
// `{{root}}`) and things where there is disagreement between people and/or
// documents (such as the Rust Reference) about whether it is a keyword
// (e.g. `_`).
//
// If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and
// `used_keywords`.
// But this should rarely be necessary if the keywords are kept in alphabetic order.
Keywords {
// Special reserved identifiers used internally for elided lifetimes,
// unnamed method parameters, crate root module, error recovery etc.
// Matching predicates: `is_any_keyword`, `is_special`/`is_reserved`
Empty: "",
PathRoot: "{{root}}",
DollarCrate: "$crate",
nnethercote marked this conversation as resolved.
Show resolved Hide resolved
Underscore: "_",
nnethercote marked this conversation as resolved.
Show resolved Hide resolved

// Keywords that are used in stable Rust.
// Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved`
As: "as",
Break: "break",
Const: "const",
Expand Down Expand Up @@ -69,6 +77,7 @@ symbols! {
While: "while",

// Keywords that are used in unstable Rust or reserved for future use.
// Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved`
Abstract: "abstract",
Become: "become",
Box: "box",
Expand All @@ -83,23 +92,29 @@ symbols! {
Yield: "yield",

// Edition-specific keywords that are used in stable Rust.
// Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if
// the edition suffices)
Async: "async", // >= 2018 Edition only
Await: "await", // >= 2018 Edition only
Dyn: "dyn", // >= 2018 Edition only

// Edition-specific keywords that are used in unstable Rust or reserved for future use.
// Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if
// the edition suffices)
Gen: "gen", // >= 2024 Edition only
Try: "try", // >= 2018 Edition only

// Special lifetime names
// "Lifetime keywords": regular keywords with a leading `'`.
// Matching predicates: `is_any_keyword`
UnderscoreLifetime: "'_",
nnethercote marked this conversation as resolved.
Show resolved Hide resolved
StaticLifetime: "'static",
nnethercote marked this conversation as resolved.
Show resolved Hide resolved

// Weak keywords, have special meaning only in specific contexts.
// Matching predicates: `is_any_keyword`
Auto: "auto",
Builtin: "builtin",
Catch: "catch",
Default: "default",
Gen: "gen",
MacroRules: "macro_rules",
Raw: "raw",
Reuse: "reuse",
Expand Down Expand Up @@ -2589,6 +2604,11 @@ pub mod sym {
}

impl Symbol {
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(self) -> bool {
self >= kw::As && self <= kw::Yeet
nnethercote marked this conversation as resolved.
Show resolved Hide resolved
}

fn is_special(self) -> bool {
self <= kw::Underscore
}
Expand All @@ -2606,8 +2626,8 @@ impl Symbol {
}

fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
self == kw::Try && edition().at_least_rust_2018()
|| self == kw::Gen && edition().at_least_rust_2024()
self == kw::Gen && edition().at_least_rust_2024()
|| self == kw::Try && edition().at_least_rust_2018()
}

pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
Expand Down Expand Up @@ -2645,6 +2665,11 @@ impl Symbol {
}

impl Ident {
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(self) -> bool {
self.name.is_any_keyword()
}

/// Returns `true` for reserved identifiers used internally for elided lifetimes,
/// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special(self) -> bool {
Expand Down Expand Up @@ -2683,41 +2708,19 @@ impl Ident {
}
}

/// An iterator over all the keywords in Rust.
#[derive(Copy, Clone)]
pub struct AllKeywords {
curr_idx: u32,
end_idx: u32,
}

impl AllKeywords {
/// Initialize a new iterator over all the keywords.
///
/// *Note:* Please update this if a new keyword is added beyond the current
/// range.
pub fn new() -> Self {
AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() }
}

/// Collect all the keywords in a given edition into a vector.
pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
self.filter(|&keyword| {
keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
/// Collect all the keywords in a given edition into a vector.
///
/// *Note:* Please update this if a new keyword is added beyond the current
/// range.
pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
(kw::Empty.as_u32()..kw::Yeet.as_u32())
.filter_map(|kw| {
let kw = Symbol::new(kw);
if kw.is_used_keyword_always() || kw.is_used_keyword_conditional(edition) {
nnethercote marked this conversation as resolved.
Show resolved Hide resolved
Some(kw)
} else {
None
}
})
.collect()
}
}

impl Iterator for AllKeywords {
type Item = Symbol;

fn next(&mut self) -> Option<Self::Item> {
if self.curr_idx <= self.end_idx {
let keyword = Symbol::new(self.curr_idx);
self.curr_idx += 1;
Some(keyword)
} else {
None
}
}
}
87 changes: 12 additions & 75 deletions src/tools/rustfmt/src/parse/macros/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use rustc_ast::{ast, ptr};
use rustc_parse::MACRO_ARGUMENTS;
use rustc_parse::parser::{ForceCollect, Parser, Recovery};
use rustc_session::parse::ParseSess;
use rustc_span::Symbol;
use rustc_span::symbol::{self, kw};
use rustc_span::symbol;

use crate::macros::MacroArg;
use crate::rewrite::RewriteContext;
Expand Down Expand Up @@ -82,18 +81,18 @@ pub(crate) struct ParsedMacroArgs {
}

fn check_keyword<'a, 'b: 'a>(parser: &'a mut Parser<'b>) -> Option<MacroArg> {
for &keyword in RUST_KW.iter() {
if parser.token.is_keyword(keyword)
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
{
parser.bump();
return Some(MacroArg::Keyword(
symbol::Ident::with_dummy_span(keyword),
parser.prev_token.span,
));
}
if parser.token.is_any_keyword()
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
{
let keyword = parser.token.ident().unwrap().0.name;
parser.bump();
Some(MacroArg::Keyword(
symbol::Ident::with_dummy_span(keyword),
parser.prev_token.span,
))
} else {
None
}
None
}

pub(crate) fn parse_macro_args(
Expand Down Expand Up @@ -169,65 +168,3 @@ pub(crate) fn parse_expr(
let mut parser = build_parser(context, tokens);
parser.parse_expr().ok()
}

const RUST_KW: [Symbol; 59] = [
kw::PathRoot,
kw::DollarCrate,
kw::Underscore,
kw::As,
kw::Box,
kw::Break,
kw::Const,
kw::Continue,
kw::Crate,
kw::Else,
kw::Enum,
kw::Extern,
kw::False,
kw::Fn,
kw::For,
kw::If,
kw::Impl,
kw::In,
kw::Let,
kw::Loop,
kw::Match,
kw::Mod,
kw::Move,
kw::Mut,
kw::Pub,
kw::Ref,
kw::Return,
kw::SelfLower,
kw::SelfUpper,
kw::Static,
kw::Struct,
kw::Super,
kw::Trait,
kw::True,
kw::Type,
kw::Unsafe,
kw::Use,
kw::Where,
kw::While,
kw::Abstract,
kw::Become,
kw::Do,
kw::Final,
kw::Macro,
kw::Override,
kw::Priv,
kw::Typeof,
kw::Unsized,
kw::Virtual,
kw::Yield,
kw::Dyn,
kw::Async,
kw::Try,
kw::UnderscoreLifetime,
kw::StaticLifetime,
kw::Auto,
kw::Catch,
kw::Default,
kw::Union,
];
Loading