Skip to content

Commit

Permalink
Rollup merge of #69384 - petrochenkov:nounnorm, r=Centril
Browse files Browse the repository at this point in the history
parser: `token` -> `normalized_token`, `nonnormalized_token` -> `token`

So, after #69006, its follow-ups and an attempt to remove `Parser::prev_span`, I came to the conclusion that the unnormalized token and its span are what you want in most cases, so they should be the default.

Normalization only makes a difference in a few cases where we are checking against `token::Ident` or `token::Lifetime` specifically.
This PR uses `normalized_token` for those cases.

Using normalization explicitly means that people writing code should remember about `NtIdent` and `NtLifetime` in general. (That is alleviated by the fact that `token.ident()` and `fn parse_ident_*` are already written.)
Remembering about `NtIdent` was, however, already necessary to some degree, because the implicit normalization was performed only for the current/previous token, but not for things like `look_ahead`.
As a result, most of the token classification methods in `token.rs` already take `NtIdent` into account (this PR fixes a few pre-existing minor mistakes though).

The next step is removing `normalized(_prev)_token` entirely and replacing it with `token.ident()` (mostly) and `token.normalize()` (occasionally).
I want to make a separate PR for that and run it through perf.
`normalized_token` filled on every bump has the potential both to avoid repeated normalization and to do unnecessary work in advance (it probably doesn't matter anyway, since the normalization is very cheap).

r? @Centril
  • Loading branch information
Centril authored Feb 28, 2020
2 parents 76fe449 + b2605c1 commit 3828fa2
Show file tree
Hide file tree
Showing 16 changed files with 132 additions and 125 deletions.
2 changes: 1 addition & 1 deletion src/librustc_builtin_macros/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ fn parse_args<'a>(
} // accept trailing commas
if p.token.is_ident() && p.look_ahead(1, |t| *t == token::Eq) {
named = true;
let name = if let token::Ident(name, _) = p.token.kind {
let name = if let token::Ident(name, _) = p.normalized_token.kind {
p.bump();
name
} else {
Expand Down
9 changes: 7 additions & 2 deletions src/librustc_expand/mbe/macro_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,12 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
fn get_macro_name(token: &Token) -> Option<(Name, bool)> {
match token.kind {
token::Ident(name, is_raw) if name != kw::Underscore => Some((name, is_raw)),
token::Interpolated(ref nt) => match **nt {
token::NtIdent(ident, is_raw) if ident.name != kw::Underscore => {
Some((ident.name, is_raw))
}
_ => None,
},
_ => None,
}
}
Expand Down Expand Up @@ -883,9 +889,8 @@ fn parse_nt_inner<'a>(p: &mut Parser<'a>, sp: Span, name: Symbol) -> PResult<'a,
// this could be handled like a token, since it is one
sym::ident => {
if let Some((name, is_raw)) = get_macro_name(&p.token) {
let span = p.token.span;
p.bump();
token::NtIdent(Ident::new(name, span), is_raw)
token::NtIdent(Ident::new(name, p.normalized_prev_token.span), is_raw)
} else {
let token_str = pprust::token_to_string(&p.token);
let msg = &format!("expected ident, found {}", &token_str);
Expand Down
13 changes: 6 additions & 7 deletions src/librustc_parse/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use syntax::ast::{
};
use syntax::ast::{AttrVec, ItemKind, Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind};
use syntax::ptr::P;
use syntax::token::{self, token_can_begin_expr, TokenKind};
use syntax::token::{self, TokenKind};
use syntax::util::parser::AssocOp;

use log::{debug, trace};
Expand Down Expand Up @@ -192,12 +192,12 @@ impl<'a> Parser<'a> {
TokenKind::CloseDelim(token::DelimToken::Brace),
TokenKind::CloseDelim(token::DelimToken::Paren),
];
if let token::Ident(name, false) = self.token.kind {
if Ident::new(name, self.token.span).is_raw_guess()
if let token::Ident(name, false) = self.normalized_token.kind {
if Ident::new(name, self.normalized_token.span).is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
{
err.span_suggestion(
self.token.span,
self.normalized_token.span,
"you can escape reserved keywords to use them as identifiers",
format!("r#{}", name),
Applicability::MaybeIncorrect,
Expand Down Expand Up @@ -900,8 +900,7 @@ impl<'a> Parser<'a> {
} else if !sm.is_multiline(self.prev_span.until(self.token.span)) {
// The current token is in the same line as the prior token, not recoverable.
} else if self.look_ahead(1, |t| {
t == &token::CloseDelim(token::Brace)
|| token_can_begin_expr(t) && t.kind != token::Colon
t == &token::CloseDelim(token::Brace) || t.can_begin_expr() && t.kind != token::Colon
}) && [token::Comma, token::Colon].contains(&self.token.kind)
{
// Likely typo: `,` → `;` or `:` → `;`. This is triggered if the current token is
Expand All @@ -919,7 +918,7 @@ impl<'a> Parser<'a> {
} else if self.look_ahead(0, |t| {
t == &token::CloseDelim(token::Brace)
|| (
token_can_begin_expr(t) && t != &token::Semi && t != &token::Pound
t.can_begin_expr() && t != &token::Semi && t != &token::Pound
// Avoid triggering with too many trailing `#` in raw string.
)
}) {
Expand Down
39 changes: 18 additions & 21 deletions src/librustc_parse/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,14 @@ impl<'a> Parser<'a> {
fn parse_expr_catch_underscore(&mut self) -> PResult<'a, P<Expr>> {
match self.parse_expr() {
Ok(expr) => Ok(expr),
Err(mut err) => match self.token.kind {
Err(mut err) => match self.normalized_token.kind {
token::Ident(name, false)
if name == kw::Underscore && self.look_ahead(1, |t| t == &token::Comma) =>
{
// Special-case handling of `foo(_, _, _)`
err.emit();
let sp = self.token.span;
self.bump();
Ok(self.mk_expr(sp, ExprKind::Err, AttrVec::new()))
Ok(self.mk_expr(self.prev_token.span, ExprKind::Err, AttrVec::new()))
}
_ => Err(err),
},
Expand Down Expand Up @@ -166,7 +165,7 @@ impl<'a> Parser<'a> {
while let Some(op) = self.check_assoc_op() {
// Adjust the span for interpolated LHS to point to the `$lhs` token
// and not to what it refers to.
let lhs_span = match self.unnormalized_prev_token.kind {
let lhs_span = match self.prev_token.kind {
TokenKind::Interpolated(..) => self.prev_span,
_ => lhs.span,
};
Expand Down Expand Up @@ -333,7 +332,7 @@ impl<'a> Parser<'a> {
/// Also performs recovery for `and` / `or` which are mistaken for `&&` and `||` respectively.
fn check_assoc_op(&self) -> Option<Spanned<AssocOp>> {
Some(Spanned {
node: match (AssocOp::from_token(&self.token), &self.token.kind) {
node: match (AssocOp::from_token(&self.token), &self.normalized_token.kind) {
(Some(op), _) => op,
(None, token::Ident(sym::and, false)) => {
self.error_bad_logical_op("and", "&&", "conjunction");
Expand All @@ -345,7 +344,7 @@ impl<'a> Parser<'a> {
}
_ => return None,
},
span: self.token.span,
span: self.normalized_token.span,
})
}

Expand Down Expand Up @@ -437,7 +436,7 @@ impl<'a> Parser<'a> {
let attrs = self.parse_or_use_outer_attributes(attrs)?;
let lo = self.token.span;
// Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
let (hi, ex) = match self.token.kind {
let (hi, ex) = match self.normalized_token.kind {
token::Not => self.parse_unary_expr(lo, UnOp::Not), // `!expr`
token::Tilde => self.recover_tilde_expr(lo), // `~expr`
token::BinOp(token::Minus) => self.parse_unary_expr(lo, UnOp::Neg), // `-expr`
Expand Down Expand Up @@ -523,7 +522,7 @@ impl<'a> Parser<'a> {
) -> PResult<'a, (Span, P<Expr>)> {
expr.map(|e| {
(
match self.unnormalized_prev_token.kind {
match self.prev_token.kind {
TokenKind::Interpolated(..) => self.prev_span,
_ => e.span,
},
Expand Down Expand Up @@ -704,7 +703,7 @@ impl<'a> Parser<'a> {
}

fn parse_dot_suffix_expr(&mut self, lo: Span, base: P<Expr>) -> PResult<'a, P<Expr>> {
match self.token.kind {
match self.normalized_token.kind {
token::Ident(..) => self.parse_dot_suffix(base, lo),
token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => {
Ok(self.parse_tuple_field_access_expr(lo, base, symbol, suffix))
Expand Down Expand Up @@ -773,8 +772,8 @@ impl<'a> Parser<'a> {
field: Symbol,
suffix: Option<Symbol>,
) -> P<Expr> {
let span = self.token.span;
self.bump();
let span = self.prev_token.span;
let field = ExprKind::Field(base, Ident::new(field, span));
self.expect_no_suffix(span, "a tuple index", suffix);
self.mk_expr(lo.to(span), field, AttrVec::new())
Expand All @@ -798,7 +797,7 @@ impl<'a> Parser<'a> {

/// Assuming we have just parsed `.`, continue parsing into an expression.
fn parse_dot_suffix(&mut self, self_arg: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> {
if self.token.span.rust_2018() && self.eat_keyword(kw::Await) {
if self.normalized_token.span.rust_2018() && self.eat_keyword(kw::Await) {
return self.mk_await_expr(self_arg, lo);
}

Expand Down Expand Up @@ -912,7 +911,7 @@ impl<'a> Parser<'a> {
// | ^ expected expression
self.bump();
Ok(self.mk_expr_err(self.token.span))
} else if self.token.span.rust_2018() {
} else if self.normalized_token.span.rust_2018() {
// `Span::rust_2018()` is somewhat expensive; don't get it repeatedly.
if self.check_keyword(kw::Async) {
if self.is_async_block() {
Expand Down Expand Up @@ -1342,7 +1341,7 @@ impl<'a> Parser<'a> {
if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable };

let asyncness =
if self.token.span.rust_2018() { self.parse_asyncness() } else { Async::No };
if self.normalized_token.span.rust_2018() { self.parse_asyncness() } else { Async::No };
if asyncness.is_async() {
// Feature-gate `async ||` closures.
self.sess.gated_spans.gate(sym::async_closure, self.prev_span);
Expand Down Expand Up @@ -1556,9 +1555,8 @@ impl<'a> Parser<'a> {

fn eat_label(&mut self) -> Option<Label> {
self.token.lifetime().map(|ident| {
let span = self.token.span;
self.bump();
Label { ident: Ident::new(ident.name, span) }
Label { ident }
})
}

Expand Down Expand Up @@ -1700,7 +1698,7 @@ impl<'a> Parser<'a> {
fn is_try_block(&self) -> bool {
self.token.is_keyword(kw::Try) &&
self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) &&
self.token.span.rust_2018() &&
self.normalized_token.span.rust_2018() &&
// Prevent `while try {} {}`, `if try {} {} else {}`, etc.
!self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL)
}
Expand Down Expand Up @@ -1850,13 +1848,12 @@ impl<'a> Parser<'a> {

/// Use in case of error after field-looking code: `S { foo: () with a }`.
fn find_struct_error_after_field_looking_code(&self) -> Option<Field> {
if let token::Ident(name, _) = self.token.kind {
if let token::Ident(name, _) = self.normalized_token.kind {
if !self.token.is_reserved_ident() && self.look_ahead(1, |t| *t == token::Colon) {
let span = self.token.span;
return Some(ast::Field {
ident: Ident::new(name, span),
span,
expr: self.mk_expr_err(span),
ident: Ident::new(name, self.normalized_token.span),
span: self.token.span,
expr: self.mk_expr_err(self.token.span),
is_shorthand: false,
attrs: AttrVec::new(),
id: DUMMY_NODE_ID,
Expand Down
14 changes: 6 additions & 8 deletions src/librustc_parse/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -741,11 +741,10 @@ impl<'a> Parser<'a> {
}

fn parse_ident_or_underscore(&mut self) -> PResult<'a, ast::Ident> {
match self.token.kind {
match self.normalized_token.kind {
token::Ident(name @ kw::Underscore, false) => {
let span = self.token.span;
self.bump();
Ok(Ident::new(name, span))
Ok(Ident::new(name, self.normalized_prev_token.span))
}
_ => self.parse_ident(),
}
Expand Down Expand Up @@ -1537,7 +1536,7 @@ impl<'a> Parser<'a> {

let is_name_required = match self.token.kind {
token::DotDotDot => false,
_ => req_name(&self.token),
_ => req_name(&self.normalized_token),
};
let (pat, ty) = if is_name_required || self.is_named_param() {
debug!("parse_param_general parse_pat (is_name_required:{})", is_name_required);
Expand Down Expand Up @@ -1603,12 +1602,11 @@ impl<'a> Parser<'a> {
fn parse_self_param(&mut self) -> PResult<'a, Option<Param>> {
// Extract an identifier *after* having confirmed that the token is one.
let expect_self_ident = |this: &mut Self| {
match this.token.kind {
match this.normalized_token.kind {
// Preserve hygienic context.
token::Ident(name, _) => {
let span = this.token.span;
this.bump();
Ident::new(name, span)
Ident::new(name, this.normalized_prev_token.span)
}
_ => unreachable!(),
}
Expand Down Expand Up @@ -1645,7 +1643,7 @@ impl<'a> Parser<'a> {
// Only a limited set of initial token sequences is considered `self` parameters; anything
// else is parsed as a normal function parameter list, so some lookahead is required.
let eself_lo = self.token.span;
let (eself, eself_ident, eself_hi) = match self.token.kind {
let (eself, eself_ident, eself_hi) = match self.normalized_token.kind {
token::BinOp(token::And) => {
let eself = if is_isolated_self(self, 1) {
// `&self`
Expand Down
48 changes: 23 additions & 25 deletions src/librustc_parse/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,23 +86,22 @@ macro_rules! maybe_recover_from_interpolated_ty_qpath {
#[derive(Clone)]
pub struct Parser<'a> {
pub sess: &'a ParseSess,
/// The current non-normalized token.
pub token: Token,
/// The current normalized token.
/// "Normalized" means that some interpolated tokens
/// (`$i: ident` and `$l: lifetime` meta-variables) are replaced
/// with non-interpolated identifier and lifetime tokens they refer to.
/// Use span from this token if you need an isolated span.
pub token: Token,
/// The current non-normalized token if it's different from `token`.
/// Use span from this token if you need to concatenate it with some neighbouring spans.
unnormalized_token: Token,
/// Use this if you need to check for `token::Ident` or `token::Lifetime` specifically,
/// this also includes edition checks for edition-specific keyword identifiers.
pub normalized_token: Token,
/// The previous non-normalized token.
pub prev_token: Token,
/// The previous normalized token.
/// Use span from this token if you need an isolated span.
prev_token: Token,
/// The previous non-normalized token if it's different from `prev_token`.
/// Use span from this token if you need to concatenate it with some neighbouring spans.
unnormalized_prev_token: Token,
/// Equivalent to `unnormalized_prev_token.span`.
/// FIXME: Remove in favor of `(unnormalized_)prev_token.span`.
/// Use this if you need to check for `token::Ident` or `token::Lifetime` specifically,
/// this also includes edition checks for edition-specific keyword identifiers.
pub normalized_prev_token: Token,
/// FIXME: Remove in favor of the equivalent `prev_token.span`.
pub prev_span: Span,
restrictions: Restrictions,
/// Used to determine the path to externally loaded source files.
Expand Down Expand Up @@ -375,9 +374,9 @@ impl<'a> Parser<'a> {
let mut parser = Parser {
sess,
token: Token::dummy(),
unnormalized_token: Token::dummy(),
normalized_token: Token::dummy(),
prev_token: Token::dummy(),
unnormalized_prev_token: Token::dummy(),
normalized_prev_token: Token::dummy(),
prev_span: DUMMY_SP,
restrictions: Restrictions::empty(),
recurse_into_file_modules,
Expand Down Expand Up @@ -482,7 +481,7 @@ impl<'a> Parser<'a> {
}

fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, ast::Ident> {
match self.token.kind {
match self.normalized_token.kind {
token::Ident(name, _) => {
if self.token.is_reserved_ident() {
let mut err = self.expected_ident_found();
Expand All @@ -492,9 +491,8 @@ impl<'a> Parser<'a> {
return Err(err);
}
}
let span = self.token.span;
self.bump();
Ok(Ident::new(name, span))
Ok(Ident::new(name, self.normalized_prev_token.span))
}
_ => Err(match self.prev_token.kind {
TokenKind::DocComment(..) => {
Expand Down Expand Up @@ -824,16 +822,16 @@ impl<'a> Parser<'a> {
// tokens are replaced with usual identifier and lifetime tokens,
// so the former are never encountered during normal parsing.
crate fn set_token(&mut self, token: Token) {
self.unnormalized_token = token;
self.token = match &self.unnormalized_token.kind {
self.token = token;
self.normalized_token = match &self.token.kind {
token::Interpolated(nt) => match **nt {
token::NtIdent(ident, is_raw) => {
Token::new(token::Ident(ident.name, is_raw), ident.span)
}
token::NtLifetime(ident) => Token::new(token::Lifetime(ident.name), ident.span),
_ => self.unnormalized_token.clone(),
_ => self.token.clone(),
},
_ => self.unnormalized_token.clone(),
_ => self.token.clone(),
}
}

Expand All @@ -847,19 +845,19 @@ impl<'a> Parser<'a> {

// Update the current and previous tokens.
self.prev_token = self.token.take();
self.unnormalized_prev_token = self.unnormalized_token.take();
self.normalized_prev_token = self.normalized_token.take();
self.set_token(next_token);

// Update fields derived from the previous token.
self.prev_span = self.unnormalized_prev_token.span;
self.prev_span = self.prev_token.span;

// Diagnostics.
self.expected_tokens.clear();
}

/// Advance the parser by one token.
pub fn bump(&mut self) {
let next_token = self.next_tok(self.unnormalized_token.span);
let next_token = self.next_tok(self.token.span);
self.bump_with(next_token);
}

Expand Down Expand Up @@ -890,7 +888,7 @@ impl<'a> Parser<'a> {
/// Parses asyncness: `async` or nothing.
fn parse_asyncness(&mut self) -> Async {
if self.eat_keyword(kw::Async) {
let span = self.prev_span;
let span = self.normalized_prev_token.span;
Async::Yes { span, closure_id: DUMMY_NODE_ID, return_impl_trait_id: DUMMY_NODE_ID }
} else {
Async::No
Expand Down
Loading

0 comments on commit 3828fa2

Please sign in to comment.