From 7f9cc88f4a2226521d68f4b54f6d641978bb73e6 Mon Sep 17 00:00:00 2001 From: Charles Lew Date: Sun, 29 Dec 2019 19:50:43 +0800 Subject: [PATCH 1/2] Add symbol normalization for proc_macro_server. --- src/librustc_expand/proc_macro_server.rs | 2 ++ src/librustc_parse/lexer/mod.rs | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/librustc_expand/proc_macro_server.rs b/src/librustc_expand/proc_macro_server.rs index 790e1f0edc01b..b41490011af3f 100644 --- a/src/librustc_expand/proc_macro_server.rs +++ b/src/librustc_expand/proc_macro_server.rs @@ -1,5 +1,6 @@ use crate::base::ExtCtxt; +use rustc_parse::lexer::nfc_normalize; use rustc_parse::{nt_to_tokenstream, parse_stream_from_source_str}; use syntax::ast; use syntax::print::pprust; @@ -327,6 +328,7 @@ impl Ident { } } fn new(sym: Symbol, is_raw: bool, span: Span) -> Ident { + let sym = nfc_normalize(&sym.as_str()); let string = sym.as_str(); if !Self::is_valid(&string) { panic!("`{:?}` is not a valid identifier", string) diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index d69cd14d544db..b981e8d066fcd 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -471,16 +471,9 @@ impl<'a> StringReader<'a> { /// As symbol_from, with the text normalized into Unicode NFC form. fn nfc_symbol_from(&self, start: BytePos) -> Symbol { - use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization}; debug!("taking an normalized ident from {:?} to {:?}", start, self.pos); let sym = self.str_from(start); - match is_nfc_quick(sym.chars()) { - IsNormalized::Yes => Symbol::intern(sym), - _ => { - let sym_str: String = sym.chars().nfc().collect(); - Symbol::intern(&sym_str) - } - } + nfc_normalize(sym) } /// Slice of the source text spanning from `start` up to but excluding `end`. @@ -651,3 +644,14 @@ impl<'a> StringReader<'a> { } } } + +pub fn nfc_normalize(string: &str) -> Symbol { + use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization}; + match is_nfc_quick(string.chars()) { + IsNormalized::Yes => Symbol::intern(string), + _ => { + let normalized_str: String = string.chars().nfc().collect(); + Symbol::intern(&normalized_str) + } + } +} From 8f84d9e1de91261682d5e4ef8f046c9491802dee Mon Sep 17 00:00:00 2001 From: Charles Lew Date: Mon, 30 Dec 2019 20:00:05 +0800 Subject: [PATCH 2/2] Inline and remove `nfc_symbol_from` method. --- src/librustc_parse/lexer/mod.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index b981e8d066fcd..30ec202e0ffe3 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -220,7 +220,7 @@ impl<'a> StringReader<'a> { if is_raw_ident { ident_start = ident_start + BytePos(2); } - let sym = self.nfc_symbol_from(ident_start); + let sym = nfc_normalize(self.str_from(ident_start)); if is_raw_ident { let span = self.mk_sp(start, self.pos); if !sym.can_be_raw() { @@ -469,13 +469,6 @@ impl<'a> StringReader<'a> { Symbol::intern(self.str_from_to(start, end)) } - /// As symbol_from, with the text normalized into Unicode NFC form. - fn nfc_symbol_from(&self, start: BytePos) -> Symbol { - debug!("taking an normalized ident from {:?} to {:?}", start, self.pos); - let sym = self.str_from(start); - nfc_normalize(sym) - } - /// Slice of the source text spanning from `start` up to but excluding `end`. fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { &self.src[self.src_index(start)..self.src_index(end)]