From 25a5ee809cabe46833718b43c604b50b1ad68342 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sun, 14 Jan 2024 10:50:11 +0000 Subject: [PATCH] perf(parser): lexer match byte not char (#2025) Two related changes to the lexer's `read_next_token()`: 1. Hint to the compiler that Unicode identifiers and non-standard whitespace are rare by moving that branch into a function marked `#[cold]`. 2. The branch is on whether the next character is ASCII or not. This check only requires reading 1 byte, as ASCII characters are always a single byte in UTF-8. So only do the work of getting a `char` in the cold path, once it's established that the character is not ASCII and this work is required. --- crates/oxc_parser/src/lexer/mod.rs | 35 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 4ff2177848515..f066ff82ebdc6 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -374,28 +374,31 @@ impl<'a> Lexer<'a> { let offset = self.offset(); self.current.token.start = offset; - if let Some(c) = self.current.chars.clone().next() { - let kind = self.match_char(c); - if !matches!( - kind, - Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment - ) { - return kind; - } - } else { + let remaining = self.current.chars.as_str(); + if remaining.is_empty() { return Kind::Eof; } - } - } - #[inline] - fn match_char(&mut self, c: char) -> Kind { - let size = c as usize; + let byte = remaining.as_bytes()[0]; + let kind = if byte < 128 { + BYTE_HANDLERS[byte as usize](self) + } else { + self.match_unicode_char() + }; - if size < 128 { - return BYTE_HANDLERS[size](self); + if !matches!( + kind, + Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment + ) { + return kind; + } } + } + // `#[cold]` to hint to branch predictor that unicode identifiers and irregular whitespace are rare + #[cold] + fn match_unicode_char(&mut self) -> Kind { +
let c = self.current.chars.clone().next().unwrap(); match c { c if is_id_start_unicode(c) => { let mut builder = AutoCow::new(self);