diff --git a/Cargo.toml b/Cargo.toml
index a5a4a972..c9ebee27 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,6 @@ repository = "https://github.com/servo/rust-cssparser"
 readme = "README.md"
 keywords = ["css", "syntax", "parser"]
 license = "MPL-2.0"
-build = "build.rs"
 edition = "2018"
 rust-version = "1.63"
 
diff --git a/build.rs b/build.rs
deleted file mode 100644
index 9ecf32b6..00000000
--- a/build.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#[cfg(feature = "dummy_match_byte")]
-mod codegen {
-    pub fn main() {}
-}
-
-#[cfg(not(feature = "dummy_match_byte"))]
-#[path = "build/match_byte.rs"]
-mod match_byte;
-
-#[cfg(not(feature = "dummy_match_byte"))]
-mod codegen {
-    use std::env;
-    use std::path::Path;
-    use std::thread::Builder;
-
-    pub fn main() {
-        let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
-
-        let input = Path::new(&manifest_dir).join("src/tokenizer.rs");
-        let output = Path::new(&env::var("OUT_DIR").unwrap()).join("tokenizer.rs");
-        println!("cargo:rerun-if-changed={}", input.display());
-
-        // We have stack overflows on Servo's CI.
-        let handle = Builder::new()
-            .stack_size(128 * 1024 * 1024)
-            .spawn(move || {
-                crate::match_byte::expand(&input, &output);
-            })
-            .unwrap();
-
-        handle.join().unwrap();
-    }
-}
-
-fn main() {
-    codegen::main();
-}
diff --git a/build/match_byte.rs b/build/match_byte.rs
deleted file mode 100644
index edc057e6..00000000
--- a/build/match_byte.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use quote::{quote, ToTokens};
-use std::fs::File;
-use std::io::{Read, Write};
-use std::path::Path;
-use syn;
-use syn::fold::Fold;
-use syn::parse::{Parse, ParseStream, Result};
-use syn::{parse_quote, Token};
-
-use proc_macro2::{Span, TokenStream};
-
-struct MatchByteParser {}
-
-pub fn expand(from: &Path, to: &Path) {
-    let mut source = String::new();
-    File::open(from)
-        .unwrap()
-        .read_to_string(&mut source)
-        .unwrap();
-    let ast = syn::parse_file(&source).expect("Parsing rules.rs module");
-    let mut m = MatchByteParser {};
-    let ast = m.fold_file(ast);
-
-    let code = ast
-        .into_token_stream()
-        .to_string()
-        .replace("{ ", "{\n")
-        .replace(" }", "\n}");
-    File::create(to)
-        .unwrap()
-        .write_all(code.as_bytes())
-        .unwrap();
-}
-
-struct MatchByte {
-    expr: syn::Expr,
-    arms: Vec<syn::Arm>,
-}
-
-impl Parse for MatchByte {
-    fn parse(input: ParseStream) -> Result<Self> {
-        Ok(MatchByte {
-            expr: {
-                let expr = input.parse()?;
-                input.parse::<Token![,]>()?;
-                expr
-            },
-            arms: {
-                let mut arms = Vec::new();
-                while !input.is_empty() {
-                    arms.push(input.call(syn::Arm::parse)?);
-                }
-                arms
-            },
-        })
-    }
-}
-
-fn get_byte_from_expr_lit(expr: &Box<syn::Expr>) -> u8 {
-    match **expr {
-        syn::Expr::Lit(syn::ExprLit { ref lit, .. }) => {
-            if let syn::Lit::Byte(ref byte) = *lit {
-                byte.value()
-            } else {
-                panic!("Found a pattern that wasn't a byte")
-            }
-        }
-        _ => unreachable!(),
-    }
-}
-
-/// Parse a pattern and fill the table accordingly
-fn parse_pat_to_table<'a>(
-    pat: &'a syn::Pat,
-    case_id: u8,
-    wildcard: &mut Option<&'a syn::Ident>,
-    table: &mut [u8; 256],
-) {
-    match pat {
-        &syn::Pat::Lit(syn::PatLit { ref expr, .. }) => {
-            let value = get_byte_from_expr_lit(expr);
-            if table[value as usize] == 0 {
-                table[value as usize] = case_id;
-            }
-        }
-        &syn::Pat::Range(syn::PatRange { ref lo, ref hi, .. }) => {
-            let lo = get_byte_from_expr_lit(lo);
-            let hi = get_byte_from_expr_lit(hi);
-            for value in lo..hi {
-                if table[value as usize] == 0 {
-                    table[value as usize] = case_id;
-                }
-            }
-            if table[hi as usize] == 0 {
-                table[hi as usize] = case_id;
-            }
-        }
-        &syn::Pat::Wild(_) => {
-            for byte in table.iter_mut() {
-                if *byte == 0 {
-                    *byte = case_id;
-                }
-            }
-        }
-        &syn::Pat::Ident(syn::PatIdent { ref ident, .. }) => {
-            assert_eq!(*wildcard, None);
-            *wildcard = Some(ident);
-            for byte in table.iter_mut() {
-                if *byte == 0 {
-                    *byte = case_id;
-                }
-            }
-        }
-        &syn::Pat::Or(syn::PatOr { ref cases, .. }) => {
-            for case in cases {
-                parse_pat_to_table(case, case_id, wildcard, table);
-            }
-        }
-        _ => {
-            panic!("Unexpected pattern: {:?}. Buggy code ?", pat);
-        }
-    }
-}
-
-/// Expand a TokenStream corresponding to the `match_byte` macro.
-///
-/// ## Example
-///
-/// ```rust
-/// match_byte! { tokenizer.next_byte_unchecked(),
-///     b'a'..b'z' => { ... }
-///     b'0'..b'9' => { ... }
-///     b'\n' | b'\\' => { ... }
-///     foo => { ... }
-/// }
-/// ```
-///
-fn expand_match_byte(body: &TokenStream) -> syn::Expr {
-    let match_byte: MatchByte = syn::parse2(body.clone()).unwrap();
-    let expr = match_byte.expr;
-    let mut cases = Vec::new();
-    let mut table = [0u8; 256];
-    let mut match_body = Vec::new();
-    let mut wildcard = None;
-
-    for (i, ref arm) in match_byte.arms.iter().enumerate() {
-        let case_id = i + 1;
-        let index = case_id as isize;
-        let name = syn::Ident::new(&format!("Case{}", case_id), Span::call_site());
-        parse_pat_to_table(&arm.pat, case_id as u8, &mut wildcard, &mut table);
-
-        cases.push(quote!(#name = #index));
-        let body = &arm.body;
-        match_body.push(quote!(Case::#name => { #body }))
-    }
-    let en = quote!(enum Case {
-        #(#cases),*
-    });
-
-    let mut table_content = Vec::new();
-    for entry in table.iter() {
-        let name: syn::Path = syn::parse_str(&format!("Case::Case{}", entry)).unwrap();
-        table_content.push(name);
-    }
-    let table = quote!(static __CASES: [Case; 256] = [#(#table_content),*];);
-
-    let expr = if let Some(binding) = wildcard {
-        quote!({ #en #table let #binding = #expr; match __CASES[#binding as usize] { #(#match_body),* }})
-    } else {
-        quote!({ #en #table match __CASES[#expr as usize] { #(#match_body),* }})
-    };
-
-    syn::parse2(expr.into()).unwrap()
-}
-
-impl Fold for MatchByteParser {
-    fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt {
-        match stmt {
-            syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => {
-                if mac.path == parse_quote!(match_byte) {
-                    return syn::fold::fold_stmt(
-                        self,
-                        syn::Stmt::Expr(expand_match_byte(&mac.tokens)),
-                    );
-                }
-            }
-            _ => {}
-        }
-
-        syn::fold::fold_stmt(self, stmt)
-    }
-
-    fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr {
-        match expr {
-            syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => {
-                if mac.path == parse_quote!(match_byte) {
-                    return syn::fold::fold_expr(self, expand_match_byte(&mac.tokens));
-                }
-            }
-            _ => {}
-        }
-
-        syn::fold::fold_expr(self, expr)
-    }
-}
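Both the build-time expander deleted above and the proc macro added below compile the `match_byte!` arms into a 256-entry dispatch table: arms are numbered from 1, and the first arm that claims a byte keeps it, so overlapping patterns and a trailing wildcard resolve in source order. A standalone sketch of that fill rule (the ranges and case ids here are made up for illustration):

```rust
/// Minimal illustration of the first-claim-wins fill used by
/// `parse_pat_to_table`: 0 means "unclaimed", arms are numbered from 1.
fn build_table(arms: &[(std::ops::RangeInclusive<u8>, u8)]) -> [u8; 256] {
    let mut table = [0u8; 256];
    for (range, case_id) in arms {
        for byte in range.clone() {
            if table[byte as usize] == 0 {
                table[byte as usize] = *case_id;
            }
        }
    }
    table
}

fn main() {
    // Arm 1 matches digits; arm 2 is a wildcard covering every byte.
    let table = build_table(&[(b'0'..=b'9', 1), (0..=255, 2)]);
    assert_eq!(table[b'7' as usize], 1); // digits stay with the earlier arm
    assert_eq!(table[b'x' as usize], 2); // everything else falls to the wildcard
}
```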
diff --git a/macros/lib.rs b/macros/lib.rs
index 971c0304..64d749f0 100644
--- a/macros/lib.rs
+++ b/macros/lib.rs
@@ -37,3 +37,150 @@ pub fn _cssparser_internal_max_len(input: TokenStream) -> TokenStream {
     )
     .into()
 }
+
+fn get_byte_from_expr_lit(expr: &syn::Expr) -> u8 {
+    match *expr {
+        syn::Expr::Lit(syn::ExprLit { ref lit, .. }) => {
+            if let syn::Lit::Byte(ref byte) = *lit {
+                byte.value()
+            } else {
+                panic!("Found a pattern that wasn't a byte")
+            }
+        }
+        _ => unreachable!(),
+    }
+}
+
+/// Parse a pattern and fill the table accordingly
+fn parse_pat_to_table<'a>(
+    pat: &'a syn::Pat,
+    case_id: u8,
+    wildcard: &mut Option<&'a syn::Ident>,
+    table: &mut [u8; 256],
+) {
+    match pat {
+        &syn::Pat::Lit(syn::PatLit { ref expr, .. }) => {
+            let value = get_byte_from_expr_lit(expr);
+            if table[value as usize] == 0 {
+                table[value as usize] = case_id;
+            }
+        }
+        &syn::Pat::Range(syn::PatRange { ref lo, ref hi, .. }) => {
+            let lo = get_byte_from_expr_lit(lo);
+            let hi = get_byte_from_expr_lit(hi);
+            for value in lo..hi {
+                if table[value as usize] == 0 {
+                    table[value as usize] = case_id;
+                }
+            }
+            if table[hi as usize] == 0 {
+                table[hi as usize] = case_id;
+            }
+        }
+        &syn::Pat::Wild(_) => {
+            for byte in table.iter_mut() {
+                if *byte == 0 {
+                    *byte = case_id;
+                }
+            }
+        }
+        &syn::Pat::Ident(syn::PatIdent { ref ident, .. }) => {
+            assert_eq!(*wildcard, None);
+            *wildcard = Some(ident);
+            for byte in table.iter_mut() {
+                if *byte == 0 {
+                    *byte = case_id;
+                }
+            }
+        }
+        &syn::Pat::Or(syn::PatOr { ref cases, .. }) => {
+            for case in cases {
+                parse_pat_to_table(case, case_id, wildcard, table);
+            }
+        }
+        _ => {
+            panic!("Unexpected pattern: {:?}. Buggy code ?", pat);
+        }
+    }
+}
+
+/// Expand a TokenStream corresponding to the `match_byte` macro.
+///
+/// ## Example
+///
+/// ```rust
+/// match_byte! { tokenizer.next_byte_unchecked(),
+///     b'a'..b'z' => { ... }
+///     b'0'..b'9' => { ... }
+///     b'\n' | b'\\' => { ... }
+///     foo => { ... }
+/// }
+/// ```
+///
+#[proc_macro]
+pub fn match_byte(input: TokenStream) -> TokenStream {
+    use syn::spanned::Spanned;
+    struct MatchByte {
+        expr: syn::Expr,
+        arms: Vec<syn::Arm>,
+    }
+
+    impl syn::parse::Parse for MatchByte {
+        fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
+            Ok(MatchByte {
+                expr: {
+                    let expr = input.parse()?;
+                    input.parse::<syn::Token![,]>()?;
+                    expr
+                },
+                arms: {
+                    let mut arms = Vec::new();
+                    while !input.is_empty() {
+                        let arm = input.call(syn::Arm::parse)?;
+                        assert!(arm.guard.is_none(), "match_byte doesn't support guards");
+                        assert!(
+                            arm.attrs.is_empty(),
+                            "match_byte doesn't support attributes"
+                        );
+                        arms.push(arm);
+                    }
+                    arms
+                },
+            })
+        }
+    }
+    let MatchByte { expr, arms } = syn::parse_macro_input!(input);
+
+    let mut cases = Vec::new();
+    let mut table = [0u8; 256];
+    let mut match_body = Vec::new();
+    let mut wildcard = None;
+    for (i, ref arm) in arms.iter().enumerate() {
+        let case_id = i + 1;
+        let index = case_id as isize;
+        let name = syn::Ident::new(&format!("Case{}", case_id), arm.span());
+        let pat = &arm.pat;
+        parse_pat_to_table(pat, case_id as u8, &mut wildcard, &mut table);
+
+        cases.push(quote::quote!(#name = #index));
+        let body = &arm.body;
+        match_body.push(quote::quote!(Case::#name => { #body }))
+    }
+
+    let en = quote::quote!(enum Case {
+        #(#cases),*
+    });
+
+    let mut table_content = Vec::new();
+    for entry in table.iter() {
+        let name: syn::Path = syn::parse_str(&format!("Case::Case{}", entry)).unwrap();
+        table_content.push(name);
+    }
+    let table = quote::quote!(static __CASES: [Case; 256] = [#(#table_content),*];);
+
+    if let Some(binding) = wildcard {
+        quote::quote!({ #en #table let #binding = #expr; match __CASES[#binding as usize] { #(#match_body),* }})
+    } else {
+        quote::quote!({ #en #table match __CASES[#expr as usize] { #(#match_body),* }})
+    }.into()
+}
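A minimal, hypothetical consumer of the new `match_byte!` proc macro, assuming a dependency on the `cssparser-macros` crate; the tokenizer changes below use it the same way:

```rust
use cssparser_macros::match_byte;

fn is_hex_digit(b: u8) -> bool {
    // Expands to a `static __CASES: [Case; 256]` lookup table plus a
    // `match` on the table entry, instead of a chain of range comparisons.
    match_byte! { b,
        b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => true,
        _ => false,
    }
}

fn main() {
    assert!(is_hex_digit(b'c'));
    assert!(!is_hex_digit(b'g'));
}
```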
diff --git a/src/lib.rs b/src/lib.rs
index bac46221..2292aded 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -97,14 +97,8 @@ pub use phf as _cssparser_internal_phf;
 
 mod macros;
 mod rules_and_declarations;
-
-#[cfg(feature = "dummy_match_byte")]
 mod tokenizer;
-#[cfg(not(feature = "dummy_match_byte"))]
-mod tokenizer {
-    include!(concat!(env!("OUT_DIR"), "/tokenizer.rs"));
-}
 
 mod color;
 mod cow_rc_str;
 mod from_bytes;
diff --git a/src/macros.rs b/src/macros.rs
index 882b0bbc..981af010 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -183,14 +183,3 @@ pub fn _cssparser_internal_to_lowercase<'a>(
         },
     )
 }
-
-#[cfg(feature = "dummy_match_byte")]
-macro_rules! match_byte {
-    ($value:expr, $($rest:tt)* ) => {
-        match $value {
-            $(
-                $rest
-            )+
-        }
-    };
-}
diff --git a/src/parser.rs b/src/parser.rs
index 535e7895..afc6281d 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -700,10 +700,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// Caller must deal with the fact that the resulting list might be empty,
     /// if there's no valid component on the list.
     #[inline]
-    pub fn parse_comma_separated_ignoring_errors<F, T, E: 'i>(
-        &mut self,
-        parse_one: F,
-    ) -> Vec<T>
+    pub fn parse_comma_separated_ignoring_errors<F, T, E: 'i>(&mut self, parse_one: F) -> Vec<T>
     where
         F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
     {
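For context on the (purely reformatted) signature above, a usage sketch of `parse_comma_separated_ignoring_errors`; the input string and the `()` error type are illustrative assumptions:

```rust
use cssparser::{Parser, ParserInput};

fn main() {
    // "2px" tokenizes as a dimension, so it fails to parse as an
    // integer and that component is simply dropped from the result.
    let mut input = ParserInput::new("1, 2px, 3");
    let mut parser = Parser::new(&mut input);
    let ints: Vec<i32> = parser
        .parse_comma_separated_ignoring_errors::<_, i32, ()>(|p| {
            p.expect_integer().map_err(Into::into)
        });
    assert_eq!(ints, vec![1, 3]);
}
```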
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c87add8a..4cc6c178 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -10,6 +10,20 @@ use crate::parser::ParserState;
 use std::char;
 use std::ops::Range;
 
+#[cfg(not(feature = "dummy_match_byte"))]
+use cssparser_macros::match_byte;
+
+#[cfg(feature = "dummy_match_byte")]
+macro_rules! match_byte {
+    ($value:expr, $($rest:tt)* ) => {
+        match $value {
+            $(
+                $rest
+            )+
+        }
+    };
+}
+
 /// One of the pieces the CSS input is broken into.
 ///
 /// Some components use `Cow` in order to borrow from the original input string
@@ -222,7 +236,7 @@ impl<'a> Tokenizer<'a> {
     #[inline]
     pub fn with_first_line_number(input: &str, first_line_number: u32) -> Tokenizer {
         Tokenizer {
-            input: input,
+            input,
             position: 0,
             current_line_start_position: 0,
             current_line_number: first_line_number,
@@ -469,9 +483,7 @@ impl<'a> Tokenizer<'a> {
                     return
                 }
             }
-            _ => {
-                return
-            }
+            _ => return,
         }
     }
 }
@@ -546,10 +558,8 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
         b' ' | b'\t' => {
            consume_whitespace(tokenizer, false)
        },
-        b'\n' | b'\x0C' | b'\r' => {
-            consume_whitespace(tokenizer, true)
-        },
-        b'"' => { consume_string(tokenizer, false) },
+        b'\n' | b'\x0C' | b'\r' => consume_whitespace(tokenizer, true),
+        b'"' => consume_string(tokenizer, false),
         b'#' => {
             tokenizer.advance(1);
             if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
@@ -564,7 +574,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
             if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
             else { tokenizer.advance(1); Delim('$') }
         },
-        b'\'' => { consume_string(tokenizer, true) },
+        b'\'' => consume_string(tokenizer, true),
         b'(' => { tokenizer.advance(1); ParenthesisBlock },
         b')' => { tokenizer.advance(1); CloseParenthesis },
         b'*' => {
@@ -625,7 +635,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
                 Delim('/')
             }
         }
-        b'0'..=b'9' => { consume_numeric(tokenizer) },
+        b'0'..=b'9' => consume_numeric(tokenizer),
         b':' => { tokenizer.advance(1); Colon },
         b';' => { tokenizer.advance(1); Semicolon },
         b'<' => {
@@ -642,7 +652,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
             if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
             else { Delim('@') }
         },
-        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { consume_ident_like(tokenizer) },
+        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => consume_ident_like(tokenizer),
         b'[' => { tokenizer.advance(1); SquareBracketBlock },
         b'\\' => {
             if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
@@ -880,18 +890,18 @@ fn consume_quoted_string<'a>(
 fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
     !tokenizer.is_eof()
         && match_byte! { tokenizer.next_byte_unchecked(),
-            b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { true },
+            b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => true,
             b'-' => {
                 tokenizer.has_at_least(1)
                     && match_byte! { tokenizer.byte_at(1),
                         b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => { true }
-                        b'\\' => { !tokenizer.has_newline_at(1) }
-                        b => { !b.is_ascii() },
+                        b'\\' => !tokenizer.has_newline_at(1),
+                        b => !b.is_ascii(),
                     }
             },
-            b'\\' => { !tokenizer.has_newline_at(1) },
-            b => { !b.is_ascii() },
+            b'\\' => !tokenizer.has_newline_at(1),
+            b => !b.is_ascii(),
         }
 }
@@ -919,7 +929,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
             return tokenizer.slice_from(start_pos).into();
         }
         match_byte! { tokenizer.next_byte_unchecked(),
-            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => { tokenizer.advance(1) },
+            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => tokenizer.advance(1),
             b'\\' | b'\0' => {
                 // * The tokenizer’s input is UTF-8 since it’s `&str`.
                 // * start_pos is at a code point boundary
@@ -983,9 +993,9 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
 
 fn byte_to_hex_digit(b: u8) -> Option<u8> {
     Some(match_byte! { b,
-        b'0' ..= b'9' => { b - b'0' },
-        b'a' ..= b'f' => { b - b'a' + 10 },
-        b'A' ..= b'F' => { b - b'A' + 10 },
+        b'0' ..= b'9' => b - b'0',
+        b'a' ..= b'f' => b - b'a' + 10,
+        b'A' ..= b'F' => b - b'A' + 10,
         _ => {
             return None
         }
@@ -1091,24 +1101,24 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
         tokenizer.advance(1);
         return Percentage {
             unit_value: (value / 100.) as f32,
-            int_value: int_value,
-            has_sign: has_sign,
+            int_value,
+            has_sign,
         };
     }
     let value = value as f32;
     if is_ident_start(tokenizer) {
         let unit = consume_name(tokenizer);
         Dimension {
-            value: value,
-            int_value: int_value,
-            has_sign: has_sign,
-            unit: unit,
+            value,
+            int_value,
+            has_sign,
+            unit,
         }
     } else {
         Number {
-            value: value,
-            int_value: int_value,
-            has_sign: has_sign,
+            value,
+            int_value,
+            has_sign,
         }
     }
 }
@@ -1150,7 +1160,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
                     last_newline = offset;
                 }
             }
-            b'"' | b'\'' => { return Err(()) }, // Do not advance
+            b'"' | b'\'' => return Err(()), // Do not advance
             b')' => {
                 // Don't use advance, because we may be skipping
                 // newlines here, and we want to avoid the assert.
@@ -1390,6 +1400,6 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
             tokenizer.advance(1);
             '\u{FFFD}'
         }
-        _ => { tokenizer.consume_char() }
+        _ => tokenizer.consume_char(),
     }
 }
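To summarize the two code paths that now live in `tokenizer.rs`: with the `dummy_match_byte` feature, the `macro_rules!` fallback leaves `match_byte!` as a plain byte `match`, while the default path goes through the proc macro's lookup table. A rough sketch of both expansions for one hypothetical invocation (`consume_other` is a made-up stand-in, and the real proc-macro output spells out all 256 table entries):

```rust
// Given:
//
//     match_byte! { b,
//         b'0'..=b'9' => consume_numeric(tokenizer),
//         _ => consume_other(tokenizer),
//     }
//
// the dummy_match_byte fallback expands to a plain byte match:
//
//     match b {
//         b'0'..=b'9' => consume_numeric(tokenizer),
//         _ => consume_other(tokenizer),
//     }
//
// while the proc macro emits, roughly:
//
//     {
//         enum Case { Case1 = 1, Case2 = 2 }
//         static __CASES: [Case; 256] = [/* Case1 for b'0'..=b'9', else Case2 */];
//         match __CASES[b as usize] {
//             Case::Case1 => { consume_numeric(tokenizer) },
//             Case::Case2 => { consume_other(tokenizer) },
//         }
//     }
```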