From b8cddcf9e16ec8d56c2143530acf51d6bd62d349 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Tue, 23 Oct 2018 14:06:04 +0100 Subject: [PATCH 01/11] We can now use Error::* --- src/parser.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 6a2d7c9..f67337d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -37,10 +37,7 @@ pub enum Error { /// An unknown encoding was specified in the metadata UnknownEncoding, } -// Can not use use `Error::*` as per this issue: -// (https://github.com/rust-lang/rust/issues/4865) -use Error::{BadMagic, DecodingError, Eof, Io, MalformedMetadata, MisplacedMetadata, - UnknownEncoding}; +use Error::*; impl error::Error for Error { fn description(&self) -> &str { From ecd150c9fb80b485c5ef17c4fe51e80e23e58654 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Tue, 23 Oct 2018 14:17:55 +0100 Subject: [PATCH 02/11] Run rustfmt And remove rustfmt.toml since it was only compatible with Rust nightly --- rustfmt.toml | 1 - src/lib.rs | 87 ++++++++++++++++++++++++++++++------------------- src/metadata.rs | 3 +- src/parser.rs | 34 ++++++++++++------- 4 files changed, 77 insertions(+), 48 deletions(-) delete mode 100644 rustfmt.toml diff --git a/rustfmt.toml b/rustfmt.toml deleted file mode 100644 index f2ddf93..0000000 --- a/rustfmt.toml +++ /dev/null @@ -1 +0,0 @@ -wrap_comments = false diff --git a/src/lib.rs b/src/lib.rs index 30be7de..c63f72f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,11 +37,15 @@ //! ``` // https://pascalhertleif.de/artikel/good-practices-for-writing-rust-libraries/ -#![deny(missing_docs, missing_debug_implementations, - trivial_casts, trivial_numeric_casts, unused_import_braces)] - -#![cfg_attr(feature="clippy", feature(plugin))] -#![cfg_attr(feature="clippy", plugin(clippy))] +#![deny( + missing_docs, + missing_debug_implementations, + trivial_casts, + trivial_numeric_casts, + unused_import_braces +)] +#![cfg_attr(feature = "clippy", feature(plugin))] +#![cfg_attr(feature = "clippy", plugin(clippy))] mod metadata; mod parser; @@ -74,13 +78,7 @@ impl Catalog { fn new() -> Self { Catalog { strings: HashMap::new(), - resolver: Function(Box::new(|n| { - if n != 1 { - 1 - } else { - 0 - } - })), + resolver: Function(Box::new(|n| if n != 1 { 1 } else { 0 })), } } @@ -116,7 +114,10 @@ impl Catalog { /// Returns the singular translation of `msg_id` from the given catalog /// or `msg_id` itself if a translation does not exist. pub fn gettext<'a>(&'a self, msg_id: &'a str) -> &'a str { - self.strings.get(msg_id).and_then(|msg| msg.get_translated(0)).unwrap_or(msg_id) + self.strings + .get(msg_id) + .and_then(|msg| msg.get_translated(0)) + .unwrap_or(msg_id) } /// Returns the plural translation of `msg_id` from the given catalog @@ -129,9 +130,9 @@ impl Catalog { let form_no = self.resolver.resolve(n); match self.strings.get(msg_id) { - Some(msg) => { - msg.get_translated(form_no).unwrap_or_else(|| [msg_id, msg_id_plural][form_no]) - } + Some(msg) => msg + .get_translated(form_no) + .unwrap_or_else(|| [msg_id, msg_id_plural][form_no]), None if n == 1 => msg_id, None if n != 1 => msg_id_plural, _ => unreachable!(), @@ -144,7 +145,10 @@ impl Catalog { // TODO: DRY gettext/pgettext pub fn pgettext<'a>(&'a self, msg_context: &'a str, msg_id: &'a str) -> &'a str { let key = key_with_context(msg_context, &msg_id); - self.strings.get(&key).and_then(|msg| msg.get_translated(0)).unwrap_or(msg_id) + self.strings + .get(&key) + .and_then(|msg| msg.get_translated(0)) + .unwrap_or(msg_id) } /// Returns the plural translation of `msg_id` @@ -155,18 +159,19 @@ impl Catalog { /// /// Currently, the only supported plural formula is `n != 1`. // TODO: DRY ngettext/npgettext - pub fn npgettext<'a>(&'a self, - msg_context: &'a str, - msg_id: &'a str, - msg_id_plural: &'a str, - n: u64) - -> &'a str { + pub fn npgettext<'a>( + &'a self, + msg_context: &'a str, + msg_id: &'a str, + msg_id_plural: &'a str, + n: u64, + ) -> &'a str { let key = key_with_context(msg_context, &msg_id); let form_no = self.resolver.resolve(n); match self.strings.get(&key) { - Some(msg) => { - msg.get_translated(form_no).unwrap_or_else(|| [msg_id, msg_id_plural][form_no]) - } + Some(msg) => msg + .get_translated(form_no) + .unwrap_or_else(|| [msg_id, msg_id_plural][form_no]), None if n == 1 => msg_id, None if n != 1 => msg_id_plural, _ => unreachable!(), @@ -245,18 +250,28 @@ fn catalog_pgettext() { #[test] fn catalog_npgettext() { let mut cat = Catalog::new(); - cat.insert(Message::new("Text", Some("unit test"), vec!["Tekstas", "Tekstai"])); + cat.insert(Message::new( + "Text", + Some("unit test"), + vec!["Tekstas", "Tekstai"], + )); assert_eq!(cat.npgettext("unit test", "Text", "Texts", 1), "Tekstas"); assert_eq!(cat.npgettext("unit test", "Text", "Texts", 0), "Tekstai"); assert_eq!(cat.npgettext("unit test", "Text", "Texts", 2), "Tekstai"); - assert_eq!(cat.npgettext("integration test", "Text", "Texts", 1), - "Text"); - assert_eq!(cat.npgettext("integration test", "Text", "Texts", 0), - "Texts"); - assert_eq!(cat.npgettext("integration test", "Text", "Texts", 2), - "Texts"); + assert_eq!( + cat.npgettext("integration test", "Text", "Texts", 1), + "Text" + ); + assert_eq!( + cat.npgettext("integration test", "Text", "Texts", 0), + "Texts" + ); + assert_eq!( + cat.npgettext("integration test", "Text", "Texts", 2), + "Texts" + ); } #[cfg(test)] @@ -273,7 +288,11 @@ fn lithuanian_plural(n: u64) -> usize { #[test] fn catalog_ngettext_resolver() { let mut cat = Catalog::new(); - cat.insert(Message::new("Garlic", None, vec!["Česnakas", "Česnakai", "Česnakų"])); + cat.insert(Message::new( + "Garlic", + None, + vec!["Česnakas", "Česnakai", "Česnakų"], + )); // https://localization-guide.readthedocs.org/en/latest/l10n/pluralforms.html cat.resolver = Resolver::Function(Box::new(lithuanian_plural)); diff --git a/src/metadata.rs b/src/metadata.rs index 194e7b0..55d416a 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -10,7 +10,8 @@ pub struct MetadataMap<'a>(HashMap<&'a str, &'a str>); impl<'a> MetadataMap<'a> { /// Returns a string that indicates the character set. pub fn charset(&self) -> Option<&'a str> { - self.get("Content-Type").and_then(|x| x.split("charset=").skip(1).next()) + self.get("Content-Type") + .and_then(|x| x.split("charset=").skip(1).next()) } } diff --git a/src/parser.rs b/src/parser.rs index f67337d..db1b41c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,13 +7,13 @@ use std::error; use std::fmt; use std::io; -use self::byteorder::{ByteOrder, BigEndian, LittleEndian}; +use self::byteorder::{BigEndian, ByteOrder, LittleEndian}; use self::encoding::label::encoding_from_whatwg_label; -use self::encoding::types::EncodingRef; use self::encoding::types::DecoderTrap::Strict; +use self::encoding::types::EncodingRef; -use super::{Catalog, Message}; use super::plurals::Resolver; +use super::{Catalog, Message}; use metadata::parse_metadata; #[allow(non_upper_case_globals)] @@ -188,7 +188,11 @@ pub fn parse_catalog(mut file: R, opts: ParseOptions) -> Result None, }; // extract msg_id singular, ignoring the plural - id = match original.iter().position(|x| *x == 0).map(|i| &original[..i]) { + id = match original + .iter() + .position(|x| *x == 0) + .map(|i| &original[..i]) + { Some(b) => try!(encoding.decode(b, Strict)), None => return Err(Eof), }; @@ -208,10 +212,12 @@ pub fn parse_catalog(mut file: R, opts: ParseOptions) -> Result, _>>()); + translated = try!( + (&contents[off..off + len]) + .split(|x| *x == 0) + .map(|b| encoding.decode(b, Strict)) + .collect::, _>>() + ); if id == "" { let map = parse_metadata(&*translated[0]).unwrap(); if let (Some(c), None) = (map.charset(), opts.force_encoding) { @@ -307,16 +313,20 @@ fn test_parse_catalog() { let reader: &[u8] = include_bytes!("../test_cases/1.mo"); let catalog = parse_catalog(reader, ParseOptions::new()).unwrap(); assert_eq!(catalog.strings.len(), 1); - assert_eq!(catalog.strings["this is context\x04Text"], - Message::new("Text", Some("this is context"), vec!["Tekstas", "Tekstai"])); + assert_eq!( + catalog.strings["this is context\x04Text"], + Message::new("Text", Some("this is context"), vec!["Tekstas", "Tekstai"]) + ); } { let reader: &[u8] = include_bytes!("../test_cases/2.mo"); let catalog = parse_catalog(reader, ParseOptions::new()).unwrap(); assert_eq!(catalog.strings.len(), 2); - assert_eq!(catalog.strings["Image"], - Message::new("Image", None, vec!["Nuotrauka", "Nuotraukos"])); + assert_eq!( + catalog.strings["Image"], + Message::new("Image", None, vec!["Nuotrauka", "Nuotraukos"]) + ); } { From 6eb4b3d6309e711b746360041b3a73140ba17e05 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Tue, 23 Oct 2018 17:33:59 +0100 Subject: [PATCH 03/11] WIP: parse plurals - Parse the Plural-Forms header - Add an Ast type to parse boolean expressions --- src/lib.rs | 7 ++-- src/metadata.rs | 47 +++++++++++++++++++++++++++ src/parser.rs | 4 +-- src/plurals.rs | 85 ++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 132 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c63f72f..8a6db90 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,12 +68,12 @@ fn key_with_context(context: &str, key: &str) -> String { /// Catalog represents a set of translation strings /// parsed out of one MO file. #[derive(Debug)] -pub struct Catalog { +pub struct Catalog<'r> { strings: HashMap, - resolver: Resolver, + resolver: Resolver<'r>, } -impl Catalog { +impl<'r> Catalog<'r> { /// Creates a new, empty gettext catalog. fn new() -> Self { Catalog { @@ -98,7 +98,6 @@ impl Catalog { /// let file = File::open("french.mo").unwrap(); /// let catalog = Catalog::parse(file).unwrap(); /// ``` - pub fn parse(reader: R) -> Result { ParseOptions::new().parse(reader) } diff --git a/src/metadata.rs b/src/metadata.rs index 55d416a..1ae4b69 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -13,6 +13,30 @@ impl<'a> MetadataMap<'a> { self.get("Content-Type") .and_then(|x| x.split("charset=").skip(1).next()) } + + /// Returns the number of different plurals and the boolean + /// expression to determine the form to use depending on + /// the number of elements. + /// + /// Defaults to `n_plurals = 2` and `plural = n!=1` (as in English). + pub fn plural_forms(&self) -> (usize, &'a str) { + self.get("Plural-Forms") + .map(|f| f.split(';').fold((2, "n!=1"), |(n_pl, pl), prop| { + match prop.chars().position(|c| c == '=') { + Some(index) => { + let (name, value) = prop.split_at(index); + let value = value[1..value.len()].trim(); + match name.trim() { + "n_plurals" => (usize::from_str_radix(value, 10).unwrap_or(n_pl), pl), + "plural" => (n_pl, value), + _ => (n_pl, pl) + } + }, + None => (n_pl, pl) + } + })) + .unwrap_or((2, "n!=1")) + } } impl<'a> Deref for MetadataMap<'a> { @@ -53,3 +77,26 @@ fn test_metadatamap_charset() { assert_eq!(map.charset().unwrap(), "utf-42"); } } + +#[test] +fn test_metadatamap_plural() { + { + let mut map = MetadataMap(HashMap::new()); + assert_eq!(map.plural_forms(), (2, "n!=1")); + + map.insert("Plural-Forms", ""); + assert_eq!(map.plural_forms(), (2, "n!=1")); + // n_plural + map.insert("Plural-Forms", "n_plurals=42"); + assert_eq!(map.plural_forms(), (42, "n!=1")); + // plural is specified + map.insert("Plural-Forms", "n_plurals=2; plural=n==12"); + assert_eq!(map.plural_forms(), (2, "n==12")); + // plural before n_plurals + map.insert("Plural-Forms", "plural=n==12; n_plurals=2"); + assert_eq!(map.plural_forms(), (2, "n==12")); + // with spaces + map.insert("Plural-Forms", " n_plurals = 42 ; plural = n > 10 "); + assert_eq!(map.plural_forms(), (42, "n > 10")); + } +} diff --git a/src/parser.rs b/src/parser.rs index db1b41c..5f2298d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -98,7 +98,7 @@ impl ParseOptions { } /// Tries to parse the catalog from the given reader using the specified options. - pub fn parse(self, reader: R) -> Result { + pub fn parse<'a, R: io::Read>(self, reader: R) -> Result, Error> { parse_catalog(reader, self) } @@ -135,7 +135,7 @@ fn get_read_u32_fn(magic: &[u8]) -> Option u32> { } } -pub fn parse_catalog(mut file: R, opts: ParseOptions) -> Result { +pub fn parse_catalog<'a, R: io::Read>(mut file: R, opts: ParseOptions) -> Result, Error> { let mut contents = vec![]; let n = try!(file.read_to_end(&mut contents)); if n < 28 { diff --git a/src/plurals.rs b/src/plurals.rs index d90b3cf..0cda14b 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -1,26 +1,101 @@ use std::fmt; -use self::Resolver::Function; - -pub enum Resolver { +use self::Resolver::*; +pub enum Resolver<'a> { /// A function/closure manually supplied by the user. Function(Box usize>), + /// A boolean expression + Expr(&'a str, Option>), +} + +use self::Ast::*; +#[derive(Debug)] +enum Ast<'a> { + /// A ternary expression + /// x ? a : b + /// + /// the three Ast<'a> are respectively x, a and b. + Ternary(&'a Ast<'a>, &'a Ast<'a>, &'a Ast<'a>), + /// The n variable. + N, + /// Integer literals. + Integer(u64), + /// Boolean literals. + Bool(bool), + /// Comparison operators. + CompOp(&'a str, &'a Ast<'a>, &'a Ast<'a>), + /// && or || operators. + CombOp(&'a str, &'a Ast<'a>, &'a Ast<'a>), + /// ! operator. + Not(&'a Ast<'a>), +} + +impl<'a> Ast<'a> { + fn resolve(&self, n: u64) -> usize { + match *self { + Ternary(cond, ok, nok) => if cond.resolve(n) == 0 { + nok.resolve(n) + } else { + ok.resolve(n) + }, + N => n as usize, + Integer(x) => x as usize, + Bool(b) => b as usize, + CompOp(op, lhs, rhs) => (match op { + "==" => lhs.resolve(n) == rhs.resolve(n), + "!=" => lhs.resolve(n) != rhs.resolve(n), + ">=" => lhs.resolve(n) >= rhs.resolve(n), + "<=" => lhs.resolve(n) <= rhs.resolve(n), + ">" => lhs.resolve(n) > rhs.resolve(n), + "<" => lhs.resolve(n) < rhs.resolve(n), + _ => unreachable!(), + }) as usize, + CombOp(op, lhs, rhs) => (match op { + "&&" => lhs.resolve(n) != 0 && rhs.resolve(n) != 0, + "||" => lhs.resolve(n) != 0 || rhs.resolve(n) != 0, + _ => unreachable!() + }) as usize, + Not(val) => match val.resolve(n) { + 0 => 1, + _ => 0, + } + } + } } -impl Resolver { +impl<'a> Resolver<'a> { /// Returns the number of the correct plural form /// for `n` objects, as defined by the rule contained in this resolver. pub fn resolve(&self, n: u64) -> usize { match *self { Function(ref func) => func(n), + Expr(expr, ref ast) => { + if let Some(ast) = ast { + ast.resolve(n) + } else { + // TODO: parse expr + unimplemented!() + } + }, } } } -impl fmt::Debug for Resolver { +impl<'a> fmt::Debug for Resolver<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { match *self { Function(..) => fmt.write_str("Function(..)"), + Expr(expr, ref ast) => fmt.write_fmt(format_args!("Expr({}, {:?})", expr, ast)), } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_expr_resolver() { + assert_eq!(Expr("n", Some(N)).resolve(42), 42); + } +} From 1d09d1013b2cf2e806e5acee095ed1841f29e0ce Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Wed, 24 Oct 2018 11:31:08 +0100 Subject: [PATCH 04/11] Implement boolean expression parser And use it as a resolver if present --- src/lib.rs | 11 ++ src/parser.rs | 125 +++++++++--------- src/plurals.rs | 249 ++++++++++++++++++++++++++++++----- test_cases/complex_plural.mo | Bin 0 -> 353 bytes test_cases/complex_plural.po | 12 ++ test_cases/integration.mo | Bin 703 -> 691 bytes test_cases/integration.po | 2 +- 7 files changed, 299 insertions(+), 100 deletions(-) create mode 100644 test_cases/complex_plural.mo create mode 100644 test_cases/complex_plural.po diff --git a/src/lib.rs b/src/lib.rs index 8a6db90..d95898a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -305,3 +305,14 @@ fn catalog_ngettext_resolver() { } assert_eq!(cat.ngettext("Garlic", "Garlics", 21), "Česnakas"); } + +#[test] +fn test_complex_plural() { + let reader: &[u8] = include_bytes!("../test_cases/complex_plural.mo"); + let cat = parser::parse_catalog(reader, ParseOptions::new()).unwrap(); + + for i in 0..500 { + println!("{} -> {}", i, cat.ngettext("Test", "Tests", i)); + } + // assert_eq!(cat.ngettext("Garlic", "Garlics", 21), "Česnakas"); +} diff --git a/src/parser.rs b/src/parser.rs index 5f2298d..b1da59e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -12,7 +12,7 @@ use self::encoding::label::encoding_from_whatwg_label; use self::encoding::types::DecoderTrap::Strict; use self::encoding::types::EncodingRef; -use super::plurals::Resolver; +use super::plurals::{Ast, Resolver}; use super::{Catalog, Message}; use metadata::parse_metadata; @@ -135,7 +135,10 @@ fn get_read_u32_fn(magic: &[u8]) -> Option u32> { } } -pub fn parse_catalog<'a, R: io::Read>(mut file: R, opts: ParseOptions) -> Result, Error> { +pub fn parse_catalog<'a, R: io::Read>( + mut file: R, + opts: ParseOptions, +) -> Result, Error> { let mut contents = vec![]; let n = try!(file.read_to_end(&mut contents)); if n < 28 { @@ -156,75 +159,72 @@ pub fn parse_catalog<'a, R: io::Read>(mut file: R, opts: ParseOptions) -> Result } let mut catalog = Catalog::new(); - let resolver = match opts.force_plural { - Some(func) => Some(Resolver::Function(func)), - None => None, - }; + let mut resolver = opts.force_plural.map(Resolver::Function); let mut encoding = opts.force_encoding.unwrap_or(utf8_encoding); for i in 0..num_strings { - let id; - let context; - let translated: Vec; // Parse the original string - { - if n < off_otable + 8 { - return Err(Eof); - } - let len = read_u32(&contents[off_otable..off_otable + 4]) as usize; - let off = read_u32(&contents[off_otable + 4..off_otable + 8]) as usize; - // +1 compensates for the ending NUL byte which is not included in length - if n < off + len + 1 { - return Err(Eof); - } - let mut original = &contents[off..off + len + 1]; - // check for context - context = match original.iter().position(|x| *x == 4) { - Some(idx) => { - let ctx = &original[..idx]; - original = &original[idx + 1..]; - Some(try!(encoding.decode(ctx, Strict))) - } - None => None, - }; - // extract msg_id singular, ignoring the plural - id = match original - .iter() - .position(|x| *x == 0) - .map(|i| &original[..i]) - { - Some(b) => try!(encoding.decode(b, Strict)), - None => return Err(Eof), - }; - if id == "" && i != 0 { - return Err(MisplacedMetadata); + if n < off_otable + 8 { + return Err(Eof); + } + let len = read_u32(&contents[off_otable..off_otable + 4]) as usize; + let off = read_u32(&contents[off_otable + 4..off_otable + 8]) as usize; + // +1 compensates for the ending NUL byte which is not included in length + if n < off + len + 1 { + return Err(Eof); + } + let mut original = &contents[off..off + len + 1]; + // check for context + let context = match original.iter().position(|x| *x == 4) { + Some(idx) => { + let ctx = &original[..idx]; + original = &original[idx + 1..]; + Some(try!(encoding.decode(ctx, Strict))) } + None => None, + }; + // extract msg_id singular, ignoring the plural + let id = match original + .iter() + .position(|x| *x == 0) + .map(|i| &original[..i]) + { + Some(b) => try!(encoding.decode(b, Strict)), + None => return Err(Eof), + }; + if id == "" && i != 0 { + return Err(MisplacedMetadata); } // Parse the translation strings - { - if n < off_ttable + 8 { - return Err(Eof); - } - let len = read_u32(&contents[off_ttable..off_ttable + 4]) as usize; - let off = read_u32(&contents[off_ttable + 4..off_ttable + 8]) as usize; - // +1 compensates for the ending NUL byte which is not included in length - if n < off + len + 1 { - return Err(Eof); + if n < off_ttable + 8 { + return Err(Eof); + } + let len = read_u32(&contents[off_ttable..off_ttable + 4]) as usize; + let off = read_u32(&contents[off_ttable + 4..off_ttable + 8]) as usize; + // +1 compensates for the ending NUL byte which is not included in length + if n < off + len + 1 { + return Err(Eof); + } + let translated = try!( + (&contents[off..off + len]) + .split(|x| *x == 0) + .map(|b| encoding.decode(b, Strict)) + .collect::, _>>() + ); + if id == "" { + let map = parse_metadata(&*translated[0]).unwrap(); + if let (Some(c), None) = (map.charset(), opts.force_encoding) { + encoding = match encoding_from_whatwg_label(c) { + Some(enc_ref) => enc_ref, + None => return Err(UnknownEncoding), + } } - translated = try!( - (&contents[off..off + len]) - .split(|x| *x == 0) - .map(|b| encoding.decode(b, Strict)) - .collect::, _>>() - ); - if id == "" { - let map = parse_metadata(&*translated[0]).unwrap(); - if let (Some(c), None) = (map.charset(), opts.force_encoding) { - encoding = match encoding_from_whatwg_label(c) { - Some(enc_ref) => enc_ref, - None => return Err(UnknownEncoding), - } + match resolver { + Some(Resolver::Expr(_)) => {} + _ => { + let plural_forms = map.plural_forms().1.to_owned(); + resolver = Some(Resolver::Expr(Box::new(Ast::parse(plural_forms.as_ref())))); } } } @@ -234,7 +234,6 @@ pub fn parse_catalog<'a, R: io::Read>(mut file: R, opts: ParseOptions) -> Result off_otable += 8; off_ttable += 8; } - if let Some(r) = resolver { catalog.resolver = r; } diff --git a/src/plurals.rs b/src/plurals.rs index 0cda14b..0bcfa4a 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -5,62 +5,203 @@ pub enum Resolver<'a> { /// A function/closure manually supplied by the user. Function(Box usize>), /// A boolean expression - Expr(&'a str, Option>), + /// Use Ast::parse to get an Ast + Expr(Box>), +} + + +fn index_of<'b, 'c>(src: &'b str, pat: &'static str) -> Option { + src.chars().fold((None, 0, 0, 0), |(match_index, i, n_matches, paren_level), ch| { + if let Some(x) = match_index { + return (Some(x), i, n_matches, paren_level); + } else { + let new_par_lvl = match ch { + '(' => paren_level + 1, + ')' => paren_level - 1, + _ => paren_level + }; + + if Some(ch) == pat.chars().nth(n_matches) { + let length = n_matches + 1; + if length == pat.len() && new_par_lvl == 0 { + (Some(i - n_matches), i + 1, length, new_par_lvl) + } else { + (match_index, i + 1, length, new_par_lvl) + } + } else { + (match_index, i + 1, 0, new_par_lvl) + } + } + }).0 } use self::Ast::*; -#[derive(Debug)] -enum Ast<'a> { +#[derive(Debug, PartialEq)] +pub enum Ast<'a> { /// A ternary expression /// x ? a : b /// /// the three Ast<'a> are respectively x, a and b. - Ternary(&'a Ast<'a>, &'a Ast<'a>, &'a Ast<'a>), + Ternary(Box>, Box>, Box>), /// The n variable. N, /// Integer literals. Integer(u64), - /// Boolean literals. - Bool(bool), - /// Comparison operators. - CompOp(&'a str, &'a Ast<'a>, &'a Ast<'a>), - /// && or || operators. - CombOp(&'a str, &'a Ast<'a>, &'a Ast<'a>), + /// Binary operators. + Op(&'a str, Box>, Box>), /// ! operator. - Not(&'a Ast<'a>), + Not(Box>), } impl<'a> Ast<'a> { fn resolve(&self, n: u64) -> usize { match *self { - Ternary(cond, ok, nok) => if cond.resolve(n) == 0 { + Ternary(ref cond, ref ok, ref nok) => if cond.resolve(n) == 0 { nok.resolve(n) } else { ok.resolve(n) }, N => n as usize, Integer(x) => x as usize, - Bool(b) => b as usize, - CompOp(op, lhs, rhs) => (match op { - "==" => lhs.resolve(n) == rhs.resolve(n), - "!=" => lhs.resolve(n) != rhs.resolve(n), - ">=" => lhs.resolve(n) >= rhs.resolve(n), - "<=" => lhs.resolve(n) <= rhs.resolve(n), - ">" => lhs.resolve(n) > rhs.resolve(n), - "<" => lhs.resolve(n) < rhs.resolve(n), + Op(ref op, ref lhs, ref rhs) => match *op { + "==" => (lhs.resolve(n) == rhs.resolve(n)) as usize, + "!=" => (lhs.resolve(n) != rhs.resolve(n)) as usize, + ">=" => (lhs.resolve(n) >= rhs.resolve(n)) as usize, + "<=" => (lhs.resolve(n) <= rhs.resolve(n)) as usize, + ">" => (lhs.resolve(n) > rhs.resolve(n)) as usize, + "<" => (lhs.resolve(n) < rhs.resolve(n)) as usize, + "&&" => (lhs.resolve(n) != 0 && rhs.resolve(n) != 0) as usize, + "||" => (lhs.resolve(n) != 0 || rhs.resolve(n) != 0) as usize, + "%" => lhs.resolve(n) % rhs.resolve(n), _ => unreachable!(), - }) as usize, - CombOp(op, lhs, rhs) => (match op { - "&&" => lhs.resolve(n) != 0 && rhs.resolve(n) != 0, - "||" => lhs.resolve(n) != 0 || rhs.resolve(n) != 0, - _ => unreachable!() - }) as usize, - Not(val) => match val.resolve(n) { + }, + Not(ref val) => match val.resolve(n) { 0 => 1, _ => 0, } } } + + pub fn parse<'b, 'c>(src: &'b str) -> Ast<'c> { + Self::parse_parens(src.trim()) + } + + fn parse_parens<'b, 'c>(src: &'b str) -> Ast<'c> { + if src.starts_with('(') && src.ends_with(')') { + Ast::parse(src[1..src.len() - 1].trim()) + } else { + Ast::parse_and(src.trim()) + } + } + + fn parse_and<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "&&") { + Ast::Op("&&", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + } else { + Self::parse_or(src) + } + } + + fn parse_or<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "||") { + Ast::Op("||", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + } else { + Self::parse_ternary(src) + } + } + + fn parse_ternary<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "?") { + if let Some(l) = index_of(src, ":") { + Ast::Ternary( + Box::new(Ast::parse(&src[0..i])), + Box::new(Ast::parse(&src[i + 1..l])), + Box::new(Ast::parse(&src[l + 1..])) + ) + } else { + panic!("Incorrect ternary expression, expected `:`") + } + } else { + Self::parse_ge(src) + } + } + + fn parse_ge<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, ">=") { + Ast::Op(">=", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + } else { + Self::parse_gt(src) + } + } + + fn parse_gt<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, ">") { + Ast::Op(">", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + } else { + Self::parse_le(src) + } + } + + fn parse_le<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "<=") { + Ast::Op("<=", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + } else { + Self::parse_lt(src) + } + } + + fn parse_lt<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "<") { + Ast::Op("<", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + } else { + Self::parse_eq(src) + } + } + + fn parse_eq<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "==") { + Ast::Op("==", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + } else { + Self::parse_neq(src) + } + } + + fn parse_neq<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "!=") { + Ast::Op("!=", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + } else { + Self::parse_mod(src) + } + } + fn parse_mod<'b, 'c>(src: &'b str) -> Ast<'c> { + if let Some(i) = index_of(src, "%") { + Ast::Op("%", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + } else { + Self::parse_not(src.trim()) + } + } + + fn parse_not<'b, 'c>(src: &'b str) -> Ast<'c> { + if index_of(src, "!") == Some(0) { + Ast::Not(Box::new(Ast::parse(&src[1..]))) + } else { + Self::parse_int(src.trim()) + } + } + + fn parse_int<'b, 'c>(src: &'b str) -> Ast<'c> { + u64::from_str_radix(src, 10) + .map(|x| Ast::Integer(x)) + .unwrap_or_else(|_| Self::parse_n(src.trim())) + } + + fn parse_n<'b, 'c>(src: &'b str) -> Ast<'c> { + if src == "n" { + Ast::N + } else { + panic!("Unexpected token: {}", src) + } + } } impl<'a> Resolver<'a> { @@ -69,13 +210,8 @@ impl<'a> Resolver<'a> { pub fn resolve(&self, n: u64) -> usize { match *self { Function(ref func) => func(n), - Expr(expr, ref ast) => { - if let Some(ast) = ast { - ast.resolve(n) - } else { - // TODO: parse expr - unimplemented!() - } + Expr(ref ast) => { + ast.resolve(n) }, } } @@ -85,7 +221,7 @@ impl<'a> fmt::Debug for Resolver<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { match *self { Function(..) => fmt.write_str("Function(..)"), - Expr(expr, ref ast) => fmt.write_fmt(format_args!("Expr({}, {:?})", expr, ast)), + Expr(ref ast) => fmt.write_fmt(format_args!("Expr({:?})", ast)), } } } @@ -96,6 +232,47 @@ mod tests { #[test] fn test_expr_resolver() { - assert_eq!(Expr("n", Some(N)).resolve(42), 42); + assert_eq!(Expr(Box::new(N)).resolve(42), 42); + } + + #[test] + fn test_parser() { + assert_eq!(Ast::parse("n == 42 ? n : 6 && n < 7"), Ast::Op( + "&&", + Box::new(Ast::Ternary( + Box::new(Ast::Op( + "==", + Box::new(Ast::N), + Box::new(Ast::Integer(42)) + )), + Box::new(Ast::N), + Box::new(Ast::Integer(6)) + )), + Box::new(Ast::Op( + "<", + Box::new(Ast::N), + Box::new(Ast::Integer(7)) + )) + )); + + assert_eq!(Ast::parse("(n)"), Ast::N); + + assert_eq!(Ast::parse("(n == 1 || n == 2) ? 0 : 1"), Ast::Ternary( + Box::new(Ast::Op( + "||", + Box::new(Ast::Op( + "==", + Box::new(Ast::N), + Box::new(Ast::Integer(1)) + )), + Box::new(Ast::Op( + "==", + Box::new(Ast::N), + Box::new(Ast::Integer(2)) + )) + )), + Box::new(Ast::Integer(0)), + Box::new(Ast::Integer(1)) + )) } } diff --git a/test_cases/complex_plural.mo b/test_cases/complex_plural.mo new file mode 100644 index 0000000000000000000000000000000000000000..cef8f1046d7252a7877fe4215dbdd209c420db89 GIT binary patch literal 353 zcmYL@u};G<6h#dxOGajf%RmJZ20JiRNelzD5<{h;1|}9$3`7b}WV;eb{SAJPwZGBl z1~pGUwy%BP)%!af{4%P2;)pmW4v2z?H6s$@kysNiMD*S7F-B|KM@L_!@qxw7v+2Wh z%%6?(Rbw;A3z6J4HW(Xt`O}&V!F-3yw$_!+vG~-^o1mV`2_N-7r>%cCj*so4d8_P4 zhS6&kl3BfUy5^I{efbQw-Ab>na&%WHSe!r#C= 2 && n % 10 <= 4 && (n % 100 < 10 || n % 100 >= 20) ? 1 : 2;\n" + +msgid "Test" +msgid_plural "Tests" +msgstr[0] "Singular" +msgstr[1] "Plural 1" +msgstr[2] "Singular 2" diff --git a/test_cases/integration.mo b/test_cases/integration.mo index b02bb78ed029c2f7115e92ae35038ce9d1acd869..105021d8c306c02b99e5634453369c3447926769 100644 GIT binary patch delta 67 zcmdnbx|wx?i)uO}1H)|w1_ntWc4lH=&;`=5Kw1JwcLQlYAiZQ`=1E3ABWs0%oYJDi O9NRoaTf@mdOvM0}XAPGC delta 79 zcmdnYx}SA|i)t|=1H)|w1_ntW_Ge;X&;`<&Kw1JwPY2R`KzhT*%#(~#o_-;&?yf=B a3I#c(MTt4Kt`Px2uED{c{(h4kn2G^lW)Z>w diff --git a/test_cases/integration.po b/test_cases/integration.po index 5374a05..81165dc 100644 --- a/test_cases/integration.po +++ b/test_cases/integration.po @@ -16,7 +16,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" +"Plural-Forms: nplurals=2; plural=n!=1;\n" #: ../tests/lib.rs:12 msgid "non-existent" From 6d4118123806d84abd8258858d81db8fe2ba78e0 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Wed, 24 Oct 2018 13:39:06 +0100 Subject: [PATCH 05/11] Remove Resolver::Function to make Catalog: Send + Sync --- src/lib.rs | 36 ++---------------------------------- src/parser.rs | 13 +------------ src/plurals.rs | 16 ++-------------- 3 files changed, 5 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d95898a..69f5535 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,7 +56,7 @@ use std::io::Read; use std::ops::Deref; pub use parser::{Error, ParseOptions}; -use plurals::Resolver::{self, Function}; +use plurals::*; fn key_with_context(context: &str, key: &str) -> String { let mut result = context.to_owned(); @@ -78,7 +78,7 @@ impl<'r> Catalog<'r> { fn new() -> Self { Catalog { strings: HashMap::new(), - resolver: Function(Box::new(|n| if n != 1 { 1 } else { 0 })), + resolver: Resolver::Expr(Box::new(Ast::parse("n != 1"))), } } @@ -273,38 +273,6 @@ fn catalog_npgettext() { ); } -#[cfg(test)] -fn lithuanian_plural(n: u64) -> usize { - if (n % 10) == 1 && (n % 100) != 11 { - 0 - } else if ((n % 10) >= 2) && ((n % 100) < 10 || (n % 100) >= 20) { - 1 - } else { - 2 - } -} - -#[test] -fn catalog_ngettext_resolver() { - let mut cat = Catalog::new(); - cat.insert(Message::new( - "Garlic", - None, - vec!["Česnakas", "Česnakai", "Česnakų"], - )); - // https://localization-guide.readthedocs.org/en/latest/l10n/pluralforms.html - cat.resolver = Resolver::Function(Box::new(lithuanian_plural)); - - assert_eq!(cat.ngettext("Garlic", "Garlics", 0), "Česnakų"); - assert_eq!(cat.ngettext("Garlic", "Garlics", 1), "Česnakas"); - for i in 2..9 { - assert_eq!(cat.ngettext("Garlic", "Garlics", i), "Česnakai"); - } - for i in 10..20 { - assert_eq!(cat.ngettext("Garlic", "Garlics", i), "Česnakų"); - } - assert_eq!(cat.ngettext("Garlic", "Garlics", 21), "Česnakas"); -} #[test] fn test_complex_plural() { diff --git a/src/parser.rs b/src/parser.rs index b1da59e..81ca159 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -88,7 +88,6 @@ impl From> for Error { #[derive(Default)] pub struct ParseOptions { force_encoding: Option, - force_plural: Option usize + 'static>>, } impl ParseOptions { @@ -111,16 +110,6 @@ impl ParseOptions { self.force_encoding = Some(encoding); self } - - /// Forces a use of the given plural formula - /// for deciding the proper plural form for a message. - /// If this option is not enabled, - /// the parser uses the default formula - /// (`n != 1`). - pub fn force_plural usize + 'static>(mut self, plural: T) -> Self { - self.force_plural = Some(Box::new(plural)); - self - } } /// According to the given magic number of a MO file, @@ -159,7 +148,7 @@ pub fn parse_catalog<'a, R: io::Read>( } let mut catalog = Catalog::new(); - let mut resolver = opts.force_plural.map(Resolver::Function); + let mut resolver = None; let mut encoding = opts.force_encoding.unwrap_or(utf8_encoding); for i in 0..num_strings { diff --git a/src/plurals.rs b/src/plurals.rs index 0bcfa4a..f7a1848 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -1,9 +1,7 @@ -use std::fmt; - use self::Resolver::*; + +#[derive(Debug)] pub enum Resolver<'a> { - /// A function/closure manually supplied by the user. - Function(Box usize>), /// A boolean expression /// Use Ast::parse to get an Ast Expr(Box>), @@ -209,7 +207,6 @@ impl<'a> Resolver<'a> { /// for `n` objects, as defined by the rule contained in this resolver. pub fn resolve(&self, n: u64) -> usize { match *self { - Function(ref func) => func(n), Expr(ref ast) => { ast.resolve(n) }, @@ -217,15 +214,6 @@ impl<'a> Resolver<'a> { } } -impl<'a> fmt::Debug for Resolver<'a> { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - match *self { - Function(..) => fmt.write_str("Function(..)"), - Expr(ref ast) => fmt.write_fmt(format_args!("Expr({:?})", ast)), - } - } -} - #[cfg(test)] mod tests { use super::*; From 343ce3a33ae52c393cfd8f6f6403eb1dda542186 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Thu, 25 Oct 2018 14:32:56 +0100 Subject: [PATCH 06/11] Make Catalog clonable --- src/lib.rs | 4 ++-- src/plurals.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 69f5535..a6927e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,7 @@ fn key_with_context(context: &str, key: &str) -> String { /// Catalog represents a set of translation strings /// parsed out of one MO file. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Catalog<'r> { strings: HashMap, resolver: Resolver<'r>, @@ -178,7 +178,7 @@ impl<'r> Catalog<'r> { } } -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] struct Message { id: String, context: Option, diff --git a/src/plurals.rs b/src/plurals.rs index f7a1848..6976a21 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -1,6 +1,6 @@ use self::Resolver::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum Resolver<'a> { /// A boolean expression /// Use Ast::parse to get an Ast @@ -34,7 +34,7 @@ fn index_of<'b, 'c>(src: &'b str, pat: &'static str) -> Option { } use self::Ast::*; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Ast<'a> { /// A ternary expression /// x ? a : b From a6e522cce232dd23235491b0f44c39e7944dd19f Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Wed, 31 Oct 2018 18:59:09 +0100 Subject: [PATCH 07/11] Use an enum to store operators in Ast --- src/lib.rs | 6 +-- src/parser.rs | 4 +- src/plurals.rs | 110 +++++++++++++++++++++++++++---------------------- 3 files changed, 66 insertions(+), 54 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a6927e4..418062c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,12 +68,12 @@ fn key_with_context(context: &str, key: &str) -> String { /// Catalog represents a set of translation strings /// parsed out of one MO file. #[derive(Clone, Debug)] -pub struct Catalog<'r> { +pub struct Catalog { strings: HashMap, - resolver: Resolver<'r>, + resolver: Resolver, } -impl<'r> Catalog<'r> { +impl Catalog { /// Creates a new, empty gettext catalog. fn new() -> Self { Catalog { diff --git a/src/parser.rs b/src/parser.rs index 81ca159..38be530 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -97,7 +97,7 @@ impl ParseOptions { } /// Tries to parse the catalog from the given reader using the specified options. - pub fn parse<'a, R: io::Read>(self, reader: R) -> Result, Error> { + pub fn parse(self, reader: R) -> Result { parse_catalog(reader, self) } @@ -127,7 +127,7 @@ fn get_read_u32_fn(magic: &[u8]) -> Option u32> { pub fn parse_catalog<'a, R: io::Read>( mut file: R, opts: ParseOptions, -) -> Result, Error> { +) -> Result { let mut contents = vec![]; let n = try!(file.read_to_end(&mut contents)); if n < 28 { diff --git a/src/plurals.rs b/src/plurals.rs index 6976a21..ff1dcd9 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -1,14 +1,14 @@ use self::Resolver::*; #[derive(Clone, Debug)] -pub enum Resolver<'a> { +pub enum Resolver { /// A boolean expression /// Use Ast::parse to get an Ast - Expr(Box>), + Expr(Box), } -fn index_of<'b, 'c>(src: &'b str, pat: &'static str) -> Option { +fn index_of<'a>(src: &'a str, pat: &'static str) -> Option { src.chars().fold((None, 0, 0, 0), |(match_index, i, n_matches, paren_level), ch| { if let Some(x) = match_index { return (Some(x), i, n_matches, paren_level); @@ -35,23 +35,36 @@ fn index_of<'b, 'c>(src: &'b str, pat: &'static str) -> Option { use self::Ast::*; #[derive(Clone, Debug, PartialEq)] -pub enum Ast<'a> { +pub enum Ast { /// A ternary expression /// x ? a : b /// /// the three Ast<'a> are respectively x, a and b. - Ternary(Box>, Box>, Box>), + Ternary(Box, Box, Box), /// The n variable. N, /// Integer literals. Integer(u64), /// Binary operators. - Op(&'a str, Box>, Box>), + Op(Operator, Box, Box), /// ! operator. - Not(Box>), + Not(Box), } -impl<'a> Ast<'a> { +#[derive(Clone, Debug, PartialEq)] +pub enum Operator { + Equal, + NotEqual, + GreaterOrEqual, + SmallerOrEqual, + Greater, + Smaller, + And, + Or, + Modulo, +} + +impl Ast { fn resolve(&self, n: u64) -> usize { match *self { Ternary(ref cond, ref ok, ref nok) => if cond.resolve(n) == 0 { @@ -62,16 +75,15 @@ impl<'a> Ast<'a> { N => n as usize, Integer(x) => x as usize, Op(ref op, ref lhs, ref rhs) => match *op { - "==" => (lhs.resolve(n) == rhs.resolve(n)) as usize, - "!=" => (lhs.resolve(n) != rhs.resolve(n)) as usize, - ">=" => (lhs.resolve(n) >= rhs.resolve(n)) as usize, - "<=" => (lhs.resolve(n) <= rhs.resolve(n)) as usize, - ">" => (lhs.resolve(n) > rhs.resolve(n)) as usize, - "<" => (lhs.resolve(n) < rhs.resolve(n)) as usize, - "&&" => (lhs.resolve(n) != 0 && rhs.resolve(n) != 0) as usize, - "||" => (lhs.resolve(n) != 0 || rhs.resolve(n) != 0) as usize, - "%" => lhs.resolve(n) % rhs.resolve(n), - _ => unreachable!(), + Operator::Equal => (lhs.resolve(n) == rhs.resolve(n)) as usize, + Operator::NotEqual => (lhs.resolve(n) != rhs.resolve(n)) as usize, + Operator::GreaterOrEqual => (lhs.resolve(n) >= rhs.resolve(n)) as usize, + Operator::SmallerOrEqual => (lhs.resolve(n) <= rhs.resolve(n)) as usize, + Operator::Greater => (lhs.resolve(n) > rhs.resolve(n)) as usize, + Operator::Smaller => (lhs.resolve(n) < rhs.resolve(n)) as usize, + Operator::And => (lhs.resolve(n) != 0 && rhs.resolve(n) != 0) as usize, + Operator::Or => (lhs.resolve(n) != 0 || rhs.resolve(n) != 0) as usize, + Operator::Modulo => lhs.resolve(n) % rhs.resolve(n), }, Not(ref val) => match val.resolve(n) { 0 => 1, @@ -80,11 +92,11 @@ impl<'a> Ast<'a> { } } - pub fn parse<'b, 'c>(src: &'b str) -> Ast<'c> { + pub fn parse<'a>(src: &'a str) -> Ast { Self::parse_parens(src.trim()) } - fn parse_parens<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_parens<'a>(src: &'a str) -> Ast { if src.starts_with('(') && src.ends_with(')') { Ast::parse(src[1..src.len() - 1].trim()) } else { @@ -92,23 +104,23 @@ impl<'a> Ast<'a> { } } - fn parse_and<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_and<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "&&") { - Ast::Op("&&", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ast::Op(Operator::And, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) } else { Self::parse_or(src) } } - fn parse_or<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_or<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "||") { - Ast::Op("||", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ast::Op(Operator::Or, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) } else { Self::parse_ternary(src) } } - fn parse_ternary<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_ternary<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "?") { if let Some(l) = index_of(src, ":") { Ast::Ternary( @@ -124,62 +136,62 @@ impl<'a> Ast<'a> { } } - fn parse_ge<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_ge<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, ">=") { - Ast::Op(">=", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ast::Op(Operator::GreaterOrEqual, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) } else { Self::parse_gt(src) } } - fn parse_gt<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_gt<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, ">") { - Ast::Op(">", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + Ast::Op(Operator::Greater, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) } else { Self::parse_le(src) } } - fn parse_le<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_le<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "<=") { - Ast::Op("<=", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ast::Op(Operator::SmallerOrEqual, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) } else { Self::parse_lt(src) } } - fn parse_lt<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_lt<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "<") { - Ast::Op("<", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + Ast::Op(Operator::Smaller, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) } else { Self::parse_eq(src) } } - fn parse_eq<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_eq<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "==") { - Ast::Op("==", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ast::Op(Operator::Equal, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) } else { Self::parse_neq(src) } } - fn parse_neq<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_neq<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "!=") { - Ast::Op("!=", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ast::Op(Operator::NotEqual, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) } else { Self::parse_mod(src) } } - fn parse_mod<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_mod<'a>(src: &'a str) -> Ast { if let Some(i) = index_of(src, "%") { - Ast::Op("%", Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + Ast::Op(Operator::Modulo, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) } else { Self::parse_not(src.trim()) } } - fn parse_not<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_not<'a>(src: &'a str) -> Ast { if index_of(src, "!") == Some(0) { Ast::Not(Box::new(Ast::parse(&src[1..]))) } else { @@ -187,13 +199,13 @@ impl<'a> Ast<'a> { } } - fn parse_int<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_int<'a>(src: &'a str) -> Ast { u64::from_str_radix(src, 10) .map(|x| Ast::Integer(x)) .unwrap_or_else(|_| Self::parse_n(src.trim())) } - fn parse_n<'b, 'c>(src: &'b str) -> Ast<'c> { + fn parse_n<'a>(src: &'a str) -> Ast { if src == "n" { Ast::N } else { @@ -202,7 +214,7 @@ impl<'a> Ast<'a> { } } -impl<'a> Resolver<'a> { +impl Resolver { /// Returns the number of the correct plural form /// for `n` objects, as defined by the rule contained in this resolver. pub fn resolve(&self, n: u64) -> usize { @@ -226,10 +238,10 @@ mod tests { #[test] fn test_parser() { assert_eq!(Ast::parse("n == 42 ? n : 6 && n < 7"), Ast::Op( - "&&", + Operator::And, Box::new(Ast::Ternary( Box::new(Ast::Op( - "==", + Operator::Equal, Box::new(Ast::N), Box::new(Ast::Integer(42)) )), @@ -237,7 +249,7 @@ mod tests { Box::new(Ast::Integer(6)) )), Box::new(Ast::Op( - "<", + Operator::Smaller, Box::new(Ast::N), Box::new(Ast::Integer(7)) )) @@ -247,14 +259,14 @@ mod tests { assert_eq!(Ast::parse("(n == 1 || n == 2) ? 0 : 1"), Ast::Ternary( Box::new(Ast::Op( - "||", + Operator::Or, Box::new(Ast::Op( - "==", + Operator::Equal, Box::new(Ast::N), Box::new(Ast::Integer(1)) )), Box::new(Ast::Op( - "==", + Operator::Equal, Box::new(Ast::N), Box::new(Ast::Integer(2)) )) From 64aab86eb6b4ca08b0b6ea15e932d2f35a8fd99e Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Wed, 31 Oct 2018 19:29:47 +0100 Subject: [PATCH 08/11] Reintroduce Resolver::Function --- src/parser.rs | 35 ++++++++++++++++++++++++----------- src/plurals.rs | 9 +++++---- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 38be530..bc457ee 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -88,6 +88,7 @@ impl From> for Error { #[derive(Default)] pub struct ParseOptions { force_encoding: Option, + force_plural: Option usize>>, } impl ParseOptions { @@ -110,6 +111,16 @@ impl ParseOptions { self.force_encoding = Some(encoding); self } + + /// Forces a use of the given plural formula + /// for deciding the proper plural form for a message. + /// If this option is not enabled, + /// the parser uses the default formula + /// (`n != 1`). + pub fn force_plural(mut self, plural: fn(u64) -> usize) -> Self { + self.force_plural = Some(Box::new(plural)); + self + } } /// According to the given magic number of a MO file, @@ -148,7 +159,8 @@ pub fn parse_catalog<'a, R: io::Read>( } let mut catalog = Catalog::new(); - let mut resolver = None; + let mut resolver = opts.force_plural.map(|f| Resolver::Function(f)) + .unwrap_or(Resolver::Function(Box::new(default_resolver))); let mut encoding = opts.force_encoding.unwrap_or(utf8_encoding); for i in 0..num_strings { @@ -209,13 +221,8 @@ pub fn parse_catalog<'a, R: io::Read>( None => return Err(UnknownEncoding), } } - match resolver { - Some(Resolver::Expr(_)) => {} - _ => { - let plural_forms = map.plural_forms().1.to_owned(); - resolver = Some(Resolver::Expr(Box::new(Ast::parse(plural_forms.as_ref())))); - } - } + let plural_forms = map.plural_forms().1.to_owned(); + resolver = Resolver::Expr(Box::new(Ast::parse(plural_forms.as_ref()))); } catalog.insert(Message::new(id, context, translated)); @@ -223,13 +230,19 @@ pub fn parse_catalog<'a, R: io::Read>( off_otable += 8; off_ttable += 8; } - if let Some(r) = resolver { - catalog.resolver = r; - } + catalog.resolver = resolver; Ok(catalog) } +fn default_resolver(n: u64) -> usize { + if n == 1 { + 0 + } else { + 1 + } +} + #[test] fn test_get_read_u32_fn() { use std::mem; diff --git a/src/plurals.rs b/src/plurals.rs index ff1dcd9..ee5eda8 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -5,9 +5,11 @@ pub enum Resolver { /// A boolean expression /// Use Ast::parse to get an Ast Expr(Box), + /// A function + Function(Box usize>) } - +/// Finds the index of a pattern, outside of parenthesis fn index_of<'a>(src: &'a str, pat: &'static str) -> Option { src.chars().fold((None, 0, 0, 0), |(match_index, i, n_matches, paren_level), ch| { if let Some(x) = match_index { @@ -219,9 +221,8 @@ impl Resolver { /// for `n` objects, as defined by the rule contained in this resolver. pub fn resolve(&self, n: u64) -> usize { match *self { - Expr(ref ast) => { - ast.resolve(n) - }, + Expr(ref ast) => ast.resolve(n), + Function(ref f) => f(n) } } } From f5da5b6305ae520aebe23e319ee993dbddc68379 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Wed, 31 Oct 2018 20:09:13 +0100 Subject: [PATCH 09/11] Avoid panics --- src/lib.rs | 12 +++-- src/parser.rs | 13 ++++- src/plurals.rs | 90 +++++++++++++++++++---------------- test_cases/complex_plural.mo | Bin 353 -> 274 bytes test_cases/complex_plural.po | 4 +- 5 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 418062c..a1566b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,7 +55,7 @@ use std::collections::HashMap; use std::io::Read; use std::ops::Deref; -pub use parser::{Error, ParseOptions}; +pub use parser::{default_resolver, Error, ParseOptions}; use plurals::*; fn key_with_context(context: &str, key: &str) -> String { @@ -78,7 +78,7 @@ impl Catalog { fn new() -> Self { Catalog { strings: HashMap::new(), - resolver: Resolver::Expr(Box::new(Ast::parse("n != 1"))), + resolver: Resolver::Function(Box::new(default_resolver)), } } @@ -279,8 +279,10 @@ fn test_complex_plural() { let reader: &[u8] = include_bytes!("../test_cases/complex_plural.mo"); let cat = parser::parse_catalog(reader, ParseOptions::new()).unwrap(); - for i in 0..500 { - println!("{} -> {}", i, cat.ngettext("Test", "Tests", i)); + assert_eq!(cat.ngettext("Test", "Tests", 0), "Plural 2"); + assert_eq!(cat.ngettext("Test", "Tests", 1), "Singular"); + assert_eq!(cat.ngettext("Test", "Tests", 2), "Plural 1"); + for i in 3..20 { + assert_eq!(cat.ngettext("Test", "Tests", i), "Plural 2"); } - // assert_eq!(cat.ngettext("Garlic", "Garlics", 21), "Česnakas"); } diff --git a/src/parser.rs b/src/parser.rs index bc457ee..b251412 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -222,7 +222,9 @@ pub fn parse_catalog<'a, R: io::Read>( } } let plural_forms = map.plural_forms().1.to_owned(); - resolver = Resolver::Expr(Box::new(Ast::parse(plural_forms.as_ref()))); + if let Ok(ast) = Ast::parse(plural_forms.as_ref()) { + resolver = Resolver::Expr(Box::new(ast)) + } } catalog.insert(Message::new(id, context, translated)); @@ -235,7 +237,14 @@ pub fn parse_catalog<'a, R: io::Read>( Ok(catalog) } -fn default_resolver(n: u64) -> usize { +/// The default plural resolver. +/// +/// It will be used if not `Plural-Forms` header is found in the .mo file, and if +/// `ParseOptions::force_plural` was not called. +/// +/// It is valid for English and similar languages: plural will be used for any quantity +/// different of 1. +pub fn default_resolver(n: u64) -> usize { if n == 1 { 0 } else { diff --git a/src/plurals.rs b/src/plurals.rs index ee5eda8..85b10ef 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -66,6 +66,12 @@ pub enum Operator { Modulo, } +#[derive(Debug, PartialEq)] +pub enum ParseError { + UnexpectedToken, + InvalidTernary, +} + impl Ast { fn resolve(&self, n: u64) -> usize { match *self { @@ -94,11 +100,11 @@ impl Ast { } } - pub fn parse<'a>(src: &'a str) -> Ast { + pub fn parse<'a>(src: &'a str) -> Result { Self::parse_parens(src.trim()) } - fn parse_parens<'a>(src: &'a str) -> Ast { + fn parse_parens<'a>(src: &'a str) -> Result { if src.starts_with('(') && src.ends_with(')') { Ast::parse(src[1..src.len() - 1].trim()) } else { @@ -106,112 +112,114 @@ impl Ast { } } - fn parse_and<'a>(src: &'a str) -> Ast { + fn parse_and<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "&&") { - Ast::Op(Operator::And, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ok(Ast::Op(Operator::And, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_or(src) } } - fn parse_or<'a>(src: &'a str) -> Ast { + fn parse_or<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "||") { - Ast::Op(Operator::Or, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ok(Ast::Op(Operator::Or, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_ternary(src) } } - fn parse_ternary<'a>(src: &'a str) -> Ast { + fn parse_ternary<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "?") { if let Some(l) = index_of(src, ":") { - Ast::Ternary( - Box::new(Ast::parse(&src[0..i])), - Box::new(Ast::parse(&src[i + 1..l])), - Box::new(Ast::parse(&src[l + 1..])) - ) + Ok(Ast::Ternary( + Box::new(Ast::parse(&src[0..i])?), + Box::new(Ast::parse(&src[i + 1..l])?), + Box::new(Ast::parse(&src[l + 1..])?), + )) } else { - panic!("Incorrect ternary expression, expected `:`") + Err(ParseError::InvalidTernary) } } else { Self::parse_ge(src) } } - fn parse_ge<'a>(src: &'a str) -> Ast { + fn parse_ge<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, ">=") { - Ast::Op(Operator::GreaterOrEqual, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ok(Ast::Op(Operator::GreaterOrEqual, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_gt(src) } } - fn parse_gt<'a>(src: &'a str) -> Ast { + fn parse_gt<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, ">") { - Ast::Op(Operator::Greater, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + Ok(Ast::Op(Operator::Greater, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 1..])?))) } else { Self::parse_le(src) } } - fn parse_le<'a>(src: &'a str) -> Ast { + fn parse_le<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "<=") { - Ast::Op(Operator::SmallerOrEqual, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ok(Ast::Op(Operator::SmallerOrEqual, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_lt(src) } } - fn parse_lt<'a>(src: &'a str) -> Ast { + fn parse_lt<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "<") { - Ast::Op(Operator::Smaller, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + Ok(Ast::Op(Operator::Smaller, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 1..])?))) } else { Self::parse_eq(src) } } - fn parse_eq<'a>(src: &'a str) -> Ast { + fn parse_eq<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "==") { - Ast::Op(Operator::Equal, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ok(Ast::Op(Operator::Equal, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_neq(src) } } - fn parse_neq<'a>(src: &'a str) -> Ast { + fn parse_neq<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "!=") { - Ast::Op(Operator::NotEqual, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 2..]))) + Ok(Ast::Op(Operator::NotEqual, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_mod(src) } } - fn parse_mod<'a>(src: &'a str) -> Ast { + fn parse_mod<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "%") { - Ast::Op(Operator::Modulo, Box::new(Ast::parse(&src[0..i])), Box::new(Ast::parse(&src[i + 1..]))) + Ok(Ast::Op(Operator::Modulo, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 1..])?))) } else { Self::parse_not(src.trim()) } } - fn parse_not<'a>(src: &'a str) -> Ast { + fn parse_not<'a>(src: &'a str) -> Result { if index_of(src, "!") == Some(0) { - Ast::Not(Box::new(Ast::parse(&src[1..]))) + Ok(Ast::Not(Box::new(Ast::parse(&src[1..])?))) } else { Self::parse_int(src.trim()) } } - fn parse_int<'a>(src: &'a str) -> Ast { - u64::from_str_radix(src, 10) - .map(|x| Ast::Integer(x)) - .unwrap_or_else(|_| Self::parse_n(src.trim())) + fn parse_int<'a>(src: &'a str) -> Result { + if let Ok(x) = u64::from_str_radix(src, 10) { + Ok(Ast::Integer(x)) + } else { + Self::parse_n(src.trim()) + } } - fn parse_n<'a>(src: &'a str) -> Ast { + fn parse_n<'a>(src: &'a str) -> Result { if src == "n" { - Ast::N + Ok(Ast::N) } else { - panic!("Unexpected token: {}", src) + Err(ParseError::UnexpectedToken) } } } @@ -238,7 +246,7 @@ mod tests { #[test] fn test_parser() { - assert_eq!(Ast::parse("n == 42 ? n : 6 && n < 7"), Ast::Op( + assert_eq!(Ast::parse("n == 42 ? n : 6 && n < 7"), Ok(Ast::Op( Operator::And, Box::new(Ast::Ternary( Box::new(Ast::Op( @@ -254,11 +262,11 @@ mod tests { Box::new(Ast::N), Box::new(Ast::Integer(7)) )) - )); + ))); - assert_eq!(Ast::parse("(n)"), Ast::N); + assert_eq!(Ast::parse("(n)"), Ok(Ast::N)); - assert_eq!(Ast::parse("(n == 1 || n == 2) ? 0 : 1"), Ast::Ternary( + assert_eq!(Ast::parse("(n == 1 || n == 2) ? 0 : 1"), Ok(Ast::Ternary( Box::new(Ast::Op( Operator::Or, Box::new(Ast::Op( @@ -274,6 +282,6 @@ mod tests { )), Box::new(Ast::Integer(0)), Box::new(Ast::Integer(1)) - )) + ))) } } diff --git a/test_cases/complex_plural.mo b/test_cases/complex_plural.mo index cef8f1046d7252a7877fe4215dbdd209c420db89..9f5d64f07882c5f646ed6ffbadd17c35274504ba 100644 GIT binary patch delta 56 zcmaFJG>K_~4&N*W28I|QmIC7M3=^#u3fkH#7%JE+7${gN= 2 && n % 10 <= 4 && (n % 100 < 10 || n % 100 >= 20) ? 1 : 2;\n" +"Plural-Forms: nplurals=3; plural = n == 1 ? 0 : n == 2 ? 1 : 2;\n" msgid "Test" msgid_plural "Tests" msgstr[0] "Singular" msgstr[1] "Plural 1" -msgstr[2] "Singular 2" +msgstr[2] "Plural 2" From 99d469ceea2aa8b9e3a22a131e9a50b08f6532f8 Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Wed, 31 Oct 2018 20:14:41 +0100 Subject: [PATCH 10/11] Update README.md: plurals are now supported! --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cfdd5b6..65e4fd7 100644 --- a/README.md +++ b/README.md @@ -6,5 +6,5 @@ - [x] Parsing MO files (10.3) - [x] Parsing metadata (6.2) - [x] Supporting encodings other than UTF-8 -- [ ] Parsing the plural expression? (11.2.6) +- [x] Parsing the plural expression (11.2.6) - [ ] Correct pathfinding? (11.2.3) From a229c16439a24812864c4e4d44db1bb1e48c29ff Mon Sep 17 00:00:00 2001 From: Baptiste Gelez Date: Thu, 1 Nov 2018 10:50:41 +0100 Subject: [PATCH 11/11] Various improvements --- src/lib.rs | 2 +- src/metadata.rs | 22 +++++++++---------- src/parser.rs | 20 ++++++++++------- src/plurals.rs | 58 +++++++++++++++++++++++-------------------------- 4 files changed, 51 insertions(+), 51 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a1566b6..26bd9da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,7 +78,7 @@ impl Catalog { fn new() -> Self { Catalog { strings: HashMap::new(), - resolver: Resolver::Function(Box::new(default_resolver)), + resolver: Resolver::Function(default_resolver), } } diff --git a/src/metadata.rs b/src/metadata.rs index 1ae4b69..6506b42 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -19,23 +19,23 @@ impl<'a> MetadataMap<'a> { /// the number of elements. /// /// Defaults to `n_plurals = 2` and `plural = n!=1` (as in English). - pub fn plural_forms(&self) -> (usize, &'a str) { + pub fn plural_forms(&self) -> (Option, Option<&'a str>) { self.get("Plural-Forms") - .map(|f| f.split(';').fold((2, "n!=1"), |(n_pl, pl), prop| { + .map(|f| f.split(';').fold((None, None), |(n_pl, pl), prop| { match prop.chars().position(|c| c == '=') { Some(index) => { let (name, value) = prop.split_at(index); let value = value[1..value.len()].trim(); match name.trim() { - "n_plurals" => (usize::from_str_radix(value, 10).unwrap_or(n_pl), pl), - "plural" => (n_pl, value), + "n_plurals" => (usize::from_str_radix(value, 10).ok(), pl), + "plural" => (n_pl, Some(value)), _ => (n_pl, pl) } }, None => (n_pl, pl) } })) - .unwrap_or((2, "n!=1")) + .unwrap_or((None, None)) } } @@ -82,21 +82,21 @@ fn test_metadatamap_charset() { fn test_metadatamap_plural() { { let mut map = MetadataMap(HashMap::new()); - assert_eq!(map.plural_forms(), (2, "n!=1")); + assert_eq!(map.plural_forms(), (None, None)); map.insert("Plural-Forms", ""); - assert_eq!(map.plural_forms(), (2, "n!=1")); + assert_eq!(map.plural_forms(), (None, None)); // n_plural map.insert("Plural-Forms", "n_plurals=42"); - assert_eq!(map.plural_forms(), (42, "n!=1")); + assert_eq!(map.plural_forms(), (Some(42), None)); // plural is specified map.insert("Plural-Forms", "n_plurals=2; plural=n==12"); - assert_eq!(map.plural_forms(), (2, "n==12")); + assert_eq!(map.plural_forms(), (Some(2), Some("n==12"))); // plural before n_plurals map.insert("Plural-Forms", "plural=n==12; n_plurals=2"); - assert_eq!(map.plural_forms(), (2, "n==12")); + assert_eq!(map.plural_forms(), (Some(2), Some("n==12"))); // with spaces map.insert("Plural-Forms", " n_plurals = 42 ; plural = n > 10 "); - assert_eq!(map.plural_forms(), (42, "n > 10")); + assert_eq!(map.plural_forms(), (Some(42), Some("n > 10"))); } } diff --git a/src/parser.rs b/src/parser.rs index b251412..528823a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -36,6 +36,8 @@ pub enum Error { MisplacedMetadata, /// An unknown encoding was specified in the metadata UnknownEncoding, + /// Invalid Plural-Forms metadata + PluralParsing, } use Error::*; @@ -49,6 +51,7 @@ impl error::Error for Error { MalformedMetadata => "metadata syntax error", MisplacedMetadata => "misplaced metadata", UnknownEncoding => "unknown encoding specified", + PluralParsing => "invalid plural expression", } } } @@ -88,7 +91,7 @@ impl From> for Error { #[derive(Default)] pub struct ParseOptions { force_encoding: Option, - force_plural: Option usize>>, + force_plural: Option usize>, } impl ParseOptions { @@ -118,7 +121,7 @@ impl ParseOptions { /// the parser uses the default formula /// (`n != 1`). pub fn force_plural(mut self, plural: fn(u64) -> usize) -> Self { - self.force_plural = Some(Box::new(plural)); + self.force_plural = Some(plural); self } } @@ -159,8 +162,9 @@ pub fn parse_catalog<'a, R: io::Read>( } let mut catalog = Catalog::new(); - let mut resolver = opts.force_plural.map(|f| Resolver::Function(f)) - .unwrap_or(Resolver::Function(Box::new(default_resolver))); + if let Some(f) = opts.force_plural { + catalog.resolver = Resolver::Function(f); + } let mut encoding = opts.force_encoding.unwrap_or(utf8_encoding); for i in 0..num_strings { @@ -221,9 +225,10 @@ pub fn parse_catalog<'a, R: io::Read>( None => return Err(UnknownEncoding), } } - let plural_forms = map.plural_forms().1.to_owned(); - if let Ok(ast) = Ast::parse(plural_forms.as_ref()) { - resolver = Resolver::Expr(Box::new(ast)) + if opts.force_plural.is_none() { + if let Some(p) = map.plural_forms().1 { + catalog.resolver = Ast::parse(p).map(Resolver::Expr)?; + } } } @@ -233,7 +238,6 @@ pub fn parse_catalog<'a, R: io::Read>( off_ttable += 8; } - catalog.resolver = resolver; Ok(catalog) } diff --git a/src/plurals.rs b/src/plurals.rs index 85b10ef..33e7417 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -1,12 +1,14 @@ +use parser::Error; + use self::Resolver::*; #[derive(Clone, Debug)] pub enum Resolver { /// A boolean expression /// Use Ast::parse to get an Ast - Expr(Box), + Expr(Ast), /// A function - Function(Box usize>) + Function(fn(u64) -> usize) } /// Finds the index of a pattern, outside of parenthesis @@ -66,12 +68,6 @@ pub enum Operator { Modulo, } -#[derive(Debug, PartialEq)] -pub enum ParseError { - UnexpectedToken, - InvalidTernary, -} - impl Ast { fn resolve(&self, n: u64) -> usize { match *self { @@ -100,11 +96,11 @@ impl Ast { } } - pub fn parse<'a>(src: &'a str) -> Result { + pub fn parse<'a>(src: &'a str) -> Result { Self::parse_parens(src.trim()) } - fn parse_parens<'a>(src: &'a str) -> Result { + fn parse_parens<'a>(src: &'a str) -> Result { if src.starts_with('(') && src.ends_with(')') { Ast::parse(src[1..src.len() - 1].trim()) } else { @@ -112,7 +108,7 @@ impl Ast { } } - fn parse_and<'a>(src: &'a str) -> Result { + fn parse_and<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "&&") { Ok(Ast::Op(Operator::And, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { @@ -120,7 +116,7 @@ impl Ast { } } - fn parse_or<'a>(src: &'a str) -> Result { + fn parse_or<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "||") { Ok(Ast::Op(Operator::Or, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { @@ -128,7 +124,7 @@ impl Ast { } } - fn parse_ternary<'a>(src: &'a str) -> Result { + fn parse_ternary<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "?") { if let Some(l) = index_of(src, ":") { Ok(Ast::Ternary( @@ -137,14 +133,14 @@ impl Ast { Box::new(Ast::parse(&src[l + 1..])?), )) } else { - Err(ParseError::InvalidTernary) + Err(Error::PluralParsing) } } else { Self::parse_ge(src) } } - fn parse_ge<'a>(src: &'a str) -> Result { + fn parse_ge<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, ">=") { Ok(Ast::Op(Operator::GreaterOrEqual, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { @@ -152,7 +148,7 @@ impl Ast { } } - fn parse_gt<'a>(src: &'a str) -> Result { + fn parse_gt<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, ">") { Ok(Ast::Op(Operator::Greater, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 1..])?))) } else { @@ -160,7 +156,7 @@ impl Ast { } } - fn parse_le<'a>(src: &'a str) -> Result { + fn parse_le<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "<=") { Ok(Ast::Op(Operator::SmallerOrEqual, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { @@ -168,7 +164,7 @@ impl Ast { } } - fn parse_lt<'a>(src: &'a str) -> Result { + fn parse_lt<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "<") { Ok(Ast::Op(Operator::Smaller, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 1..])?))) } else { @@ -176,7 +172,7 @@ impl Ast { } } - fn parse_eq<'a>(src: &'a str) -> Result { + fn parse_eq<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "==") { Ok(Ast::Op(Operator::Equal, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { @@ -184,14 +180,14 @@ impl Ast { } } - fn parse_neq<'a>(src: &'a str) -> Result { + fn parse_neq<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "!=") { Ok(Ast::Op(Operator::NotEqual, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 2..])?))) } else { Self::parse_mod(src) } } - fn parse_mod<'a>(src: &'a str) -> Result { + fn parse_mod<'a>(src: &'a str) -> Result { if let Some(i) = index_of(src, "%") { Ok(Ast::Op(Operator::Modulo, Box::new(Ast::parse(&src[0..i])?), Box::new(Ast::parse(&src[i + 1..])?))) } else { @@ -199,7 +195,7 @@ impl Ast { } } - fn parse_not<'a>(src: &'a str) -> Result { + fn parse_not<'a>(src: &'a str) -> Result { if index_of(src, "!") == Some(0) { Ok(Ast::Not(Box::new(Ast::parse(&src[1..])?))) } else { @@ -207,7 +203,7 @@ impl Ast { } } - fn parse_int<'a>(src: &'a str) -> Result { + fn parse_int<'a>(src: &'a str) -> Result { if let Ok(x) = u64::from_str_radix(src, 10) { Ok(Ast::Integer(x)) } else { @@ -215,11 +211,11 @@ impl Ast { } } - fn parse_n<'a>(src: &'a str) -> Result { + fn parse_n<'a>(src: &'a str) -> Result { if src == "n" { Ok(Ast::N) } else { - Err(ParseError::UnexpectedToken) + Err(Error::PluralParsing) } } } @@ -241,12 +237,12 @@ mod tests { #[test] fn test_expr_resolver() { - assert_eq!(Expr(Box::new(N)).resolve(42), 42); + assert_eq!(Expr(N).resolve(42), 42); } #[test] fn test_parser() { - assert_eq!(Ast::parse("n == 42 ? n : 6 && n < 7"), Ok(Ast::Op( + assert_eq!(Ast::parse("n == 42 ? n : 6 && n < 7").expect("Invalid plural"), Ast::Op( Operator::And, Box::new(Ast::Ternary( Box::new(Ast::Op( @@ -262,11 +258,11 @@ mod tests { Box::new(Ast::N), Box::new(Ast::Integer(7)) )) - ))); + )); - assert_eq!(Ast::parse("(n)"), Ok(Ast::N)); + assert_eq!(Ast::parse("(n)").expect("Invalid plural"), Ast::N); - assert_eq!(Ast::parse("(n == 1 || n == 2) ? 0 : 1"), Ok(Ast::Ternary( + assert_eq!(Ast::parse("(n == 1 || n == 2) ? 0 : 1").expect("Invalid plural"), Ast::Ternary( Box::new(Ast::Op( Operator::Or, Box::new(Ast::Op( @@ -282,6 +278,6 @@ mod tests { )), Box::new(Ast::Integer(0)), Box::new(Ast::Integer(1)) - ))) + )) } }