diff --git a/unic/ucd/age/src/age.rs b/unic/ucd/age/src/age.rs index 8845ad41..cf589c52 100644 --- a/unic/ucd/age/src/age.rs +++ b/unic/ucd/age/src/age.rs @@ -14,6 +14,7 @@ use std::fmt; use unic_utils::CharDataTable; pub use unic_ucd_core::UnicodeVersion; +use unic_utils::CharProperty; /// Represents values of the Unicode character property @@ -41,6 +42,12 @@ pub enum Age { Unassigned, // Unassigned is older (larger) than any age } +impl CharProperty for Age { + fn of(ch: char) -> Self { + Self::of(ch) + } +} + use Age::{Assigned, Unassigned}; diff --git a/unic/ucd/age/src/lib.rs b/unic/ucd/age/src/lib.rs index c28be658..b59b1226 100644 --- a/unic/ucd/age/src/lib.rs +++ b/unic/ucd/age/src/lib.rs @@ -10,7 +10,7 @@ // except according to those terms. -#![forbid(unsafe_code)] +#![forbid(unsafe_code, unconditional_recursion)] #![deny(missing_docs)] //! # UNIC — UCD — Character Age diff --git a/unic/ucd/bidi/src/bidi_class.rs b/unic/ucd/bidi/src/bidi_class.rs index 3c100480..ec1e4e4a 100644 --- a/unic/ucd/bidi/src/bidi_class.rs +++ b/unic/ucd/bidi/src/bidi_class.rs @@ -9,9 +9,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. + use std::fmt; -use unic_utils::CharDataTable; +use unic_utils::{CharDataTable, CharProperty, EnumeratedCharProperty}; + /// Represents the Unicode character /// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property, also known as the @@ -48,6 +50,18 @@ pub enum BidiClass { // [UNIC_UPDATE_ON_UNICODE_UPDATE] Source: `tables/bidi_class_type.rsv` } +impl CharProperty for BidiClass { + fn of(ch: char) -> Self { + Self::of(ch) + } +} + +impl EnumeratedCharProperty for BidiClass { + fn all_values() -> &'static [Self] { + Self::all_values() + } +} + /// Abbreviated name aliases for /// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property. @@ -111,6 +125,37 @@ impl BidiClass { *TABLE.find_or(ch, &L) } + /// Exhaustive list of all `BidiClass` property values. 
+ pub fn all_values() -> &'static [BidiClass] { + use BidiClass::*; + const ALL_VALUES: &[BidiClass] = &[ + ArabicLetter, + ArabicNumber, + ParagraphSeparator, + BoundaryNeutral, + CommonSeparator, + EuropeanNumber, + EuropeanSeparator, + EuropeanTerminator, + FirstStrongIsolate, + LeftToRight, + LeftToRightEmbedding, + LeftToRightIsolate, + LeftToRightOverride, + NonspacingMark, + OtherNeutral, + PopDirectionalFormat, + PopDirectionalIsolate, + RightToLeft, + RightToLeftEmbedding, + RightToLeftIsolate, + RightToLeftOverride, + SegmentSeparator, + WhiteSpace, + ]; + ALL_VALUES + } + /// Abbreviated name of the *Bidi_Class* property value. /// /// diff --git a/unic/ucd/bidi/src/lib.rs b/unic/ucd/bidi/src/lib.rs index f3bf46cc..18f8753b 100644 --- a/unic/ucd/bidi/src/lib.rs +++ b/unic/ucd/bidi/src/lib.rs @@ -10,7 +10,7 @@ // except according to those terms. -#![forbid(unsafe_code)] +#![forbid(unsafe_code, unconditional_recursion)] #![deny(missing_docs)] //! # UNIC — UCD — Bidi diff --git a/unic/ucd/category/Cargo.toml b/unic/ucd/category/Cargo.toml index 85943c94..df7f598a 100644 --- a/unic/ucd/category/Cargo.toml +++ b/unic/ucd/category/Cargo.toml @@ -15,6 +15,6 @@ exclude = [] travis-ci = { repository = "behnam/rust-unic", branch = "master" } [dependencies] +matches = "0.1" unic-ucd-core = { path = "../core/", version = "0.5.0" } unic-utils = { path = "../../utils/", version = "0.5.0" } -matches = "0.1" diff --git a/unic/ucd/category/src/category.rs b/unic/ucd/category/src/category.rs index 095dbc7b..6b74fd6c 100644 --- a/unic/ucd/category/src/category.rs +++ b/unic/ucd/category/src/category.rs @@ -8,7 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use unic_utils::CharDataTable; + +use std::fmt; + +use unic_utils::{CharDataTable, CharProperty, EnumeratedCharProperty}; + /// Represents the Unicode Character /// [*General_Category*](http://unicode.org/reports/tr44/#General_Category) property. 
@@ -16,7 +20,7 @@ use unic_utils::CharDataTable; /// This is a useful breakdown into various character types which can be used as a default /// categorization in implementations. For the property values, see /// [*General_Category Values*](http://unicode.org/reports/tr44/#General_Category_Values). -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum GeneralCategory { /// An uppercase letter (Short form: `Lu`) UppercaseLetter, @@ -80,6 +84,21 @@ pub enum GeneralCategory { Unassigned, } + +impl CharProperty for GeneralCategory { + fn of(ch: char) -> Self { + Self::of(ch) + } +} + + +impl EnumeratedCharProperty for GeneralCategory { + fn all_values() -> &'static [Self] { + Self::all_values() + } +} + + pub mod abbr_names { pub use super::GeneralCategory::UppercaseLetter as Lu; pub use super::GeneralCategory::LowercaseLetter as Ll; @@ -125,8 +144,6 @@ impl GeneralCategory { } /// Exhaustive list of all `GeneralCategory` property values. - /// - /// Reference: pub fn all_values() -> &'static [GeneralCategory] { use GeneralCategory::*; const ALL_VALUES: &[GeneralCategory] = &[ @@ -163,8 +180,16 @@ impl GeneralCategory { ]; ALL_VALUES } + + /// Human-readable description of the property value. + // TODO: Needs to be improved by returning long-name with underscores replaced by space. 
+ #[inline] + pub fn display(&self) -> String { + format!("{:?}", self) + } } + impl GeneralCategory { /// `Lu` | `Ll` | `Lt` (Short form: `LC`) pub fn is_cased_letter(&self) -> bool { @@ -207,6 +232,21 @@ impl GeneralCategory { } } + +impl Default for GeneralCategory { + fn default() -> Self { + GeneralCategory::Unassigned + } +} + + +impl fmt::Display for GeneralCategory { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.display()) + } +} + + #[cfg(test)] mod tests { use super::GeneralCategory as GC; @@ -305,4 +345,11 @@ mod tests { assert_eq!(GC::of(c), GC::Unassigned); } } + + #[test] + fn test_display() { + //assert_eq!(format!("{}", GC::UppercaseLetter), "Uppercase Letter"); + assert_eq!(format!("{}", GC::UppercaseLetter), "UppercaseLetter"); + assert_eq!(format!("{}", GC::Unassigned), "Unassigned"); + } } diff --git a/unic/ucd/category/src/lib.rs b/unic/ucd/category/src/lib.rs index 4e505b53..5171cd8c 100644 --- a/unic/ucd/category/src/lib.rs +++ b/unic/ucd/category/src/lib.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![deny(unsafe_code, missing_docs)] +#![deny(unsafe_code, missing_docs, unconditional_recursion)] //! # UNIC — UCD — Category //! @@ -38,6 +38,7 @@ #[macro_use] extern crate matches; + extern crate unic_ucd_core; extern crate unic_utils; diff --git a/unic/ucd/core/src/lib.rs b/unic/ucd/core/src/lib.rs index 83f73c89..1d2ee6ad 100644 --- a/unic/ucd/core/src/lib.rs +++ b/unic/ucd/core/src/lib.rs @@ -9,7 +9,7 @@ // except according to those terms. -#![forbid(unsafe_code, missing_docs)] +#![forbid(unsafe_code, missing_docs, unconditional_recursion)] //! # UNIC — UCD — Core //! 
diff --git a/unic/ucd/normal/src/canonical_combining_class.rs b/unic/ucd/normal/src/canonical_combining_class.rs index ae2245f1..ee4b5d2d 100644 --- a/unic/ucd/normal/src/canonical_combining_class.rs +++ b/unic/ucd/normal/src/canonical_combining_class.rs @@ -15,7 +15,9 @@ //! Reference: -use unic_utils::CharDataTable; +use std::fmt; + +use unic_utils::{CharDataTable, CharProperty}; /// Represents *Canonical_Combining_Class* property of a Unicode character. @@ -82,6 +84,13 @@ pub mod values { } +impl CharProperty for CanonicalCombiningClass { + fn of(ch: char) -> Self { + Self::of(ch) + } +} + + impl CanonicalCombiningClass { /// Find the character *Canonical_Combining_Class* property value. pub fn of(ch: char) -> CanonicalCombiningClass { @@ -89,6 +98,19 @@ impl CanonicalCombiningClass { include!("tables/canonical_combining_class_values.rsv"); *TABLE.find_or(ch, &CanonicalCombiningClass(0)) } + + /// Human-readable description of the property value. + // TODO: Needs to be improved by returning long-name with underscores replaced by space. + #[inline] + pub fn display(&self) -> String { + format!("{}", self.number()) + } +} + +impl fmt::Display for CanonicalCombiningClass { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.display()) + } } @@ -224,4 +246,10 @@ mod tests { assert_eq!(CCC::of('\u{0315}').number(), 232); assert_eq!(CCC::of('\u{1e94a}').number(), 7); } + + #[test] + fn test_display() { + assert_eq!(format!("{}", CCC::of('\u{0000}')), "0"); + assert_eq!(format!("{}", CCC::of('\u{0300}')), "230"); + } } diff --git a/unic/ucd/normal/src/decomposition_type.rs b/unic/ucd/normal/src/decomposition_type.rs index aa8ecb26..285c1a4b 100644 --- a/unic/ucd/normal/src/decomposition_type.rs +++ b/unic/ucd/normal/src/decomposition_type.rs @@ -12,7 +12,9 @@ //! 
Accessor for *Decomposition_Type* (dt) property -use unic_utils::CharDataTable; +use std::fmt; + +use unic_utils::{CharDataTable, EnumeratedCharProperty, OptionCharProperty}; use composition::canonical_decomposition; use hangul; @@ -22,7 +24,7 @@ use hangul; /// [*Decomposition_Type*](http://www.unicode.org/reports/tr44/#Decomposition_Type) property. /// /// * -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] #[allow(missing_docs)] pub enum DecompositionType { Canonical, // abbreviated: Can @@ -46,6 +48,20 @@ pub enum DecompositionType { } +impl OptionCharProperty for DecompositionType { + fn of(ch: char) -> Option<Self> { + Self::of(ch) + } +} + + +impl EnumeratedCharProperty for DecompositionType { + fn all_values() -> &'static [Self] { + Self::all_values() + } +} + + use self::DecompositionType::*; @@ -61,6 +77,46 @@ impl DecompositionType { include!("tables/compatibility_decomposition_type_values.rsv"); TABLE.find(ch).cloned() } + + /// Exhaustive list of all `DecompositionType` property values. + pub fn all_values() -> &'static [DecompositionType] { + use DecompositionType::*; + const ALL_VALUES: &[DecompositionType] = &[ + Canonical, + Compat, + Circle, + Final, + Font, + Fraction, + Initial, + Isolated, + Medial, + Narrow, + Nobreak, + None, + Small, + Square, + Sub, + Super, + Vertical, + Wide, + ]; + ALL_VALUES + } + + /// Human-readable description of the property value. + // TODO: Needs to be improved by returning long-name with underscores replaced by space. 
+ #[inline] + pub fn display(&self) -> String { + format!("{:?}", self) + } +} + + +impl fmt::Display for DecompositionType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.display()) + } } @@ -192,4 +248,9 @@ mod tests { assert_eq!(DT::of('\u{90000}'), None); assert_eq!(DT::of('\u{a0000}'), None); } + + #[test] + fn test_display() { + assert_eq!(format!("{}", DT::of('\u{a0}').unwrap()), "Nobreak"); + } } diff --git a/unic/ucd/normal/src/lib.rs b/unic/ucd/normal/src/lib.rs index 335c66a0..d4c3cb8c 100644 --- a/unic/ucd/normal/src/lib.rs +++ b/unic/ucd/normal/src/lib.rs @@ -10,7 +10,7 @@ // except according to those terms. -#![deny(unsafe_code, missing_docs)] +#![deny(unsafe_code, missing_docs, unconditional_recursion)] //! # UNIC — UCD — Normalization //! diff --git a/unic/ucd/src/lib.rs b/unic/ucd/src/lib.rs index af4210fc..99f8e666 100644 --- a/unic/ucd/src/lib.rs +++ b/unic/ucd/src/lib.rs @@ -9,7 +9,7 @@ // except according to those terms. -#![forbid(unsafe_code, missing_docs)] +#![forbid(unsafe_code, missing_docs, unconditional_recursion)] //! # UNIC — Unicode Character Database //! diff --git a/unic/utils/README.md b/unic/utils/README.md new file mode 100644 index 00000000..7300dc76 --- /dev/null +++ b/unic/utils/README.md @@ -0,0 +1,6 @@ +# UNIC — Utilities + +[![Crates.io](https://img.shields.io/crates/v/unic-utils.svg)](https://crates.io/crates/unic-utils) +[![Documentation](https://docs.rs/unic-utils/badge.svg)](https://docs.rs/unic-utils/) + +This UNIC component provides utility libraries that do not depend on Unicode data. diff --git a/unic/utils/src/char_property.rs b/unic/utils/src/char_property.rs new file mode 100644 index 00000000..b4c97401 --- /dev/null +++ b/unic/utils/src/char_property.rs @@ -0,0 +1,46 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +//! Traits describing Unicode character properties. + + +use std::fmt::{Debug, Display}; +use std::hash::Hash; + + +/// A character property that yields a value for every `char`. +pub trait CharProperty +where + Self: Copy + Debug + Display + Eq + Hash, +{ + /// Returns the property value of `ch`. + fn of(ch: char) -> Self; +} + + +/// A character property whose value may be absent for a given `char`. +pub trait OptionCharProperty +where + Self: Copy + Debug + Display + Eq + Hash, +{ + /// Returns the property value of `ch`, if any. + fn of(ch: char) -> Option<Self>; +} + + +/// A character property whose values can be listed exhaustively. +pub trait EnumeratedCharProperty +where + Self: Copy + Debug + Display + Eq + Hash, +{ + /// Exhaustive list of all values of the property. + fn all_values() -> &'static [Self]; +} diff --git a/unic/utils/src/lib.rs b/unic/utils/src/lib.rs index 1975c558..4af3293c 100644 --- a/unic/utils/src/lib.rs +++ b/unic/utils/src/lib.rs @@ -29,7 +29,9 @@ pub const PKG_DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION"); pub mod codepoints; pub mod tables; +pub mod char_property; +pub use char_property::{CharProperty, EnumeratedCharProperty, OptionCharProperty}; pub use codepoints::iter_all_chars; pub use tables::CharDataTable;