Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor properties to separate crate #1153

Merged
merged 16 commits into from
Oct 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,9 @@ jobs:
matrix:
component:
- components/locid
- components/uniset
- components/plurals
- components/datetime
- utils/uniset
- utils/fixed_decimal


Expand Down
16 changes: 14 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ members = [
"components/locid",
"components/locid/macros",
"components/plurals",
"components/uniset",
"components/properties",
"experimental/bies",
"experimental/formatted_string_builder",
"experimental/list_formatter",
Expand All @@ -37,6 +37,7 @@ members = [
"utils/fixed_decimal",
"utils/litemap",
"utils/pattern",
"utils/uniset",
"utils/writeable",
"utils/yoke",
"utils/yoke/derive",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ ICU4X will provide an ECMA-402-compatible API surface in the target client-side
The [performance benchmarks](docs/process/benchmarking.md) are all run on Ubuntu, and are broken out by component.

* [locid](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/locid)
* [uniset](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/uniset)
* [uniset](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/utils/uniset)
* [fixed_decimal](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/utils/fixed_decimal)
* [plurals](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/plurals)
* [datetime](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/datetime)
Expand Down
9 changes: 5 additions & 4 deletions components/icu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ version = "0.3"
path = "../plurals"
default-features = false

[dependencies.icu_uniset]
[dependencies.icu_properties]
version = "0.3"
path = "../uniset"
path = "../../components/properties"
default-features = false

[dependencies.fixed_decimal]
Expand All @@ -79,10 +79,11 @@ default-features = false
[dev-dependencies]
icu_provider = { version = "0.3", path = "../../provider/core" }
icu_testdata = { version = "0.3", path = "../../provider/testdata" }
icu_uniset = { version = "0.3", path = "../../utils/uniset" }
writeable = { version = "0.2", path = "../../utils/writeable" }

[features]
std = ["icu_datetime/std", "icu_locid/std", "icu_plurals/std", "icu_uniset/std", "fixed_decimal/std"]
std = ["icu_datetime/std", "icu_locid/std", "icu_plurals/std", "icu_properties/std", "fixed_decimal/std"]
default = ["provider_serde"]
serde = [
"icu_locid/serde"
Expand All @@ -92,5 +93,5 @@ provider_serde = [
"icu_decimal/provider_serde",
"icu_locale_canonicalizer/provider_serde",
"icu_plurals/provider_serde",
"icu_uniset/provider_serde",
"icu_properties/provider_serde",
]
2 changes: 1 addition & 1 deletion components/icu/examples/tui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
use icu::datetime::DateTimeFormatOptions;
use icu::locid::{macros::langid, Locale};
use icu::plurals::{PluralCategory, PluralRuleType, PluralRules};
use icu::uniset::UnicodeSetBuilder;
use icu_datetime::{mock::zoned_datetime::MockZonedDateTime, ZonedDateTimeFormat};
use icu_uniset::UnicodeSetBuilder;
use std::env;

fn print<T: AsRef<str>>(_input: T) {
Expand Down
54 changes: 11 additions & 43 deletions components/icu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,50 +367,18 @@ pub mod plurals {
pub use icu_plurals::*;
}

pub mod uniset {
//! Unicode Set operations
pub mod properties {
//! `icu_properties` is a utility crate of the [`ICU4X`] project.
//!
//! This API provides necessary functionality for highly efficient querying of sets of Unicode characters.
//! This component provides definitions of [Unicode Properties] and APIs for
//! retrieving property data in an appropriate data structure.
//!
//! It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).
//! Currently, only binary property APIs are supported, with APIs that return
//! a [`UnicodeSet`]. See the [`sets`] module for more details.
//!
//! # Architecture
//! ICU4X `UnicodeSet` is split up into independent levels, with [`UnicodeSet`] representing the membership/query API,
//! and [`UnicodeSetBuilder`] representing the builder API. A [Properties API](http://userguide.icu-project.org/strings/properties)
//! is in future works.
//!
//! # Examples:
//!
//! ## Creating a `UnicodeSet`
//!
//! UnicodeSets are created from either serialized UnicodeSets,
//! represented by [inversion lists](http://userguide.icu-project.org/strings/properties),
//! the [`UnicodeSetBuilder`], or from the TBA Properties API.
//!
//! ```
//! use icu::uniset::{UnicodeSet, UnicodeSetBuilder};
//!
//! let mut builder = UnicodeSetBuilder::new();
//! builder.add_range(&('A'..'Z'));
//! let set: UnicodeSet = builder.build();
//!
//! assert!(set.contains('A'));
//! ```
//!
//! ## Querying a `UnicodeSet`
//!
//! Currently, you can check if a character/range of characters exists in the UnicodeSet, or iterate through the characters.
//!
//! ```
//! use icu::uniset::{UnicodeSet, UnicodeSetBuilder};
//!
//! let mut builder = UnicodeSetBuilder::new();
//! builder.add_range(&('A'..'Z'));
//! let set: UnicodeSet = builder.build();
//!
//! assert!(set.contains('A'));
//! assert!(set.contains_range(&('A'..='C')));
//! assert_eq!(set.iter_chars().next(), Some('A'));
//! ```
pub use icu_uniset::*;
sffc marked this conversation as resolved.
Show resolved Hide resolved
//! [`ICU4X`]: ../icu/index.html
//! [Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html
//! [`UnicodeSet`]: ../../icu_uniset/struct.UnicodeSet.html
//! [`sets`]: sets
Comment on lines +379 to +382
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and below: Let's fix these crossrefs.

pub use icu_properties::*;
}
51 changes: 51 additions & 0 deletions components/properties/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "icu_properties"
description = "Definitions for Unicode properties"
version = "0.3.0"
authors = ["The ICU4X Project Developers"]
edition = "2018"
readme = "README.md"
repository = "https://github.com/unicode-org/icu4x"
license-file = "LICENSE"
categories = ["internationalization"]
# Keep this in sync with other crates unless there are exceptions
include = [
"src/**/*",
"examples/**/*",
"benches/**/*",
"tests/**/*",
"Cargo.toml",
"LICENSE",
"README.md"
]

[package.metadata.cargo-all-features]
skip_optional_dependencies = true
# Bench feature gets tested separately and is only relevant for CI
denylist = ["bench"]

[package.metadata.docs.rs]
all-features = true

[dependencies]
icu_provider = { version = "0.3", path = "../../provider/core", features = ["macros"] }
icu_uniset = { version = "0.3", path = "../../utils/uniset", features = ["serde"] }
num_enum = { version = "0.5.4", default-features = false }
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
zerovec = { version = "0.3", path = "../../utils/zerovec", features = ["serde"] }

[dev-dependencies]
icu = { path = "../../components/icu", default-features = false }

[lib]
bench = false # This option is required for Benchmark CI
path = "src/lib.rs"

[features]
std = ["icu_provider/std"]
default = ["provider_serde"]
provider_serde = ["serde"]
File renamed without changes.
18 changes: 18 additions & 0 deletions components/properties/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# icu_properties [![crates.io](http://meritbadge.herokuapp.com/icu_properties)](https://crates.io/crates/icu_properties)

`icu_properties` is a utility crate of the [`ICU4X`] project.

This component provides definitions of [Unicode Properties] and APIs for
retrieving property data in an appropriate data structure.

Currently, only binary property APIs are supported, with APIs that return
a [`UnicodeSet`]. See the [`sets`] module for more details.

[`ICU4X`]: ../icu/index.html
[Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html
[`UnicodeSet`]: icu_uniset::UnicodeSet
[`sets`]: crate::sets

## More Information

For more information on development, authorship, contributing, etc., please visit the [`ICU4X home page`](https://github.com/unicode-org/icu4x).
25 changes: 25 additions & 0 deletions components/properties/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! `icu_properties` is a utility crate of the [`ICU4X`] project.
//!
//! This component provides definitions of [Unicode Properties] and APIs for
//! retrieving property data in an appropriate data structure.
//!
//! Currently, only binary property APIs are supported, with APIs that return
//! a [`UnicodeSet`]. See the [`sets`] module for more details.
//!
//! [`ICU4X`]: ../icu/index.html
//! [Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html
//! [`UnicodeSet`]: icu_uniset::UnicodeSet
//! [`sets`]: crate::sets

#![no_std]

mod props;
pub mod provider;
pub mod sets;
mod ule;

pub use props::*;
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use num_enum::{TryFromPrimitive, UnsafeFromPrimitive};

/// Selection constants for Unicode properties.
/// These constants are used to select one of the Unicode properties.
/// See UProperty in ICU4C.
/// See `UProperty` in ICU4C.
#[derive(Clone, PartialEq, Debug)]
#[non_exhaustive]
pub enum EnumeratedProperty {
Expand All @@ -21,8 +21,8 @@ pub enum EnumeratedProperty {
}

/// Enumerated Unicode general category types.
/// GeneralSubcategory only supports specific subcategories (eg UppercaseLetter).
/// It does not support grouped categories (eg Letter). For grouped categories, use GeneralCategory.
/// `GeneralSubcategory` only supports specific subcategories (e.g. `UppercaseLetter`).
/// It does not support grouped categories (e.g. `Letter`). For grouped categories, use [`GeneralCategory`].
Comment on lines +24 to +25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Praise: good catch

#[derive(Copy, Clone, PartialEq, Debug, TryFromPrimitive, UnsafeFromPrimitive)]
#[repr(u8)]
pub enum GeneralSubcategory {
Expand Down Expand Up @@ -99,8 +99,8 @@ pub enum GeneralSubcategory {
/// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C.
/// Unlike [`GeneralSubcategory`], this supports groups of general categories: for example, `Letter`
/// is the union of `UppercaseLetter`, `LowercaseLetter`, etc...
/// See https://www.unicode.org/reports/tr44/ .
/// See UCharCategory and U_GET_GC_MASK in ICU4C.
/// See <https://www.unicode.org/reports/tr44/>.
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[repr(transparent)]
pub struct GeneralCategory(pub(crate) u32);
Expand Down Expand Up @@ -231,7 +231,7 @@ impl From<GeneralSubcategory> for GeneralCategory {
/// a particular subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani.
/// For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(transparent)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
//!
//! Read more about data providers: [`icu_provider`]

use crate::builder::UnicodeSetBuilder;
use crate::uniset::UnicodeSet;
use icu_provider::yoke::{self, *};
use icu_uniset::UnicodeSet;
use icu_uniset::UnicodeSetBuilder;

//
// resource key structs - the structs used directly by users of data provider
Expand Down
Loading