From d83a0e390387ce9be1aa083a83be6a2764ee7861 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Wed, 6 Oct 2021 17:03:53 -0700 Subject: [PATCH 1/9] Rename TrieTypeEnum to TrieType TrieType no longer exists, so we don't need an awkward name for TrieTypeEnum. --- utils/codepointtrie/src/codepointtrie.rs | 18 +++++++++--------- utils/codepointtrie/src/planes.rs | 2 +- utils/codepointtrie/tests/planes_test.rs | 2 +- utils/codepointtrie/tests/test_util.rs | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index 8c405bfe1c3..e6c15acc9b5 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -27,7 +27,7 @@ pub enum ValueWidthEnum { /// See [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C. #[derive(Clone, Copy, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub enum TrieTypeEnum { +pub enum TrieType { /// Represents the "fast" type code point tries for the /// [`TrieType`] trait. The "fast max" limit is set to `0xffff`. Fast = 0, @@ -130,16 +130,16 @@ pub struct CodePointTrieHeader { pub null_value: u32, /// The enum value representing the type of trie, where trie type is as it /// is defined in ICU (ex: Fast, Small). 
- pub trie_type: TrieTypeEnum, + pub trie_type: TrieType, } -impl TryFrom<u8> for TrieTypeEnum { +impl TryFrom<u8> for TrieType { type Error = crate::error::Error; - fn try_from(trie_type_int: u8) -> Result<TrieTypeEnum, crate::error::Error> { + fn try_from(trie_type_int: u8) -> Result<TrieType, crate::error::Error> { match trie_type_int { - 0 => Ok(TrieTypeEnum::Fast), - 1 => Ok(TrieTypeEnum::Small), + 0 => Ok(TrieType::Fast), + 1 => Ok(TrieType::Small), _ => Err(crate::error::Error::FromDeserialized { reason: "Cannot parse value for trie_type", }), @@ -183,7 +183,7 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { fn internal_small_index(&self, code_point: u32) -> u32 { let mut index1_pos: u32 = code_point >> SHIFT_1; - if self.header.trie_type == TrieTypeEnum::Fast { + if self.header.trie_type == TrieType::Fast { debug_assert!( FAST_TYPE_FAST_INDEXING_MAX < code_point && code_point < self.header.high_start ); @@ -296,8 +296,8 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { // thus only need 2 lookups for a [CodePointTrie::get()](`crate::codepointtrie::CodePointTrie::get`). // Code points above the "fast max" limit require 4 lookups. 
let fast_max = match self.header.trie_type { - TrieTypeEnum::Fast => FAST_TYPE_FAST_INDEXING_MAX, - TrieTypeEnum::Small => SMALL_TYPE_FAST_INDEXING_MAX, + TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX, + TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX, }; let data_pos: u32 = if code_point <= fast_max { Self::fast_index(self, code_point) diff --git a/utils/codepointtrie/src/planes.rs b/utils/codepointtrie/src/planes.rs index 06e8e816d3a..c4bd4706953 100644 --- a/utils/codepointtrie/src/planes.rs +++ b/utils/codepointtrie/src/planes.rs @@ -176,7 +176,7 @@ pub fn get_planes_trie() -> CodePointTrie<'static, u8> { let index3_null_offset = 0x2; let data_null_offset = 0x0; let null_value = 0x0; - let trie_type = TrieTypeEnum::Small; + let trie_type = TrieType::Small; let trie_header = CodePointTrieHeader { high_start, diff --git a/utils/codepointtrie/tests/planes_test.rs b/utils/codepointtrie/tests/planes_test.rs index 25b8158a24d..d396d73fe8b 100644 --- a/utils/codepointtrie/tests/planes_test.rs +++ b/utils/codepointtrie/tests/planes_test.rs @@ -41,7 +41,7 @@ fn planes_trie_deserialize_check_test() { let code_point_trie_struct = planes_enum_prop.code_point_trie.trie_struct; - let trie_type_enum = match TrieTypeEnum::try_from(code_point_trie_struct.trie_type_enum_val) { + let trie_type_enum = match TrieType::try_from(code_point_trie_struct.trie_type_enum_val) { Ok(enum_val) => enum_val, _ => { panic!( diff --git a/utils/codepointtrie/tests/test_util.rs b/utils/codepointtrie/tests/test_util.rs index 8b4db2ab8df..eb1205ff1b6 100644 --- a/utils/codepointtrie/tests/test_util.rs +++ b/utils/codepointtrie/tests/test_util.rs @@ -152,7 +152,7 @@ pub fn run_deserialize_test_from_test_data(test_file_path: &str) { test_struct.name ); - let trie_type_enum = match TrieTypeEnum::try_from(test_struct.trie_type_enum_val) { + let trie_type_enum = match TrieType::try_from(test_struct.trie_type_enum_val) { Ok(enum_val) => enum_val, _ => { panic!( From 
6b64fdb2f4b0d5c4cf5b521ffb08486ec3b75a13 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Thu, 7 Oct 2021 15:10:22 -0700 Subject: [PATCH 2/9] Implement Yokeable/ZeroCopyFrom for CodePointTrie and data struct --- Cargo.lock | 1 + utils/codepointtrie/Cargo.toml | 3 +- utils/codepointtrie/src/codepointtrie.rs | 86 +++++++++--------------- utils/codepointtrie/src/lib.rs | 1 + utils/codepointtrie/src/provider.rs | 76 +++++++++++++++++++++ utils/codepointtrie/tests/test_util.rs | 12 +++- 6 files changed, 123 insertions(+), 56 deletions(-) create mode 100644 utils/codepointtrie/src/provider.rs diff --git a/Cargo.lock b/Cargo.lock index 9284e29e794..5e5c0c918e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1072,6 +1072,7 @@ dependencies = [ name = "icu_codepointtrie" version = "0.2.0" dependencies = [ + "icu_provider", "postcard", "serde", "thiserror", diff --git a/utils/codepointtrie/Cargo.toml b/utils/codepointtrie/Cargo.toml index 6a3cb19b8f1..be462b8fc18 100644 --- a/utils/codepointtrie/Cargo.toml +++ b/utils/codepointtrie/Cargo.toml @@ -32,9 +32,10 @@ denylist = ["bench"] all-features = true [dependencies] +icu_provider = { version = "0.3", path = "../../provider/core", features = ["macros"] } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } thiserror = "1.0" -zerovec = { version = "0.3", path = "../../utils/zerovec", features = ["serde"] } +zerovec = { version = "0.3", path = "../../utils/zerovec", features = ["serde", "yoke"] } [dev-dependencies] postcard = { version = "0.7", features = ["alloc"] } diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index e6c15acc9b5..b2ff54f687d 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -9,18 +9,7 @@ use core::convert::TryFrom; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use zerovec::ZeroVec; - -// Enums - -/// The width of the elements in the data array 
of a [`CodePointTrie`]. -/// See [`UCPTrieValueWidth`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C. -#[derive(Clone, Copy, PartialEq)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub enum ValueWidthEnum { - Bits16 = 0, - Bits32 = 1, - Bits8 = 2, -} +use icu_provider::yoke::ZeroCopyFrom; /// The type of trie represents whether the trie has an optimization that /// would make it small or fast. @@ -36,51 +25,26 @@ pub enum TrieType { Small = 1, } -// ValueWidth trait +// TrieValue trait // AsULE is AsUnalignedLittleEndian, i.e. "allowed in a zerovec" -/// A trait representing the width of the values stored in the data array of a -/// [`CodePointTrie`]. This trait is used as a type parameter in constructing -/// a `CodePointTrie`. -pub trait ValueWidth: Copy + zerovec::ule::AsULE + 'static { - /// This enum variant represents the specific instance of `ValueWidth` such - /// that the enum discriminant values matches ICU4C's enum integer value. - const ENUM_VALUE: ValueWidthEnum; - /// This value is used to indicate an error in the Rust code in accessing - /// a position in the trie's `data` array. In normal cases, the position in - /// the `data` array will return either the correct value, or in case of a - /// logical error in the trie's computation, the trie's own error value - /// which is stored that in the `data` array. +/// A trait representing the values stored in the data array of a [`CodePointTrie`]. +/// This trait is used as a type parameter in constructing a `CodePointTrie`. 
+pub trait TrieValue: Copy + zerovec::ule::AsULE + 'static { const DATA_GET_ERROR_VALUE: Self; - fn cast_to_widest(self) -> u32; } -impl ValueWidth for u8 { - const ENUM_VALUE: ValueWidthEnum = ValueWidthEnum::Bits8; +impl TrieValue for u8 { const DATA_GET_ERROR_VALUE: u8 = u8::MAX; - - fn cast_to_widest(self) -> u32 { - self as u32 - } } -impl ValueWidth for u16 { - const ENUM_VALUE: ValueWidthEnum = ValueWidthEnum::Bits16; +impl TrieValue for u16 { const DATA_GET_ERROR_VALUE: u16 = u16::MAX; - - fn cast_to_widest(self) -> u32 { - self as u32 - } } -impl ValueWidth for u32 { - const ENUM_VALUE: ValueWidthEnum = ValueWidthEnum::Bits32; +impl TrieValue for u32 { const DATA_GET_ERROR_VALUE: u32 = u32::MAX; - - fn cast_to_widest(self) -> u32 { - self - } } /// This struct represents a de-serialized CodePointTrie that was exported from @@ -90,16 +54,17 @@ impl ValueWidth for u32 { /// - [ICU Site design doc](http://site.icu-project.org/design/struct/utrie) /// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup) #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct CodePointTrie<'trie, W: ValueWidth> { +pub struct CodePointTrie<'trie, T: TrieValue> { header: CodePointTrieHeader, #[cfg_attr(feature = "serde", serde(borrow))] index: ZeroVec<'trie, u16>, #[cfg_attr(feature = "serde", serde(borrow))] - data: ZeroVec<'trie, W>, + data: ZeroVec<'trie, T>, } /// This struct contains the fixed-length header fields of a [`CodePointTrie`]. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Copy,Clone)] pub struct CodePointTrieHeader { /// The code point of the start of the last range of the trie. 
A /// range is defined as a partition of the code point space such that the @@ -147,14 +112,14 @@ impl TryFrom for TrieType { } } -impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { +impl<'trie, T: TrieValue> CodePointTrie<'trie, T> { /// Returns a new [`CodePointTrie`] backed by borrowed data for the `index` /// array and `data` array, whose data values have width `W`. pub fn try_new( header: CodePointTrieHeader, index: ZeroVec<'trie, u16>, - data: ZeroVec<'trie, W>, - ) -> Result, Error> { + data: ZeroVec<'trie, T>, + ) -> Result, Error> { // Validation invariants are not needed here when constructing a new // `CodePointTrie` because: // @@ -167,7 +132,7 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { // - The `ZeroVec` serializer stores the length of the array along with the // ZeroVec data, meaning that a deserializer would also see that length info. - let trie: CodePointTrie<'trie, W> = CodePointTrie { + let trie: CodePointTrie<'trie, T> = CodePointTrie { header, index, data, @@ -290,7 +255,7 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { /// assert_eq!(0, trie.get(0x13E0)); // 'Ꮰ' as u32 /// assert_eq!(1, trie.get(0x10044)); // '𐁄' as u32 /// ``` - pub fn get(&self, code_point: u32) -> W { + pub fn get(&self, code_point: u32) -> T { // All code points up to the fast max limit are represented // individually in the `index` array to hold their `data` array position, and // thus only need 2 lookups for a [CodePointTrie::get()](`crate::codepointtrie::CodePointTrie::get`). @@ -308,12 +273,14 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { }; // Returns the trie value (or trie's error value). // If we cannot read from the data array, then return the associated constant - // DATA_GET_ERROR_VALUE for the instance type for W: ValueWidth. + // DATA_GET_ERROR_VALUE for the instance type for T: TrieValue. 
self.data .get(data_pos as usize) - .unwrap_or(W::DATA_GET_ERROR_VALUE) + .unwrap_or(T::DATA_GET_ERROR_VALUE) } +} +impl<'trie, T: TrieValue + Into> CodePointTrie<'trie, T> { /// Returns the value that is associated with `code_point` for this [`CodePointTrie`] /// as a `u32`. /// @@ -333,7 +300,18 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> { // Note: This API method maintains consistency with the corresponding // original ICU APIs. pub fn get_u32(&self, code_point: u32) -> u32 { - self.get(code_point).cast_to_widest() + self.get(code_point).into() + } +} + +impl<'a, T: TrieValue> ZeroCopyFrom> for CodePointTrie<'static, T> +{ + fn zero_copy_from<'b>(cart: &'b CodePointTrie<'a, T>) -> CodePointTrie<'b, T> { + CodePointTrie { + header: cart.header, + index: ZeroVec::<'static, u16>::zero_copy_from(&cart.index), + data: ZeroVec::<'static, T>::zero_copy_from(&cart.data) + } } } diff --git a/utils/codepointtrie/src/lib.rs b/utils/codepointtrie/src/lib.rs index 18c104ff904..357d35fe833 100644 --- a/utils/codepointtrie/src/lib.rs +++ b/utils/codepointtrie/src/lib.rs @@ -39,3 +39,4 @@ pub mod codepointtrie; pub mod error; mod impl_const; pub mod planes; +pub mod provider; diff --git a/utils/codepointtrie/src/provider.rs b/utils/codepointtrie/src/provider.rs new file mode 100644 index 00000000000..df32888e818 --- /dev/null +++ b/utils/codepointtrie/src/provider.rs @@ -0,0 +1,76 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core::{mem,ptr}; + +use crate::codepointtrie::{CodePointTrie, TrieValue}; +use icu_provider::yoke::*; + +// Note: +// T: TrieValue is T: Copy + zerovec::ule::AsULE + 'static + +unsafe impl<'a, T: TrieValue> Yokeable<'a> for CodePointTrie<'static, T> { + type Output = CodePointTrie<'a, T>; + fn transform(&'a self) -> &'a Self::Output { + self + } + fn transform_owned(self) -> Self::Output { + self + } + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::() == mem::size_of::()); + let ptr: *const Self = (&from as *const Self::Output).cast(); + mem::forget(from); + ptr::read(ptr) + } + fn transform_mut(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +pub struct UnicodePropertyMapV1<'data, T: TrieValue> { + pub codepoint_trie: CodePointTrie<'data, T>, +} + +unsafe impl<'a, T: TrieValue> Yokeable<'a> for UnicodePropertyMapV1<'static, T> { + type Output = UnicodePropertyMapV1<'a, T>; + fn transform(&'a self) -> &'a Self::Output { + self + } + fn transform_owned(self) -> Self::Output { + self + } + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::() == mem::size_of::()); + let ptr: *const Self = (&from as *const Self::Output).cast(); + mem::forget(from); + ptr::read(ptr) + } + fn transform_mut(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +impl<'a, T: TrieValue> ZeroCopyFrom> for UnicodePropertyMapV1<'static,T> { + fn zero_copy_from<'b>(cart: &'b UnicodePropertyMapV1<'a, T>) -> UnicodePropertyMapV1<'b,T> { + UnicodePropertyMapV1 { + codepoint_trie: CodePointTrie::<'static, T>::zero_copy_from(&cart.codepoint_trie) + } + } +} + +pub struct UnicodePropertyMapV1Marker { + _phantom: core::marker::PhantomData +} + +impl<'data, T: TrieValue> icu_provider::DataMarker<'data> for 
UnicodePropertyMapV1Marker { + type Yokeable = UnicodePropertyMapV1<'static, T>; + type Cart = UnicodePropertyMapV1<'data, T>; +} diff --git a/utils/codepointtrie/tests/test_util.rs b/utils/codepointtrie/tests/test_util.rs index eb1205ff1b6..1c7ec5fbe15 100644 --- a/utils/codepointtrie/tests/test_util.rs +++ b/utils/codepointtrie/tests/test_util.rs @@ -11,7 +11,17 @@ use std::io::Read; use std::path::Path; use zerovec::ZeroVec; -pub fn check_trie(trie: &CodePointTrie, check_ranges: &[u32]) { +/// The width of the elements in the data array of a [`CodePointTrie`]. +/// See [`UCPTrieValueWidth`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C. +#[derive(Clone, Copy, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum ValueWidthEnum { + Bits16 = 0, + Bits32 = 1, + Bits8 = 2, +} + +pub fn check_trie>(trie: &CodePointTrie, check_ranges: &[u32]) { assert_eq!( 0, check_ranges.len() % 2, From 2e79cef5f7712d86cb81e0b728a06f5e3f721821 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Thu, 7 Oct 2021 15:41:03 -0700 Subject: [PATCH 3/9] Cargo fmt + minor fixes --- utils/codepointtrie/src/codepointtrie.rs | 9 ++++----- utils/codepointtrie/src/provider.rs | 12 +++++++----- utils/codepointtrie/tests/test_util.rs | 2 ++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index b2ff54f687d..10144daf2d8 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -6,10 +6,10 @@ use crate::error::Error; use crate::impl_const::*; use core::convert::TryFrom; +use icu_provider::yoke::ZeroCopyFrom; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use zerovec::ZeroVec; -use icu_provider::yoke::ZeroCopyFrom; /// The type of trie represents whether the trie has an optimization that /// would make it small or fast. 
@@ -64,7 +64,7 @@ pub struct CodePointTrie<'trie, T: TrieValue> { /// This struct contains the fixed-length header fields of a [`CodePointTrie`]. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Copy,Clone)] +#[derive(Copy, Clone)] pub struct CodePointTrieHeader { /// The code point of the start of the last range of the trie. A /// range is defined as a partition of the code point space such that the @@ -304,13 +304,12 @@ impl<'trie, T: TrieValue + Into> CodePointTrie<'trie, T> { } } -impl<'a, T: TrieValue> ZeroCopyFrom> for CodePointTrie<'static, T> -{ +impl<'a, T: TrieValue> ZeroCopyFrom> for CodePointTrie<'static, T> { fn zero_copy_from<'b>(cart: &'b CodePointTrie<'a, T>) -> CodePointTrie<'b, T> { CodePointTrie { header: cart.header, index: ZeroVec::<'static, u16>::zero_copy_from(&cart.index), - data: ZeroVec::<'static, T>::zero_copy_from(&cart.data) + data: ZeroVec::<'static, T>::zero_copy_from(&cart.data), } } } diff --git a/utils/codepointtrie/src/provider.rs b/utils/codepointtrie/src/provider.rs index df32888e818..79337cb435c 100644 --- a/utils/codepointtrie/src/provider.rs +++ b/utils/codepointtrie/src/provider.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
-use core::{mem,ptr}; +use core::{mem, ptr}; use crate::codepointtrie::{CodePointTrie, TrieValue}; use icu_provider::yoke::*; @@ -58,16 +58,18 @@ unsafe impl<'a, T: TrieValue> Yokeable<'a> for UnicodePropertyMapV1<'static, T> } } -impl<'a, T: TrieValue> ZeroCopyFrom> for UnicodePropertyMapV1<'static,T> { - fn zero_copy_from<'b>(cart: &'b UnicodePropertyMapV1<'a, T>) -> UnicodePropertyMapV1<'b,T> { +impl<'a, T: TrieValue> ZeroCopyFrom> + for UnicodePropertyMapV1<'static, T> +{ + fn zero_copy_from<'b>(cart: &'b UnicodePropertyMapV1<'a, T>) -> UnicodePropertyMapV1<'b, T> { UnicodePropertyMapV1 { - codepoint_trie: CodePointTrie::<'static, T>::zero_copy_from(&cart.codepoint_trie) + codepoint_trie: CodePointTrie::<'static, T>::zero_copy_from(&cart.codepoint_trie), } } } pub struct UnicodePropertyMapV1Marker { - _phantom: core::marker::PhantomData + _phantom: core::marker::PhantomData, } impl<'data, T: TrieValue> icu_provider::DataMarker<'data> for UnicodePropertyMapV1Marker { diff --git a/utils/codepointtrie/tests/test_util.rs b/utils/codepointtrie/tests/test_util.rs index 1c7ec5fbe15..8b04276fa0f 100644 --- a/utils/codepointtrie/tests/test_util.rs +++ b/utils/codepointtrie/tests/test_util.rs @@ -6,6 +6,8 @@ use icu_codepointtrie::codepointtrie::*; use icu_codepointtrie::error::Error; use core::convert::TryFrom; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; use std::fs::File; use std::io::Read; use std::path::Path; From 106eb287691c1de0b92fc9c84a9376688ff5661a Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Wed, 13 Oct 2021 17:43:15 -0700 Subject: [PATCH 4/9] Rebase on yoke-generics --- utils/codepointtrie/src/codepointtrie.rs | 23 ++++----- utils/codepointtrie/src/provider.rs | 62 +----------------------- 2 files changed, 14 insertions(+), 71 deletions(-) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index 10144daf2d8..0fe154331d2 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ 
b/utils/codepointtrie/src/codepointtrie.rs @@ -6,7 +6,7 @@ use crate::error::Error; use crate::impl_const::*; use core::convert::TryFrom; -use icu_provider::yoke::ZeroCopyFrom; +use icu_provider::yoke::{self, Yokeable, ZeroCopyFrom}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use zerovec::ZeroVec; @@ -54,6 +54,7 @@ impl TrieValue for u32 { /// - [ICU Site design doc](http://site.icu-project.org/design/struct/utrie) /// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup) #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Yokeable, ZeroCopyFrom)] pub struct CodePointTrie<'trie, T: TrieValue> { header: CodePointTrieHeader, #[cfg_attr(feature = "serde", serde(borrow))] @@ -64,7 +65,7 @@ pub struct CodePointTrie<'trie, T: TrieValue> { /// This struct contains the fixed-length header fields of a [`CodePointTrie`]. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Yokeable, ZeroCopyFrom)] pub struct CodePointTrieHeader { /// The code point of the start of the last range of the trie. 
A /// range is defined as a partition of the code point space such that the @@ -304,15 +305,15 @@ impl<'trie, T: TrieValue + Into> CodePointTrie<'trie, T> { } } -impl<'a, T: TrieValue> ZeroCopyFrom> for CodePointTrie<'static, T> { - fn zero_copy_from<'b>(cart: &'b CodePointTrie<'a, T>) -> CodePointTrie<'b, T> { - CodePointTrie { - header: cart.header, - index: ZeroVec::<'static, u16>::zero_copy_from(&cart.index), - data: ZeroVec::<'static, T>::zero_copy_from(&cart.data), - } - } -} +// impl<'a, T: TrieValue> ZeroCopyFrom> for CodePointTrie<'static, T> { +// fn zero_copy_from<'b>(cart: &'b CodePointTrie<'a, T>) -> CodePointTrie<'b, T> { +// CodePointTrie { +// header: cart.header, +// index: ZeroVec::<'static, u16>::zero_copy_from(&cart.index), +// data: ZeroVec::<'static, T>::zero_copy_from(&cart.data), +// } +// } +// } #[cfg(test)] mod tests { diff --git a/utils/codepointtrie/src/provider.rs b/utils/codepointtrie/src/provider.rs index 79337cb435c..3b753d4f5f8 100644 --- a/utils/codepointtrie/src/provider.rs +++ b/utils/codepointtrie/src/provider.rs @@ -2,72 +2,14 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
-use core::{mem, ptr}; - use crate::codepointtrie::{CodePointTrie, TrieValue}; -use icu_provider::yoke::*; - -// Note: -// T: TrieValue is T: Copy + zerovec::ule::AsULE + 'static - -unsafe impl<'a, T: TrieValue> Yokeable<'a> for CodePointTrie<'static, T> { - type Output = CodePointTrie<'a, T>; - fn transform(&'a self) -> &'a Self::Output { - self - } - fn transform_owned(self) -> Self::Output { - self - } - unsafe fn make(from: Self::Output) -> Self { - debug_assert!(mem::size_of::() == mem::size_of::()); - let ptr: *const Self = (&from as *const Self::Output).cast(); - mem::forget(from); - ptr::read(ptr) - } - fn transform_mut(&'a mut self, f: F) - where - F: 'static + for<'b> FnOnce(&'b mut Self::Output), - { - unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } - } -} +use icu_provider::yoke::{self, Yokeable, ZeroCopyFrom}; +#[derive(Yokeable, ZeroCopyFrom)] pub struct UnicodePropertyMapV1<'data, T: TrieValue> { pub codepoint_trie: CodePointTrie<'data, T>, } -unsafe impl<'a, T: TrieValue> Yokeable<'a> for UnicodePropertyMapV1<'static, T> { - type Output = UnicodePropertyMapV1<'a, T>; - fn transform(&'a self) -> &'a Self::Output { - self - } - fn transform_owned(self) -> Self::Output { - self - } - unsafe fn make(from: Self::Output) -> Self { - debug_assert!(mem::size_of::() == mem::size_of::()); - let ptr: *const Self = (&from as *const Self::Output).cast(); - mem::forget(from); - ptr::read(ptr) - } - fn transform_mut(&'a mut self, f: F) - where - F: 'static + for<'b> FnOnce(&'b mut Self::Output), - { - unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } - } -} - -impl<'a, T: TrieValue> ZeroCopyFrom> - for UnicodePropertyMapV1<'static, T> -{ - fn zero_copy_from<'b>(cart: &'b UnicodePropertyMapV1<'a, T>) -> UnicodePropertyMapV1<'b, T> { - UnicodePropertyMapV1 { - codepoint_trie: CodePointTrie::<'static, T>::zero_copy_from(&cart.codepoint_trie), - } - } -} - pub struct UnicodePropertyMapV1Marker { _phantom: core::marker::PhantomData, 
} From 008739640b4bd937ff3d3026396c56e8333e74e8 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Wed, 13 Oct 2021 17:50:30 -0700 Subject: [PATCH 5/9] Add doc comments --- utils/codepointtrie/src/codepointtrie.rs | 1 + utils/codepointtrie/src/provider.rs | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index 0fe154331d2..eecfab22122 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -32,6 +32,7 @@ pub enum TrieType { /// A trait representing the values stored in the data array of a [`CodePointTrie`]. /// This trait is used as a type parameter in constructing a `CodePointTrie`. pub trait TrieValue: Copy + zerovec::ule::AsULE + 'static { + /// The value to return if we cannot read data from the trie. const DATA_GET_ERROR_VALUE: Self; } diff --git a/utils/codepointtrie/src/provider.rs b/utils/codepointtrie/src/provider.rs index 3b753d4f5f8..1d2eb6d9d98 100644 --- a/utils/codepointtrie/src/provider.rs +++ b/utils/codepointtrie/src/provider.rs @@ -2,14 +2,22 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +//! Data provider struct definitions for this ICU4X component. +//! +//! Read more about data providers: [`icu_provider`] + use crate::codepointtrie::{CodePointTrie, TrieValue}; use icu_provider::yoke::{self, Yokeable, ZeroCopyFrom}; +/// A map efficiently storing data about individual characters. #[derive(Yokeable, ZeroCopyFrom)] pub struct UnicodePropertyMapV1<'data, T: TrieValue> { + /// A codepoint trie storing the data pub codepoint_trie: CodePointTrie<'data, T>, } +/// Marker type for UnicodePropertyMapV1. +/// This is generated by hand because icu_provider::data_struct doesn't support generics yet. 
pub struct UnicodePropertyMapV1Marker { _phantom: core::marker::PhantomData, } From 18f2556b1a521c1a255d8c7e7c64dd3c84b98852 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Mon, 18 Oct 2021 11:14:10 -0700 Subject: [PATCH 6/9] Address feedback --- utils/codepointtrie/src/codepointtrie.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index eecfab22122..2977d9bd3f2 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -306,16 +306,6 @@ impl<'trie, T: TrieValue + Into> CodePointTrie<'trie, T> { } } -// impl<'a, T: TrieValue> ZeroCopyFrom> for CodePointTrie<'static, T> { -// fn zero_copy_from<'b>(cart: &'b CodePointTrie<'a, T>) -> CodePointTrie<'b, T> { -// CodePointTrie { -// header: cart.header, -// index: ZeroVec::<'static, u16>::zero_copy_from(&cart.index), -// data: ZeroVec::<'static, T>::zero_copy_from(&cart.data), -// } -// } -// } - #[cfg(test)] mod tests { #[cfg(feature = "serde")] From ac13151306dd6cb4c0be86052e0c079d53159c13 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Mon, 18 Oct 2021 13:42:41 -0700 Subject: [PATCH 7/9] Add additional derives --- utils/codepointtrie/Cargo.toml | 4 ++++ utils/codepointtrie/src/codepointtrie.rs | 19 +++++++++++++++---- utils/codepointtrie/src/provider.rs | 16 +++++++++++++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/utils/codepointtrie/Cargo.toml b/utils/codepointtrie/Cargo.toml index be462b8fc18..fbc10c48632 100644 --- a/utils/codepointtrie/Cargo.toml +++ b/utils/codepointtrie/Cargo.toml @@ -46,3 +46,7 @@ zerovec = { version = "0.3", path = "../../utils/zerovec", features = ["serde"] [lib] bench = false # This option is required for Benchmark CI path = "src/lib.rs" + +[features] +default = ["provider_serde"] +provider_serde = ["serde"] diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index 
2977d9bd3f2..ff5ec4d6460 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -14,7 +14,7 @@ use zerovec::ZeroVec; /// The type of trie represents whether the trie has an optimization that /// would make it small or fast. /// See [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C. -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, Debug, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum TrieType { /// Represents the "fast" type code point tries for the @@ -31,7 +31,7 @@ pub enum TrieType { /// A trait representing the values stored in the data array of a [`CodePointTrie`]. /// This trait is used as a type parameter in constructing a `CodePointTrie`. -pub trait TrieValue: Copy + zerovec::ule::AsULE + 'static { +pub trait TrieValue: Copy + Eq + PartialEq + zerovec::ule::AsULE + 'static { /// The value to return if we cannot read data from the trie. const DATA_GET_ERROR_VALUE: Self; } @@ -55,7 +55,7 @@ impl TrieValue for u32 { /// - [ICU Site design doc](http://site.icu-project.org/design/struct/utrie) /// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup) #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Yokeable, ZeroCopyFrom)] +#[derive(Debug, Eq, PartialEq, Yokeable, ZeroCopyFrom)] pub struct CodePointTrie<'trie, T: TrieValue> { header: CodePointTrieHeader, #[cfg_attr(feature = "serde", serde(borrow))] @@ -66,7 +66,7 @@ pub struct CodePointTrie<'trie, T: TrieValue> { /// This struct contains the fixed-length header fields of a [`CodePointTrie`]. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Copy, Clone, Yokeable, ZeroCopyFrom)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Yokeable, ZeroCopyFrom)] pub struct CodePointTrieHeader { /// The code point of the start of the last range of the trie. 
A /// range is defined as a partition of the code point space such that the @@ -306,6 +306,17 @@ impl<'trie, T: TrieValue + Into<u32>> CodePointTrie<'trie, T> { } } +impl<'trie, T: TrieValue> Clone for CodePointTrie<'trie, T> +where <T as zerovec::ule::AsULE>::ULE: Clone { + fn clone(&self) -> Self { + CodePointTrie { + header: self.header, + index: self.index.clone(), + data: self.data.clone(), + } + } +} + #[cfg(test)] mod tests { #[cfg(feature = "serde")] diff --git a/utils/codepointtrie/src/provider.rs b/utils/codepointtrie/src/provider.rs index 1d2eb6d9d98..3fff25a6e79 100644 --- a/utils/codepointtrie/src/provider.rs +++ b/utils/codepointtrie/src/provider.rs @@ -10,12 +10,26 @@ use crate::codepointtrie::{CodePointTrie, TrieValue}; use icu_provider::yoke::{self, Yokeable, ZeroCopyFrom}; /// A map efficiently storing data about individual characters. -#[derive(Yokeable, ZeroCopyFrom)] +#[derive(Debug, Eq, PartialEq, Yokeable, ZeroCopyFrom)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub struct UnicodePropertyMapV1<'data, T: TrieValue> { /// A codepoint trie storing the data + #[cfg_attr(feature = "provider_serde", serde(borrow))] pub codepoint_trie: CodePointTrie<'data, T>, } +impl<'data, T: TrieValue> Clone for UnicodePropertyMapV1<'data, T> +where <T as zerovec::ule::AsULE>::ULE: Clone { + fn clone(&self) -> Self { + UnicodePropertyMapV1 { + codepoint_trie: self.codepoint_trie.clone(), + } + } +} + /// Marker type for UnicodePropertyMapV1. /// This is generated by hand because icu_provider::data_struct doesn't support generics yet. pub struct UnicodePropertyMapV1Marker { From f9b74ca5e24e653a490466df9fb7d536f6002802 Mon Sep 17 00:00:00 2001 From: iainireland Date: Mon, 18 Oct 2021 13:49:19 -0700 Subject: [PATCH 8/9] Update comment on DATA_GET_ERROR_VALUE Co-authored-by: Shane F. 
Carr --- utils/codepointtrie/src/codepointtrie.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index ff5ec4d6460..1cd8fb6d795 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -32,7 +32,9 @@ pub enum TrieType { /// A trait representing the values stored in the data array of a [`CodePointTrie`]. /// This trait is used as a type parameter in constructing a `CodePointTrie`. pub trait TrieValue: Copy + Eq + PartialEq + zerovec::ule::AsULE + 'static { - /// The value to return if we cannot read data from the trie. + /// Last-resort fallback value to return if we cannot read data from the trie. + /// + /// In most cases, the error value is read from the last element of the `data` array. const DATA_GET_ERROR_VALUE: Self; } From 57ac8edd08214e7e5170f74e5e88451d450d2452 Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Mon, 18 Oct 2021 13:52:51 -0700 Subject: [PATCH 9/9] Cargo fmt --- utils/codepointtrie/src/codepointtrie.rs | 4 +++- utils/codepointtrie/src/provider.rs | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/utils/codepointtrie/src/codepointtrie.rs b/utils/codepointtrie/src/codepointtrie.rs index 1cd8fb6d795..78d10bb30b6 100644 --- a/utils/codepointtrie/src/codepointtrie.rs +++ b/utils/codepointtrie/src/codepointtrie.rs @@ -309,7 +309,9 @@ impl<'trie, T: TrieValue + Into<u32>> CodePointTrie<'trie, T> { } impl<'trie, T: TrieValue> Clone for CodePointTrie<'trie, T> -where <T as zerovec::ule::AsULE>::ULE: Clone { +where + <T as zerovec::ule::AsULE>::ULE: Clone, +{ fn clone(&self) -> Self { CodePointTrie { header: self.header, diff --git a/utils/codepointtrie/src/provider.rs b/utils/codepointtrie/src/provider.rs index 3fff25a6e79..1909da5c796 100644 --- a/utils/codepointtrie/src/provider.rs +++ b/utils/codepointtrie/src/provider.rs @@ -22,7 +22,9 @@ pub struct UnicodePropertyMapV1<'data, T: TrieValue> { } impl<'data, T: TrieValue> Clone for 
UnicodePropertyMapV1<'data, T> -where <T as zerovec::ule::AsULE>::ULE: Clone { +where + <T as zerovec::ule::AsULE>::ULE: Clone, +{ fn clone(&self) -> Self { UnicodePropertyMapV1 { codepoint_trie: self.codepoint_trie.clone(),