-
Notifications
You must be signed in to change notification settings - Fork 184
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Short Compact Currency Formatter Provider and Populate Associated Data #5361
Changes from 3 commits
954e434
307d2a4
164850d
8f223e5
65d2db8
1626fe4
603a09b
7e2a7a8
0e7d9ac
8574cdf
aeb7bc1
1b09d29
c72bd00
f7bad7a
9ca41bf
234223f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use zerovec::ule::{AsULE, ZeroVecError, ULE}; | ||
|
||
use super::currency_compact::{CompactCount, Count}; | ||
|
||
/// [`CompactCountULE`] is a type optimized for efficient storage and | ||
/// deserialization of [`CompactCount`] using the `ZeroVec` model. | ||
/// | ||
/// The serialization model packs a [`CompactCount`] into a single byte. | ||
/// | ||
/// The most significant bit (b7) determines the count type: | ||
/// If the bit is `0`, then the type is `Standard`. | ||
/// If the bit is `1`, then the type is `AlphaNextToNumber`. | ||
/// | ||
/// The three least significant bits (b2, b1 & b0) determine the count: | ||
/// 000 -> Count::Zero | ||
/// 001 -> Count::One | ||
/// 010 -> Count::Two | ||
/// 011 -> Count::Few | ||
/// 100 -> Count::Many | ||
/// 101 -> Count::Other | ||
/// (110 and 111 are invalid and are rejected by `validate_byte_slice`.) | ||
/// | ||
/// The other bits (b6, b5, b4, b3) must always be zero. | ||
#[derive(Copy, Clone, Debug, PartialEq)] | ||
#[repr(transparent)] | ||
pub struct CompactCountULE(u8); | ||
|
||
// Safety (based on the safety checklist on the ULE trait): | ||
// 1. CompactCountULE does not include any uninitialized or padding bytes. | ||
// (achieved by `#[repr(transparent)]` on a ULE type) | ||
// 2. CompactCountULE is aligned to 1 byte. | ||
// (achieved by `#[repr(transparent)]` on a ULE type) | ||
// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. | ||
// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. | ||
// 5. The other ULE methods use the default impl. | ||
// 6. CompactCountULE byte equality is semantic equality. | ||
unsafe impl ULE for CompactCountULE { | ||
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { | ||
for byte in bytes { | ||
if byte & 0b0111_1000 != 0 { | ||
return Err(ZeroVecError::parse::<Self>()); | ||
} | ||
|
||
if byte & 0b0000_0111 > 5 { | ||
return Err(ZeroVecError::parse::<Self>()); | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
impl AsULE for CompactCount { | ||
type ULE = CompactCountULE; | ||
fn to_unaligned(self) -> Self::ULE { | ||
CompactCountULE(match self { | ||
CompactCount::Standard(count) => count as u8, | ||
CompactCount::AlphaNextToNumber(count) => (count as u8) | 0b1000_0000, | ||
}) | ||
} | ||
|
||
#[inline] | ||
fn from_unaligned(unaligned: Self::ULE) -> Self { | ||
let count = match unaligned.0 & 0b0000_0111 { | ||
0 => Count::Zero, | ||
1 => Count::One, | ||
2 => Count::Two, | ||
3 => Count::Few, | ||
4 => Count::Many, | ||
5 => Count::Other, | ||
_ => unreachable!(), | ||
}; | ||
match unaligned.0 & 0b1000_0000 { | ||
0 => CompactCount::Standard(count), | ||
0b1000_0000 => CompactCount::AlphaNextToNumber(count), | ||
_ => unreachable!(), | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ | |
//! Read more about data providers: [`icu_provider`] | ||
|
||
use icu_provider::prelude::*; | ||
use zerovec::ZeroMap2d; | ||
use zerovec::ZeroMap; | ||
|
||
/// Currency Compact V1 data struct. | ||
#[icu_provider::data_struct(marker(ShortCurrencyCompactV1Marker, "currency/compact@1"))] | ||
|
@@ -33,11 +33,9 @@ pub struct ShortCurrencyCompactV1<'data> { | |
/// `"1000-count-one-alt-alphaNextToNumber": "¤ 0K"` | ||
/// -> key = (3, CompactCount::AlphaNextToNumber(Count::One)), value = "¤ 0K" | ||
#[cfg_attr(feature = "serde", serde(borrow))] | ||
pub compact_patterns: ZeroMap2d<'data, i8, CompactCount, str>, | ||
pub compact_patterns: ZeroMap<'data, (i8, CompactCount), str>, | ||
} | ||
|
||
#[zerovec::make_ule(CompactCountULE)] | ||
#[zerovec::derive(Debug)] | ||
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Debug)] | ||
#[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
#[cfg_attr( | ||
|
@@ -46,56 +44,35 @@ pub struct ShortCurrencyCompactV1<'data> { | |
databake(path = icu_experimental::dimension::provider::currency_compact) | ||
)] | ||
#[repr(u8)] | ||
pub enum CompactCount { | ||
pub enum Count { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use an existing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done and #5373 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you just moved it? |
||
/// CompactPattern `zero`. | ||
Zero = 0, | ||
/// Compact Pattern `zero` alternative. | ||
ZeroAlt = 1, | ||
|
||
/// CompactPattern `one`. | ||
One = 2, | ||
/// Compact Pattern `one` alternative. | ||
OneAlt = 3, | ||
One = 1, | ||
|
||
/// CompactPattern `two`. | ||
Two = 4, | ||
/// Compact Pattern `two` alternative. | ||
TwoAlt = 5, | ||
Two = 2, | ||
|
||
/// Compact Pattern `few`. | ||
Few = 6, | ||
/// Compact Pattern `few` alternative. | ||
FewAlt = 7, | ||
Few = 3, | ||
|
||
/// CompactPattern `many`. | ||
Many = 8, | ||
/// Compact Pattern `many` alternative. | ||
ManyAlt = 9, | ||
Many = 4, | ||
|
||
/// CompactPattern `other`. | ||
Other = 10, | ||
/// Compact Pattern `other` alternative. | ||
OtherAlt = 11, | ||
Other = 5, | ||
} | ||
|
||
impl TryFrom<&str> for CompactCount { | ||
type Error = (); | ||
|
||
fn try_from(value: &str) -> Result<Self, Self::Error> { | ||
match value { | ||
"zero" => Ok(CompactCount::Zero), | ||
"zero-alt-alphaNextToNumber" => Ok(CompactCount::ZeroAlt), | ||
"one" => Ok(CompactCount::One), | ||
"one-alt-alphaNextToNumber" => Ok(CompactCount::OneAlt), | ||
"two" => Ok(CompactCount::Two), | ||
"two-alt-alphaNextToNumber" => Ok(CompactCount::TwoAlt), | ||
"few" => Ok(CompactCount::Few), | ||
"few-alt-alphaNextToNumber" => Ok(CompactCount::FewAlt), | ||
"many" => Ok(CompactCount::Many), | ||
"many-alt-alphaNextToNumber" => Ok(CompactCount::ManyAlt), | ||
"other" => Ok(CompactCount::Other), | ||
"other-alt-alphaNextToNumber" => Ok(CompactCount::OtherAlt), | ||
_ => Err(()), | ||
} | ||
} | ||
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Debug)] | ||
#[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
#[cfg_attr( | ||
feature = "datagen", | ||
derive(serde::Serialize, databake::Bake), | ||
databake(path = icu_experimental::dimension::provider::currency_compact) | ||
)] | ||
#[repr(u8)] | ||
pub enum CompactCount { | ||
Standard(Count), | ||
AlphaNextToNumber(Count), | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you have 4 bits left in
CompactCountULE
, what's the range of the log10?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there is no limit for log10, but I think to put the limit
16
is kinda a low limitThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah it'd be -8 to 7 as well.