From 4522f5b1fb1e08da2f37f781d2b719ec6a98374c Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 7 Nov 2024 04:11:49 +0000 Subject: [PATCH] Add TupleNVarULE (#5777) The VarULE counterpart of TupleNULE Part of https://github.com/unicode-org/icu4x/issues/5523. Planned to be used in https://github.com/unicode-org/icu4x/issues/4437 I'm not super happy with the naming with this vs VarTupleULE, but I've tried to make it clearer with the module names and it's fine for now. We can rename as desired since zerovec isn't on the ICU4X stability track. I do plan to add serde/etc impls but that's going to be a separate PR. --- utils/zerovec/src/ule/encode.rs | 2 +- utils/zerovec/src/ule/mod.rs | 1 + utils/zerovec/src/ule/tuplevar.rs | 218 ++++++++++++++++++++++++++++++ utils/zerovec/src/ule/vartuple.rs | 2 +- 4 files changed, 221 insertions(+), 2 deletions(-) create mode 100644 utils/zerovec/src/ule/tuplevar.rs diff --git a/utils/zerovec/src/ule/encode.rs b/utils/zerovec/src/ule/encode.rs index fac7f2e249a..1a0fa2e2548 100644 --- a/utils/zerovec/src/ule/encode.rs +++ b/utils/zerovec/src/ule/encode.rs @@ -81,7 +81,7 @@ pub unsafe trait EncodeAsVarULE { /// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>` /// /// This is primarily useful for generating `Deserialize` impls for VarULE types -pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> { +pub fn encode_varule_to_box<S: EncodeAsVarULE<T> + ?Sized, T: VarULE + ?Sized>(x: &S) -> Box<T> { // zero-fill the vector to avoid uninitialized data UB let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()]; x.encode_var_ule_write(&mut vec); diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index 44f9f8ab129..d721364dfbc 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -22,6 +22,7 @@ mod plain; mod slices; pub mod tuple; +pub mod tuplevar; pub mod vartuple; pub use chars::CharULE; pub use encode::{encode_varule_to_box, EncodeAsVarULE}; diff --git a/utils/zerovec/src/ule/tuplevar.rs 
b/utils/zerovec/src/ule/tuplevar.rs new file mode 100644 index 00000000000..ef2e0caacd1 --- /dev/null +++ b/utils/zerovec/src/ule/tuplevar.rs @@ -0,0 +1,218 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! [`VarULE`] impls for tuples. +//! +//! This module exports [`Tuple2VarULE`], [`Tuple3VarULE`], ..., the corresponding [`VarULE`] types +//! of tuples containing purely [`VarULE`] types. +//! +//! This can be paired with [`VarTupleULE`] to make arbitrary combinations of [`ULE`] and [`VarULE`] types. +//! +//! [`VarTupleULE`]: crate::ule::vartuple::VarTupleULE + +use super::*; +use alloc::borrow::ToOwned; +use core::fmt; +use core::marker::PhantomData; +use core::mem; +use zerofrom::ZeroFrom; + +macro_rules! tuple_varule { + // Invocation: Should be called like `tuple_varule!(Tuple2VarULE, 2, [ A a AX 0, B b BX 1 ])` + // + // $T is a generic name, $t is a lowercase version of it, $T_alt is an "alternate" name to use when we need two types referring + // to the same input field, $i is an index. + // + // $name is the name of the type, $len MUST be the total number of fields, and then $i must be an integer going from 0 to (n - 1) in sequence + // (This macro code can rely on $i < $len) + ($name:ident, $len:literal, [ $($T:ident $t:ident $T_alt: ident $i:tt),+ ]) => { + #[doc = concat!("VarULE type for tuples with ", $len, " elements. See module docs for more information")] + #[repr(transparent)] + #[allow(clippy::exhaustive_structs)] // stable + pub struct $name<$($T: ?Sized),+> { + $($t: PhantomData<$T>,)+ + // Safety invariant: Each "field" $i of the MultiFieldsULE is a valid instance of $T + // + // In other words, calling `.get_field::<$T>($i)` is always safe. 
+ // + // This invariant is upheld when this type is constructed during VarULE parsing/validation + multi: MultiFieldsULE<$len> + } + + impl<$($T: VarULE + ?Sized),+> $name<$($T),+> { + $( + #[doc = concat!("Get field ", $i, "of this tuple")] + pub fn $t(&self) -> &$T { + // Safety: See invariant of `multi`. + unsafe { + self.multi.get_field::<$T>($i) + } + } + + + )+ + } + + // # Safety + // + // ## Checklist + // + // Safety checklist for `VarULE`: + // + // 1. align(1): repr(transparent) around an align(1) VarULE type: MultiFieldsULE + // 2. No padding: see previous point + // 3. `validate_byte_slice` validates that this type is a valid MultiFieldsULE, and that each field is the correct type from the tuple. + // 4. `validate_byte_slice` checks length by deferring to the inner ULEs + // 5. `from_byte_slice_unchecked` returns a fat pointer to the bytes. + // 6. All other methods are left at their default impl. + // 7. The inner ULEs have byte equality, so this composition has byte equality. 
+ unsafe impl<$($T: VarULE + ?Sized),+> VarULE for $name<$($T),+> + { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), UleError> { + let multi = <MultiFieldsULE<$len> as VarULE>::parse_byte_slice(bytes)?; + $( + // Safety invariant: $i < $len, from the macro invocation + unsafe { + multi.validate_field::<$T>($i)?; + } + )+ + Ok(()) + } + + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + let multi = <MultiFieldsULE<$len> as VarULE>::from_byte_slice_unchecked(bytes); + + // This type is repr(transparent) over MultiFieldsULE<$len>, so its slices can be transmuted + // Field invariant upheld here: validate_byte_slice above validates every field for being the right type + mem::transmute::<&MultiFieldsULE<$len>, &Self>(multi) + } + } + + impl<$($T: fmt::Debug + VarULE + ?Sized),+> fmt::Debug for $name<$($T),+> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + ($(self.$t(),)+).fmt(f) + } + } + + // We need manual impls since `#[derive()]` is disallowed on packed types + impl<$($T: PartialEq + VarULE + ?Sized),+> PartialEq for $name<$($T),+> { + fn eq(&self, other: &Self) -> bool { + + ($(self.$t(),)+).eq(&($(other.$t(),)+)) + } + } + + impl<$($T: Eq + VarULE + ?Sized),+> Eq for $name<$($T),+> {} + + impl<$($T: PartialOrd + VarULE + ?Sized),+> PartialOrd for $name<$($T),+> { + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + ($(self.$t(),)+).partial_cmp(&($(other.$t(),)+)) + } + } + + impl<$($T: Ord + VarULE + ?Sized),+> Ord for $name<$($T),+> { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + ($(self.$t(),)+).cmp(&($(other.$t(),)+)) + } + } + + // # Safety + // + // encode_var_ule_len: returns the length of the individual VarULEs together. + // + // encode_var_ule_write: writes bytes by deferring to the inner VarULE impls. 
+ unsafe impl<$($T,)+ $($T_alt),+> EncodeAsVarULE<$name<$($T),+>> for ( $($T_alt),+ ) + where + $($T: VarULE + ?Sized,)+ + $($T_alt: EncodeAsVarULE<$T>,)+ + { + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + MultiFieldsULE::<$len>::compute_encoded_len_for([$(self.$i.encode_var_ule_len()),+]) + } + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + let lengths = [$(self.$i.encode_var_ule_len()),+]; + let multi = MultiFieldsULE::<$len>::new_from_lengths_partially_initialized(lengths, dst); + $( + // Safety: $i < $len, from the macro invocation, and field $i is supposed to be of type $T + unsafe { + multi.set_field_at::<$T, $T_alt>($i, &self.$i); + } + )+ + } + } + + impl<$($T: VarULE + ?Sized),+> ToOwned for $name<$($T),+> { + type Owned = Box<Self>; + fn to_owned(&self) -> Self::Owned { + encode_varule_to_box(self) + } + } + + impl<'a, $($T,)+ $($T_alt),+> ZeroFrom <'a, $name<$($T,)+>> for ($($T_alt),+) + where + $($T: VarULE + ?Sized,)+ + $($T_alt: ZeroFrom<'a, $T>,)+ { + fn zero_from(other: &'a $name<$($T,)+>) -> Self { + ( + $($T_alt::zero_from(other.$t()),)+ + ) + } + } + }; +} + +tuple_varule!(Tuple2VarULE, 2, [ A a AE 0, B b BE 1 ]); +tuple_varule!(Tuple3VarULE, 3, [ A a AE 0, B b BE 1, C c CE 2 ]); +tuple_varule!(Tuple4VarULE, 4, [ A a AE 0, B b BE 1, C c CE 2, D d DE 3 ]); +tuple_varule!(Tuple5VarULE, 5, [ A a AE 0, B b BE 1, C c CE 2, D d DE 3, E e EE 4 ]); +tuple_varule!(Tuple6VarULE, 6, [ A a AE 0, B b BE 1, C c CE 2, D d DE 3, E e EE 4, F f FE 5 ]); + +#[cfg(test)] +mod tests { + use super::*; + use crate::VarZeroSlice; + use crate::VarZeroVec; + #[test] + fn test_pairvarule_validate() { + let vec: Vec<(&str, &[u8])> = vec![("a", b"b"), ("foo", b"bar"), ("lorem", b"ipsum\xFF")]; + let zerovec: VarZeroVec<Tuple2VarULE<str, [u8]>> = (&vec).into(); + let bytes = zerovec.as_bytes(); + let zerovec2 = 
VarZeroVec::parse_byte_slice(bytes).unwrap(); + assert_eq!(zerovec, zerovec2); + + // Test failed validation with a correctly sized but differently constrained tuple + // Note: ipsum\xFF is not a valid str + let zerovec3 = VarZeroVec::<Tuple2VarULE<str, str>>::parse_byte_slice(bytes); + assert!(zerovec3.is_err()); + } + #[test] + fn test_tripleule_validate() { + let vec: Vec<(&str, &[u8], VarZeroVec<str>)> = vec![ + ("a", b"b", (&vec!["a", "b", "c"]).into()), + ("foo", b"bar", (&vec!["baz", "quux"]).into()), + ( + "lorem", + b"ipsum\xFF", + (&vec!["dolor", "sit", "amet"]).into(), + ), + ]; + let zerovec: VarZeroVec<Tuple3VarULE<str, [u8], VarZeroSlice<str>>> = (&vec).into(); + let bytes = zerovec.as_bytes(); + let zerovec2 = VarZeroVec::parse_byte_slice(bytes).unwrap(); + assert_eq!(zerovec, zerovec2); + + // Test failed validation with a correctly sized but differently constrained tuple + // Note: the str is unlikely to be a valid varzerovec + let zerovec3 = VarZeroVec::<Tuple3VarULE<VarZeroSlice<str>, [u8], VarZeroSlice<str>>>::parse_byte_slice(bytes); + assert!(zerovec3.is_err()); + } +} diff --git a/utils/zerovec/src/ule/vartuple.rs b/utils/zerovec/src/ule/vartuple.rs index 2b54b032260..3f0cf9ae291 100644 --- a/utils/zerovec/src/ule/vartuple.rs +++ b/utils/zerovec/src/ule/vartuple.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -//! Types to help compose VarULE primitives. +//! Types to help compose fixed-size [`ULE`] and variable-size [`VarULE`] primitives. //! //! This module exports [`VarTuple`] and [`VarTupleULE`], which allow a single sized type and //! a single unsized type to be stored together as a [`VarULE`].