Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VarZeroVecFormat support to VarTuple and make_var #5808

Merged
merged 5 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,368 changes: 684 additions & 684 deletions provider/data/experimental/data/transliterator_rules_v1_marker.rs.data

Large diffs are not rendered by default.

Large diffs are not rendered by default.

812 changes: 406 additions & 406 deletions provider/data/experimental/fingerprints.csv

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions provider/data/locale/data/aliases_v2_marker.rs.data

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion provider/data/locale/fingerprints.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
locale/aliases@2, <singleton>, 8497B, 8045B, e6f7cbdd19886fcf
locale/aliases@2, <singleton>, 8459B, 8007B, 7bd49e8b1931d4e5
locale/exemplarchars/auxiliary@1, <lookup>, 802B, 163 identifiers
locale/exemplarchars/auxiliary@1, <total>, 34053B, 26306B, 144 unique payloads
locale/exemplarchars/auxiliary@1, af, 92B, 38B, ae09150252a2a416
Expand Down
4 changes: 2 additions & 2 deletions provider/data/locale/stubdata/aliases_v2_marker.rs.data

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions utils/zerovec/derive/examples/make_var.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ struct MultiFieldStruct<'a> {
#[make_varule(MultiFieldConsecutiveStructULE)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)]
#[zerovec::derive(Serialize, Deserialize, Debug)]
#[zerovec::format(zerovec::vecs::Index8)]
struct MultiFieldConsecutiveStruct<'a> {
#[serde(borrow)]
a: Cow<'a, str>,
Expand All @@ -87,6 +88,7 @@ struct CustomVarField<'a> {
#[make_varule(MultiFieldTupleULE)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)]
#[zerovec::derive(Serialize, Deserialize, Debug)]
#[zerovec::format(zerovec::vecs::Index32)]
struct MultiFieldTuple<'a>(
u8,
char,
Expand Down
8 changes: 4 additions & 4 deletions utils/zerovec/derive/src/make_ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ pub fn make_ule_impl(ule_name: Ident, mut input: DeriveInput) -> TokenStream2 {
let name = &input.ident;

let ule_stuff = match input.data {
Data::Struct(ref s) => make_ule_struct_impl(name, &ule_name, &input, s, attrs),
Data::Enum(ref e) => make_ule_enum_impl(name, &ule_name, &input, e, attrs),
Data::Struct(ref s) => make_ule_struct_impl(name, &ule_name, &input, s, &attrs),
Data::Enum(ref e) => make_ule_enum_impl(name, &ule_name, &input, e, &attrs),
_ => {
return Error::new(input.span(), "#[make_ule] must be applied to a struct")
.to_compile_error();
Expand Down Expand Up @@ -80,7 +80,7 @@ fn make_ule_enum_impl(
ule_name: &Ident,
input: &DeriveInput,
enu: &DataEnum,
attrs: ZeroVecAttrs,
attrs: &ZeroVecAttrs,
) -> TokenStream2 {
// We could support more int reprs in the future if needed
if !utils::ReprInfo::compute(&input.attrs).u8 {
Expand Down Expand Up @@ -264,7 +264,7 @@ fn make_ule_struct_impl(
ule_name: &Ident,
input: &DeriveInput,
struc: &DataStruct,
attrs: ZeroVecAttrs,
attrs: &ZeroVecAttrs,
) -> TokenStream2 {
if struc.fields.iter().next().is_none() {
return Error::new(
Expand Down
25 changes: 18 additions & 7 deletions utils/zerovec/derive/src/make_varule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ pub fn make_varule_impl(ule_name: Ident, mut input: DeriveInput) -> TokenStream2
.to_compile_error();
}

let unsized_field_info = UnsizedFields::new(unsized_fields);
let unsized_field_info = UnsizedFields::new(unsized_fields, attrs.vzv_format);

let mut field_inits = crate::ule::make_ule_fields(&sized_fields);
let last_field_ule = unsized_field_info.varule_ty();
Expand Down Expand Up @@ -488,21 +488,29 @@ struct UnsizedField<'a> {

struct UnsizedFields<'a> {
fields: Vec<UnsizedField<'a>>,
format_param: TokenStream2,
}

impl<'a> UnsizedFields<'a> {
fn new(fields: Vec<UnsizedField<'a>>) -> Self {
/// The format_param is an optional tokenstream describing a VZVFormat argument needed by MultiFieldsULE
fn new(fields: Vec<UnsizedField<'a>>, format_param: Option<TokenStream2>) -> Self {
assert!(!fields.is_empty(), "Must have at least one unsized field");
Self { fields }

let format_param = format_param.unwrap_or_else(|| quote!(zerovec::vecs::Index16));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thought: make it a required parameter?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather not: I want the user-facing things (VarZeroVec, make_varule, TupleNVarULE) to default to Index16. Formats are a power user tool IMO.

Self {
fields,
format_param,
}
}

// Get the corresponding VarULE type that can store all of these
fn varule_ty(&self) -> TokenStream2 {
let len = self.fields.len();
let format_param = &self.format_param;
if len == 1 {
self.fields[0].kind.varule_ty()
} else {
quote!(zerovec::ule::MultiFieldsULE::<#len>)
quote!(zerovec::ule::MultiFieldsULE::<#len, #format_param>)
}
}

Expand Down Expand Up @@ -546,6 +554,7 @@ impl<'a> UnsizedFields<'a> {
// Takes all unsized fields on self and encodes them into a byte slice `out`
fn encode_write(&self, out: TokenStream2) -> TokenStream2 {
let len = self.fields.len();
let format_param = &self.format_param;
if len == 1 {
self.fields[0].encode_func(quote!(encode_var_ule_write), quote!(#out))
} else {
Expand All @@ -562,7 +571,7 @@ impl<'a> UnsizedFields<'a> {
quote!(
let lengths = [#(#lengths),*];
// Todo: index type should be settable by attribute
let mut multi = zerovec::ule::MultiFieldsULE::<#len, zerovec::vecs::Index32>::new_from_lengths_partially_initialized(lengths, #out);
let mut multi = zerovec::ule::MultiFieldsULE::<#len, #format_param>::new_from_lengths_partially_initialized(lengths, #out);
unsafe {
#(#writers;)*
}
Expand All @@ -573,6 +582,7 @@ impl<'a> UnsizedFields<'a> {
// Takes all unsized fields on self and returns the length needed for encoding into a byte slice
fn encode_len(&self) -> TokenStream2 {
let len = self.fields.len();
let format_param = &self.format_param;
if len == 1 {
self.fields[0].encode_func(quote!(encode_var_ule_len), quote!())
} else {
Expand All @@ -581,7 +591,7 @@ impl<'a> UnsizedFields<'a> {
lengths.push(field.encode_func(quote!(encode_var_ule_len), quote!()));
}
// Todo: index type should be settable by attribute
quote!(zerovec::ule::MultiFieldsULE::<#len, zerovec::vecs::Index32>::compute_encoded_len_for([#(#lengths),*]))
quote!(zerovec::ule::MultiFieldsULE::<#len, #format_param>::compute_encoded_len_for([#(#lengths),*]))
}
}

Expand Down Expand Up @@ -638,6 +648,7 @@ impl<'a> UnsizedFields<'a> {
/// The code will validate a variable known as `last_field_bytes`
fn varule_validator(&self) -> Option<TokenStream2> {
let len = self.fields.len();
let format_param = &self.format_param;
if len == 1 {
None
} else {
Expand All @@ -648,7 +659,7 @@ impl<'a> UnsizedFields<'a> {
}

Some(quote!(
let multi = zerovec::ule::MultiFieldsULE::<#len>::parse_byte_slice(last_field_bytes)?;
let multi = zerovec::ule::MultiFieldsULE::<#len, #format_param>::parse_byte_slice(last_field_bytes)?;
unsafe {
#(#validators)*
}
Expand Down
58 changes: 57 additions & 1 deletion utils/zerovec/derive/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,48 @@ pub fn extract_parenthetical_zerovec_attrs(
Ok(ret)
}

/// Removes every attribute whose second path segment is `name` from `attrs` and
/// returns the tokens passed to it, i.e. the `..` in `#[zerovec::<name>(..)]`.
///
/// Returns `Ok(None)` when no matching attribute is present.
///
/// NOTE(review): only the second path segment is inspected; callers are presumably
/// expected to pass attributes already filtered down to the `zerovec::` namespace.
///
/// # Errors
///
/// Returns an error if more than one matching attribute is found, or if the
/// arguments of a matching attribute cannot be parsed (e.g. the attribute is
/// not written in the parenthesized `#[zerovec::<name>(..)]` form).
pub fn extract_single_tt_attr(
    attrs: &mut Vec<Attribute>,
    name: &str,
) -> Result<Option<TokenStream2>> {
    let mut ret = None;
    let mut error = None;
    attrs.retain(|a| {
        // Skip the first path segment (the "zerovec" part); the second one names the attribute.
        let is_match = a
            .path()
            .segments
            .iter()
            .nth(1)
            .map_or(false, |second| second.ident == name);
        if !is_match {
            // Not ours: leave it in `attrs` for other extractors.
            return true;
        }

        if ret.is_some() {
            error = Some(Error::new(
                a.span(),
                "Can only specify a single VarZeroVecFormat via #[zerovec::format(..)]",
            ));
            return false;
        }

        match a.parse_args::<TokenStream2>() {
            Ok(tokens) => ret = Some(tokens),
            Err(_) => {
                // This attribute carries one token-tree argument, not an identifier list,
                // so the diagnostic must not describe it as a comma separated list.
                error = Some(Error::new(
                    a.span(),
                    format!("#[zerovec::{name}(..)] takes in a single parenthesized argument"),
                ));
            }
        }

        // Matching attributes are always removed, whether or not they parsed.
        false
    });

    match error {
        Some(error) => Err(error),
        None => Ok(ret),
    }
}

/// Removes all attributes with `zerovec` in the name and places them in a separate vector
pub fn extract_zerovec_attributes(attrs: &mut Vec<Attribute>) -> Vec<Attribute> {
let mut ret = vec![];
Expand Down Expand Up @@ -266,7 +308,7 @@ pub fn extract_field_attributes(attrs: &mut Vec<Attribute>) -> Result<Option<Ide
Ok(varule.first().cloned())
}

#[derive(Default, Copy, Clone)]
#[derive(Default, Clone)]
pub struct ZeroVecAttrs {
pub skip_kv: bool,
pub skip_ord: bool,
Expand All @@ -275,6 +317,7 @@ pub struct ZeroVecAttrs {
pub deserialize: bool,
pub debug: bool,
pub hash: bool,
pub vzv_format: Option<TokenStream2>,
}

/// Removes all known zerovec:: attributes from struct attrs and validates them
Expand All @@ -287,6 +330,7 @@ pub fn extract_attributes_common(

let derive = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "derive")?;
let skip = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "skip_derive")?;
let format = extract_single_tt_attr(&mut zerovec_attrs, "format")?;

let name = if is_var { "make_varule" } else { "make_ule" };

Expand Down Expand Up @@ -333,6 +377,18 @@ pub fn extract_attributes_common(
}
}

if let Some(ref format) = format {
if !is_var {
return Err(Error::new(
format.span(),
format!(
"Found unknown derive attribute for #[{name}]: #[zerovec::format({format})]"
),
));
}
}
attrs.vzv_format = format;

if (attrs.serialize || attrs.deserialize) && !is_var {
return Err(Error::new(
span,
Expand Down
5 changes: 5 additions & 0 deletions utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ pub use zerovec_derive::make_ule;
///
/// - [`Ord`] and [`PartialOrd`]
/// - [`ZeroMapKV`]
/// - [`alloc::borrow::ToOwned`]
///
/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`.
/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`.
Expand All @@ -436,6 +437,10 @@ pub use zerovec_derive::make_ule;
/// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self`
/// for convenience. This allows for a little more flexibility encoding slices.
///
/// In case there are multiple [`VarULE`] (i.e., variable-sized) fields, this macro will produce private fields that
/// appropriately pack the data together. The packing format defaults to [`crate::vecs::Index16`], but it can be
/// overridden with an attribute such as `#[zerovec::format(zerovec::vecs::Index8)]`.
///
/// [`EncodeAsVarULE`]: ule::EncodeAsVarULE
/// [`VarULE`]: ule::VarULE
/// [`ULE`]: ule::ULE
Expand Down
52 changes: 23 additions & 29 deletions utils/zerovec/src/ule/multi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,59 +4,53 @@

use super::*;
use crate::varzerovec::lengthless::VarZeroLengthlessSlice;
use crate::varzerovec::Index32;
use crate::vecs::VarZeroVecFormat;
use core::{fmt, mem};

/// This type is used by the custom derive to represent multiple [`VarULE`]
/// fields packed into a single end-of-struct field. It is not recommended
/// to use this type directly.
/// to use this type directly, use [`Tuple2VarULE`](crate::ule::tuplevar::Tuple2VarULE) etc instead.
///
/// Logically, consider it to be `(V1, V2, V3, ..)`
/// where `V1` etc are potentially different [`VarULE`] types.
///
/// Internally, it is represented by a VarZeroSlice.
/// Internally, it is represented by a VarZeroSlice without the length part.
#[derive(PartialEq, Eq)]
#[repr(transparent)]
pub struct MultiFieldsULE<const LEN: usize, Format: VarZeroVecFormat = Index32>(
pub struct MultiFieldsULE<const LEN: usize, Format: VarZeroVecFormat>(
Manishearth marked this conversation as resolved.
Show resolved Hide resolved
VarZeroLengthlessSlice<[u8], Format>,
);

impl<const LEN: usize, Format: VarZeroVecFormat> MultiFieldsULE<LEN, Format> {
/// Compute the amount of bytes needed to support elements with lengths `lengths`
#[inline]
#[allow(clippy::expect_used)] // See #1410
pub fn compute_encoded_len_for(lengths: [usize; LEN]) -> usize {
#[allow(clippy::expect_used)] // See #1410
unsafe {
// safe since BlankSliceEncoder is transparent over usize
let lengths = &*(lengths.as_slice() as *const [usize] as *const [BlankSliceEncoder]);
crate::varzerovec::components::compute_serializable_len_without_length::<_, _, Format>(
lengths,
)
.expect("Too many bytes to encode") as usize
}
let lengths = lengths.map(BlankSliceEncoder);
crate::varzerovec::components::compute_serializable_len_without_length::<_, _, Format>(
&lengths,
)
.expect("Too many bytes to encode") as usize
}

/// Construct a partially initialized MultiFieldsULE backed by a mutable byte buffer
pub fn new_from_lengths_partially_initialized<'a>(
lengths: [usize; LEN],
output: &'a mut [u8],
) -> &'a mut Self {
let lengths = lengths.map(BlankSliceEncoder);
crate::varzerovec::components::write_serializable_bytes_without_length::<_, _, Format>(
&lengths, output,
);
debug_assert!(
<VarZeroLengthlessSlice<[u8], Format>>::parse_byte_slice(LEN as u32, output).is_ok(),
"Encoded slice must be valid VarZeroSlice"
);
unsafe {
// safe since BlankSliceEncoder is transparent over usize
let lengths = &*(lengths.as_slice() as *const [usize] as *const [BlankSliceEncoder]);
crate::varzerovec::components::write_serializable_bytes_without_length::<_, _, Format>(
lengths, output,
);
debug_assert!(
<VarZeroLengthlessSlice<[u8], Format>>::parse_byte_slice(LEN as u32, output)
.is_ok(),
"Encoded slice must be valid VarZeroSlice"
);
// Safe since write_serializable_bytes produces a valid VarZeroSlice buffer
// Safe since write_serializable_bytes produces a valid VarZeroLengthlessSlice buffer with the right format
let slice = <VarZeroLengthlessSlice<[u8], Format>>::from_bytes_unchecked_mut(output);
// safe since `Self` is transparent over VarZeroSlice
mem::transmute::<&mut VarZeroLengthlessSlice<_, Format>, &mut Self>(slice)
// safe since `Self` is transparent over VarZeroLengthlessSlice<[u8], Format>
mem::transmute::<&mut VarZeroLengthlessSlice<[u8], Format>, &mut Self>(slice)
}
}

Expand Down Expand Up @@ -98,11 +92,11 @@ impl<const LEN: usize, Format: VarZeroVecFormat> MultiFieldsULE<LEN, Format> {
/// Construct from a byte slice
///
/// # Safety
/// - byte slice must be a valid VarZeroLengthlessSlice<[u8]> with length LEN
/// - byte slice must be a valid VarZeroLengthlessSlice<[u8], Format> with length LEN
#[inline]
pub unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
// &Self is transparent over &VZS<..>
mem::transmute(<VarZeroLengthlessSlice<[u8]>>::from_bytes_unchecked(bytes))
// &Self is transparent over &VZS<..> with the right format
mem::transmute(<VarZeroLengthlessSlice<[u8], Format>>::from_bytes_unchecked(bytes))
}

/// Get the bytes behind this value
Expand Down
Loading