diff --git a/CHANGELOG.md b/CHANGELOG.md index 5660d53..5929b93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,10 @@ Released YYYY-MM-DD. Released 2022-09-08. +### Added + +* Support custom arbitrary implementation for fields on derive. [#129](https://github.com/rust-fuzz/arbitrary/pull/129) + ### Fixed * Fixed a potential panic due to an off-by-one error in the `Arbitrary` diff --git a/Cargo.toml b/Cargo.toml index 4cf3970..633a153 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,8 +22,6 @@ rust-version = "1.63.0" [dependencies] derive_arbitrary = { version = "1.1.6", path = "./derive", optional = true } -[dev-dependencies] - [features] # Turn this feature on to enable support for `#[derive(Arbitrary)]`. derive = ["derive_arbitrary"] diff --git a/README.md b/README.md index 38bd949..3f5619b 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,39 @@ pub struct Rgb { } ``` +#### Customizing single fields + +This can be particularly handy if your structure uses a type that does not implement `Arbitrary` or you want to have more customization for particular fields. + +```rust +#[derive(Arbitrary)] +pub struct Rgba { + // set `r` to Default::default() + #[arbitrary(default)] + pub r: u8, + + // set `g` to 255 + #[arbitrary(value = 255)] + pub g: u8, + + // Generate `b` with a custom function of type + // + // fn(&mut Unstructured) -> arbitrary::Result<T> + // + // where `T` is the field's type. 
+ #[arbitrary(with = arbitrary_b)] + pub b: u8, + + // Generate `a` with a custom closure (shortcut to avoid a custom function) + #[arbitrary(with = |u: &mut Unstructured| u.int_in_range(0..=64))] + pub a: u8, +} + +fn arbitrary_b(u: &mut Unstructured) -> arbitrary::Result<u8> { + u.int_in_range(64..=128) +} +``` + ### Implementing `Arbitrary` By Hand Alternatively, you can write an `Arbitrary` implementation by hand: diff --git a/derive/Cargo.toml b/derive/Cargo.toml index d532269..4ff114c 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -9,7 +9,7 @@ authors = [ "Corey Farwell ", ] categories = ["development-tools::testing"] -edition = "2018" +edition = "2021" keywords = ["arbitrary", "testing", "derive", "macro"] readme = "README.md" description = "Derives arbitrary traits" diff --git a/derive/src/field_attributes.rs b/derive/src/field_attributes.rs new file mode 100644 index 0000000..ccaba74 --- /dev/null +++ b/derive/src/field_attributes.rs @@ -0,0 +1,117 @@ +use proc_macro2::{Group, Span, TokenStream, TokenTree}; +use quote::quote; +use syn::{spanned::Spanned, *}; + +/// Used to filter out necessary field attribute and within error messages. +static ARBITRARY_ATTRIBUTE_NAME: &str = "arbitrary"; + +/// Determines how a value for a field should be constructed. +#[cfg_attr(test, derive(Debug))] +pub enum FieldConstructor { + /// Assume that Arbitrary is defined for the type of this field and use it (default) + Arbitrary, + + /// Places `Default::default()` as a field value. + Default, + + /// Use custom function or closure to generate a value for a field. + With(TokenStream), + + /// Set a field always to the given value. 
+ Value(TokenStream), +} + +pub fn determine_field_constructor(field: &Field) -> Result { + let opt_attr = fetch_attr_from_field(field)?; + let ctor = match opt_attr { + Some(attr) => parse_attribute(attr)?, + None => FieldConstructor::Arbitrary, + }; + Ok(ctor) +} + +fn fetch_attr_from_field(field: &Field) -> Result> { + let found_attributes: Vec<_> = field + .attrs + .iter() + .filter(|a| { + let path = &a.path; + let name = quote!(#path).to_string(); + name == ARBITRARY_ATTRIBUTE_NAME + }) + .collect(); + if found_attributes.len() > 1 { + let name = field.ident.as_ref().unwrap(); + let msg = format!( + "Multiple conflicting #[{ARBITRARY_ATTRIBUTE_NAME}] attributes found on field `{name}`" + ); + return Err(syn::Error::new(field.span(), msg)); + } + Ok(found_attributes.into_iter().next()) +} + +fn parse_attribute(attr: &Attribute) -> Result { + let group = { + let mut tokens_iter = attr.clone().tokens.into_iter(); + let token = tokens_iter.next().ok_or_else(|| { + let msg = format!("#[{ARBITRARY_ATTRIBUTE_NAME}] cannot be empty."); + syn::Error::new(attr.span(), msg) + })?; + match token { + TokenTree::Group(g) => g, + t => { + let msg = format!("#[{ARBITRARY_ATTRIBUTE_NAME}] must contain a group, got: {t})"); + return Err(syn::Error::new(attr.span(), msg)); + } + } + }; + parse_attribute_internals(group) +} + +fn parse_attribute_internals(group: Group) -> Result { + let stream = group.stream(); + let mut tokens_iter = stream.into_iter(); + let token = tokens_iter.next().ok_or_else(|| { + let msg = format!("#[{ARBITRARY_ATTRIBUTE_NAME}] cannot be empty."); + syn::Error::new(group.span(), msg) + })?; + match token.to_string().as_ref() { + "default" => Ok(FieldConstructor::Default), + "with" => { + let func_path = parse_assigned_value("with", tokens_iter, group.span())?; + Ok(FieldConstructor::With(func_path)) + } + "value" => { + let value = parse_assigned_value("value", tokens_iter, group.span())?; + Ok(FieldConstructor::Value(value)) + } + _ => { + let msg = 
format!("Unknown option for #[{ARBITRARY_ATTRIBUTE_NAME}]: `{token}`"); + Err(syn::Error::new(token.span(), msg)) + } + } +} + +// Input: +// = 2 + 2 +// Output: +// 2 + 2 +fn parse_assigned_value( + opt_name: &str, + mut tokens_iter: impl Iterator, + default_span: Span, +) -> Result { + let eq_sign = tokens_iter.next().ok_or_else(|| { + let msg = format!( + "Invalid syntax for #[{ARBITRARY_ATTRIBUTE_NAME}], `{opt_name}` is missing assignment." + ); + syn::Error::new(default_span, msg) + })?; + + if eq_sign.to_string() == "=" { + Ok(tokens_iter.collect()) + } else { + let msg = format!("Invalid syntax for #[{ARBITRARY_ATTRIBUTE_NAME}], expected `=` after `{opt_name}`, got: `{eq_sign}`"); + Err(syn::Error::new(eq_sign.span(), msg)) + } +} diff --git a/derive/src/lib.rs b/derive/src/lib.rs index 7b16a8d..4cd80b6 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -4,11 +4,20 @@ use proc_macro2::{Span, TokenStream}; use quote::quote; use syn::*; +mod field_attributes; +use field_attributes::{determine_field_constructor, FieldConstructor}; + static ARBITRARY_LIFETIME_NAME: &str = "'arbitrary"; -#[proc_macro_derive(Arbitrary)] +#[proc_macro_derive(Arbitrary, attributes(arbitrary))] pub fn derive_arbitrary(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = syn::parse_macro_input!(tokens as syn::DeriveInput); + expand_derive_arbitrary(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() +} + +fn expand_derive_arbitrary(input: syn::DeriveInput) -> Result { let (lifetime_without_bounds, lifetime_with_bounds) = build_arbitrary_lifetime(input.generics.clone()); @@ -18,8 +27,8 @@ pub fn derive_arbitrary(tokens: proc_macro::TokenStream) -> proc_macro::TokenStr ); let arbitrary_method = - gen_arbitrary_method(&input, lifetime_without_bounds.clone(), &recursive_count); - let size_hint_method = gen_size_hint_method(&input); + gen_arbitrary_method(&input, lifetime_without_bounds.clone(), &recursive_count)?; + let size_hint_method = 
gen_size_hint_method(&input)?; let name = input.ident; // Add a bound `T: Arbitrary` to every type parameter T. let generics = add_trait_bounds(input.generics, lifetime_without_bounds.clone()); @@ -34,7 +43,7 @@ pub fn derive_arbitrary(tokens: proc_macro::TokenStream) -> proc_macro::TokenStr // Build TypeGenerics and WhereClause without a lifetime let (_, ty_generics, where_clause) = generics.split_for_impl(); - (quote! { + Ok(quote! { const _: () = { thread_local! { #[allow(non_upper_case_globals)] @@ -47,7 +56,6 @@ pub fn derive_arbitrary(tokens: proc_macro::TokenStream) -> proc_macro::TokenStr } }; }) - .into() } // Returns: (lifetime without bounds, lifetime with bounds) @@ -112,18 +120,21 @@ fn gen_arbitrary_method( input: &DeriveInput, lifetime: LifetimeDef, recursive_count: &syn::Ident, -) -> TokenStream { - let ident = &input.ident; - - let arbitrary_structlike = |fields| { - let arbitrary = construct(fields, |_, _| quote!(arbitrary::Arbitrary::arbitrary(u)?)); +) -> Result { + fn arbitrary_structlike( + fields: &Fields, + ident: &syn::Ident, + lifetime: LifetimeDef, + recursive_count: &syn::Ident, + ) -> Result { + let arbitrary = construct(fields, |_idx, field| gen_constructor_for_field(field))?; let body = with_recursive_count_guard(recursive_count, quote! { Ok(#ident #arbitrary) }); - let arbitrary_take_rest = construct_take_rest(fields); + let arbitrary_take_rest = construct_take_rest(fields)?; let take_rest_body = with_recursive_count_guard(recursive_count, quote! { Ok(#ident #arbitrary_take_rest) }); - quote! { + Ok(quote! 
{ fn arbitrary(u: &mut arbitrary::Unstructured<#lifetime>) -> arbitrary::Result { #body } @@ -131,27 +142,43 @@ fn gen_arbitrary_method( fn arbitrary_take_rest(mut u: arbitrary::Unstructured<#lifetime>) -> arbitrary::Result { #take_rest_body } - } - }; + }) + } - match &input.data { - Data::Struct(data) => arbitrary_structlike(&data.fields), - Data::Union(data) => arbitrary_structlike(&Fields::Named(data.fields.clone())), + let ident = &input.ident; + let output = match &input.data { + Data::Struct(data) => arbitrary_structlike(&data.fields, ident, lifetime, recursive_count)?, + Data::Union(data) => arbitrary_structlike( + &Fields::Named(data.fields.clone()), + ident, + lifetime, + recursive_count, + )?, Data::Enum(data) => { - let variants = data.variants.iter().enumerate().map(|(i, variant)| { - let idx = i as u64; - let ctor = construct(&variant.fields, |_, _| { - quote!(arbitrary::Arbitrary::arbitrary(u)?) - }); - let variant_name = &variant.ident; - quote! { #idx => #ident::#variant_name #ctor } - }); - let variants_take_rest = data.variants.iter().enumerate().map(|(i, variant)| { - let idx = i as u64; - let ctor = construct_take_rest(&variant.fields); - let variant_name = &variant.ident; - quote! { #idx => #ident::#variant_name #ctor } - }); + let variants: Vec = data + .variants + .iter() + .enumerate() + .map(|(i, variant)| { + let idx = i as u64; + let variant_name = &variant.ident; + construct(&variant.fields, |_, field| gen_constructor_for_field(field)) + .map(|ctor| quote! { #idx => #ident::#variant_name #ctor }) + }) + .collect::>()?; + + let variants_take_rest: Vec = data + .variants + .iter() + .enumerate() + .map(|(i, variant)| { + let idx = i as u64; + let variant_name = &variant.ident; + construct_take_rest(&variant.fields) + .map(|ctor| quote! 
{ #idx => #ident::#variant_name #ctor }) + }) + .collect::>()?; + let count = data.variants.len() as u64; let arbitrary = with_recursive_count_guard( @@ -190,74 +217,131 @@ fn gen_arbitrary_method( } } } - } + }; + Ok(output) } -fn construct(fields: &Fields, ctor: impl Fn(usize, &Field) -> TokenStream) -> TokenStream { - match fields { +fn construct( + fields: &Fields, + ctor: impl Fn(usize, &Field) -> Result, +) -> Result { + let output = match fields { Fields::Named(names) => { - let names = names.named.iter().enumerate().map(|(i, f)| { - let name = f.ident.as_ref().unwrap(); - let ctor = ctor(i, f); - quote! { #name: #ctor } - }); + let names: Vec = names + .named + .iter() + .enumerate() + .map(|(i, f)| { + let name = f.ident.as_ref().unwrap(); + ctor(i, f).map(|ctor| quote! { #name: #ctor }) + }) + .collect::>()?; quote! { { #(#names,)* } } } Fields::Unnamed(names) => { - let names = names.unnamed.iter().enumerate().map(|(i, f)| { - let ctor = ctor(i, f); - quote! { #ctor } - }); + let names: Vec = names + .unnamed + .iter() + .enumerate() + .map(|(i, f)| ctor(i, f).map(|ctor| quote! { #ctor })) + .collect::>()?; quote! { ( #(#names),* ) } } Fields::Unit => quote!(), - } + }; + Ok(output) } -fn construct_take_rest(fields: &Fields) -> TokenStream { - construct(fields, |idx, _| { - if idx + 1 == fields.len() { - quote! { arbitrary::Arbitrary::arbitrary_take_rest(u)? } - } else { - quote! { arbitrary::Arbitrary::arbitrary(&mut u)? } - } +fn construct_take_rest(fields: &Fields) -> Result { + construct(fields, |idx, field| { + determine_field_constructor(field).map(|field_constructor| match field_constructor { + FieldConstructor::Default => quote!(Default::default()), + FieldConstructor::Arbitrary => { + if idx + 1 == fields.len() { + quote! { arbitrary::Arbitrary::arbitrary_take_rest(u)? } + } else { + quote! { arbitrary::Arbitrary::arbitrary(&mut u)? 
} + } + } + FieldConstructor::With(function_or_closure) => quote!((#function_or_closure)(&mut u)?), + FieldConstructor::Value(value) => quote!(#value), + }) }) } -fn gen_size_hint_method(input: &DeriveInput) -> TokenStream { +fn gen_size_hint_method(input: &DeriveInput) -> Result { let size_hint_fields = |fields: &Fields| { - let tys = fields.iter().map(|f| &f.ty); - quote! { - arbitrary::size_hint::and_all(&[ - #( <#tys as arbitrary::Arbitrary>::size_hint(depth) ),* - ]) - } + fields + .iter() + .map(|f| { + let ty = &f.ty; + determine_field_constructor(f).map(|field_constructor| { + match field_constructor { + FieldConstructor::Default | FieldConstructor::Value(_) => { + quote!((0, Some(0))) + } + FieldConstructor::Arbitrary => { + quote! { <#ty as arbitrary::Arbitrary>::size_hint(depth) } + } + + // Note that in this case it's hard to determine what size_hint must be, so size_of::() is + // just an educated guess, although it's gonna be inaccurate for dynamically + // allocated types (Vec, HashMap, etc.). + FieldConstructor::With(_) => { + quote! { (::core::mem::size_of::<#ty>(), None) } + } + } + }) + }) + .collect::>>() + .map(|hints| { + quote! { + arbitrary::size_hint::and_all(&[ + #( #hints ),* + ]) + } + }) }; let size_hint_structlike = |fields: &Fields| { - let hint = size_hint_fields(fields); - quote! { - #[inline] - fn size_hint(depth: usize) -> (usize, Option) { - arbitrary::size_hint::recursion_guard(depth, |depth| #hint) + size_hint_fields(fields).map(|hint| { + quote! { + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + arbitrary::size_hint::recursion_guard(depth, |depth| #hint) + } } - } + }) }; match &input.data { Data::Struct(data) => size_hint_structlike(&data.fields), Data::Union(data) => size_hint_structlike(&Fields::Named(data.fields.clone())), - Data::Enum(data) => { - let variants = data.variants.iter().map(|v| size_hint_fields(&v.fields)); - quote! 
{ - #[inline] - fn size_hint(depth: usize) -> (usize, Option) { - arbitrary::size_hint::and( - ::size_hint(depth), - arbitrary::size_hint::recursion_guard(depth, |depth| { - arbitrary::size_hint::or_all(&[ #( #variants ),* ]) - }), - ) + Data::Enum(data) => data + .variants + .iter() + .map(|v| size_hint_fields(&v.fields)) + .collect::>>() + .map(|variants| { + quote! { + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + arbitrary::size_hint::and( + ::size_hint(depth), + arbitrary::size_hint::recursion_guard(depth, |depth| { + arbitrary::size_hint::or_all(&[ #( #variants ),* ]) + }), + ) + } } - } - } + }), } } + +fn gen_constructor_for_field(field: &Field) -> Result { + let ctor = match determine_field_constructor(field)? { + FieldConstructor::Default => quote!(Default::default()), + FieldConstructor::Arbitrary => quote!(arbitrary::Arbitrary::arbitrary(u)?), + FieldConstructor::With(function_or_closure) => quote!((#function_or_closure)(u)?), + FieldConstructor::Value(value) => quote!(#value), + }; + Ok(ctor) +} diff --git a/src/lib.rs b/src/lib.rs index 6df225f..a3fa48b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1292,3 +1292,51 @@ mod test { assert_eq!((1, None), <(u8, Vec) as Arbitrary>::size_hint(0)); } } + +/// Multiple conflicting arbitrary attributes are used on the same field: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(value = 2)] +/// #[arbitrary(value = 2)] +/// x: i32, +/// } +/// ``` +/// +/// An unknown attribute: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(unknown_attr)] +/// x: i32, +/// } +/// ``` +/// +/// An unknown attribute with a value: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(unknown_attr = 13)] +/// x: i32, +/// } +/// ``` +/// +/// `value` without RHS: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(value)] +/// x: i32, 
+/// } +/// ``` +/// +/// `with` without RHS: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(with)] +/// x: i32, +/// } +/// ``` +#[cfg(all(doctest, feature = "derive"))] +pub struct CompileFailTests; diff --git a/tests/derive.rs b/tests/derive.rs index 2d666f6..f29d227 100644 --- a/tests/derive.rs +++ b/tests/derive.rs @@ -231,3 +231,48 @@ fn recursive_and_empty_input() { let _ = Nat5::arbitrary(&mut Unstructured::new(&[])); } + +#[test] +fn test_field_attributes() { + // A type that DOES NOT implement Arbitrary + #[derive(Debug)] + struct Weight(u8); + + #[derive(Debug, Arbitrary)] + struct Parcel { + #[arbitrary(with = arbitrary_weight)] + weight: Weight, + + #[arbitrary(default)] + width: u8, + + #[arbitrary(value = 2 + 2)] + length: u8, + + height: u8, + + #[arbitrary(with = |u: &mut Unstructured| u.int_in_range(0..=100))] + price: u8, + } + + fn arbitrary_weight(u: &mut Unstructured) -> arbitrary::Result { + u.int_in_range(45..=56).map(Weight) + } + + let parcel: Parcel = arbitrary_from(&[6, 199, 17]); + + // 45 + 6 = 51 + assert_eq!(parcel.weight.0, 51); + + // u8::default() + assert_eq!(parcel.width, 0); + + // 2 + 2 = 4 + assert_eq!(parcel.length, 4); + + // 199 is the 2nd byte used by arbitrary + assert_eq!(parcel.height, 199); + + // 17 is the 3rd byte used by arbitrary + assert_eq!(parcel.price, 17); +}