From e13b9e76b2f1889b96a8e14557b2776363059a69 Mon Sep 17 00:00:00 2001 From: Neville Dipale Date: Mon, 12 Jul 2021 01:49:47 +0200 Subject: [PATCH] support all primitive types through logical types --- parquet_derive/src/lib.rs | 2 +- parquet_derive/src/parquet_field.rs | 36 ++++++++++++++++---- parquet_derive_test/src/lib.rs | 51 ++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/parquet_derive/src/lib.rs b/parquet_derive/src/lib.rs index 450af7b30d83..1c53227e7977 100644 --- a/parquet_derive/src/lib.rs +++ b/parquet_derive/src/lib.rs @@ -127,7 +127,7 @@ pub fn parquet_record_writer(input: proc_macro::TokenStream) -> proc_macro::Toke use parquet::schema::types::Type as ParquetType; use parquet::schema::types::TypePtr; use parquet::basic::LogicalType; - use parquet::basic::Repetition; + use parquet::basic::*; let mut fields: Vec = Vec::new(); #( diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs index b27faee39565..6f2fa0c7d4cb 100644 --- a/parquet_derive/src/parquet_field.rs +++ b/parquet_derive/src/parquet_field.rs @@ -245,7 +245,12 @@ impl Field { } else if is_a_byte_buf { quote! { Some((&inner[..]).into())} } else { - quote! { Some(inner) } + // Type might need converting to a physical type + match self.ty.physical_type() { + parquet::basic::Type::INT32 => quote! { Some(inner as i32) }, + parquet::basic::Type::INT64 => quote! { Some(inner as i64) }, + _ => quote! { Some(inner) }, + } }; quote! { @@ -276,7 +281,12 @@ impl Field { } else if is_a_byte_buf { quote! { (&rec.#field_name[..]).into() } } else { - quote! { rec.#field_name } + // Type might need converting to a physical type + match self.ty.physical_type() { + parquet::basic::Type::INT32 => quote! { rec.#field_name as i32 }, + parquet::basic::Type::INT64 => quote! { rec.#field_name as i64 }, + _ => quote! { rec.#field_name }, + } }; quote! { @@ -447,7 +457,14 @@ impl Type { "bool" => BasicType::BOOLEAN, "u8" | "u16" | "u32" => BasicType::INT32, "i8" | "i16" | "i32" | "NaiveDate" => BasicType::INT32, - "u64" | "i64" | "usize" | "NaiveDateTime" => BasicType::INT64, + "u64" | "i64" | "NaiveDateTime" => BasicType::INT64, + "usize" | "isize" => { + if usize::BITS == 64 { + BasicType::INT64 + } else { + BasicType::INT32 + } + } "f32" => BasicType::FLOAT, "f64" => BasicType::DOUBLE, "String" | "str" | "Uuid" => BasicType::BYTE_ARRAY, @@ -626,7 +643,7 @@ mod test { assert_eq!(snippet, (quote!{ { - let vals : Vec < _ > = records . iter ( ) . map ( | rec | rec . counter ) . collect ( ); + let vals : Vec < _ > = records . iter ( ) . map ( | rec | rec . counter as i64 ) . collect ( ); if let parquet::column::writer::ColumnWriter::Int64ColumnWriter ( ref mut typed ) = column_writer { typed . write_batch ( & vals [ .. ] , None , None ) ?; @@ -706,7 +723,7 @@ mod test { let vals: Vec <_> = records.iter().filter_map( |rec| { if let Some ( inner ) = rec . optional_dumb_int { - Some ( inner ) + Some ( inner as i32 ) } else { None } @@ -757,12 +774,13 @@ mod test { struct ABasicStruct { yes_no: bool, name: String, + length: usize } }; let fields = extract_fields(snippet); let processed: Vec<_> = fields.iter().map(|field| Field::from(field)).collect(); - assert_eq!(processed.len(), 2); + assert_eq!(processed.len(), 3); assert_eq!( processed, @@ -778,6 +796,12 @@ mod test { ty: Type::TypePath(syn::parse_quote!(String)), is_a_byte_buf: true, third_party_type: None, + }, + Field { + ident: syn::Ident::new("length", proc_macro2::Span::call_site()), + ty: Type::TypePath(syn::parse_quote!(usize)), + is_a_byte_buf: false, + third_party_type: None, } ] ) diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs index a65ef96e5532..bc8e9147d154 100644 --- a/parquet_derive_test/src/lib.rs +++ b/parquet_derive_test/src/lib.rs @@ -32,11 +32,18 @@ struct ACompleteRecord<'a> { pub a_borrowed_string: &'a String, pub maybe_a_str: Option<&'a str>, pub maybe_a_string: Option, - pub magic_number: i32, - pub low_quality_pi: f32, - pub high_quality_pi: f64, - pub maybe_pi: Option, - pub maybe_best_pi: Option, + pub i16: i16, + pub i32: i32, + pub u64: u64, + pub maybe_u8: Option, + pub maybe_i16: Option, + pub maybe_u32: Option, + pub maybe_usize: Option, + pub isize: isize, + pub float: f32, + pub double: f64, + pub maybe_float: Option, + pub maybe_double: Option, pub borrowed_maybe_a_string: &'a Option, pub borrowed_maybe_a_str: &'a Option<&'a str>, } @@ -67,11 +74,18 @@ mod tests { REQUIRED BINARY a_borrowed_string (STRING); OPTIONAL BINARY maybe_a_str (STRING); OPTIONAL BINARY maybe_a_string (STRING); - REQUIRED INT32 magic_number; - REQUIRED FLOAT low_quality_pi; - REQUIRED DOUBLE high_quality_pi; - OPTIONAL FLOAT maybe_pi; - OPTIONAL DOUBLE maybe_best_pi; + REQUIRED INT32 i16 (INTEGER(16,true)); + REQUIRED INT32 i32; + REQUIRED INT64 u64 (INTEGER(64,false)); + OPTIONAL INT32 maybe_u8 (INTEGER(8,false)); + OPTIONAL INT32 maybe_i16 (INTEGER(16,true)); + OPTIONAL INT32 maybe_u32 (INTEGER(32,false)); + OPTIONAL INT64 maybe_usize (INTEGER(64,false)); + REQUIRED INT64 isize (INTEGER(64,true)); + REQUIRED FLOAT float; + REQUIRED DOUBLE double; + OPTIONAL FLOAT maybe_float; + OPTIONAL DOUBLE maybe_double; OPTIONAL BINARY borrowed_maybe_a_string (STRING); OPTIONAL BINARY borrowed_maybe_a_str (STRING); }"; @@ -88,11 +102,18 @@ mod tests { a_borrowed_string: &a_borrowed_string, maybe_a_str: Some(&a_str[..]), maybe_a_string: Some(a_str.clone()), - magic_number: 100, - low_quality_pi: 3.14, - high_quality_pi: 3.1415, - maybe_pi: Some(3.14), - maybe_best_pi: Some(3.1415), + i16: -45, + i32: 456, + u64: 4563424, + maybe_u8: None, + maybe_i16: Some(3), + maybe_u32: None, + maybe_usize: Some(4456), + isize: -365, + float: 3.5, + double: std::f64::NAN, + maybe_float: None, + maybe_double: Some(std::f64::MAX), borrowed_maybe_a_string: &maybe_a_string, borrowed_maybe_a_str: &maybe_a_str, }];