From 9d618fa086400766a5467251236002ec88e1f48f Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 21 Mar 2023 23:15:19 +0000 Subject: [PATCH] Array equality for &dyn Array (#3880) --- arrow-array/src/array/mod.rs | 4 +- arrow/tests/array_equal.rs | 214 ++++++++++++++--------------------- 2 files changed, 88 insertions(+), 130 deletions(-) diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 1ddcc2881863..9afefc07f8d4 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -425,13 +425,13 @@ pub trait ArrayAccessor: Array { unsafe fn value_unchecked(&self, index: usize) -> Self::Item; } -impl PartialEq for dyn Array { +impl PartialEq for dyn Array + '_ { fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } -impl PartialEq for dyn Array { +impl PartialEq for dyn Array + '_ { fn eq(&self, other: &T) -> bool { self.data().eq(other.data()) } diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs index d24a24e2ea48..b6f81f6a4c1a 100644 --- a/arrow/tests/array_equal.rs +++ b/arrow/tests/array_equal.rs @@ -23,6 +23,7 @@ use arrow::array::{ }; use arrow::datatypes::{Int16Type, Int32Type}; use arrow_array::builder::{StringBuilder, StructBuilder}; +use arrow_array::{DictionaryArray, FixedSizeListArray}; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{DataType, Field}; @@ -31,14 +32,11 @@ use std::sync::Arc; #[test] fn test_null_equal() { let a = NullArray::new(12); - let a = a.data(); let b = NullArray::new(12); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); let b = NullArray::new(10); - let b = b.data(); - test_equal(a, b, false); + test_equal(&a, &b, false); // Test the case where offset != 0 @@ -54,69 +52,53 @@ fn test_null_equal() { #[test] fn test_boolean_equal() { let a = BooleanArray::from(vec![false, false, true]); - let a = a.data(); let b = BooleanArray::from(vec![false, false, true]); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); let b = BooleanArray::from(vec![false, false, false]); - let b = b.data(); - test_equal(a, b, false); + test_equal(&a, &b, false); } #[test] fn test_boolean_equal_nulls() { let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]); - let a = a.data(); let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); let b = BooleanArray::from(vec![None, None, None, Some(true)]); - let b = b.data(); - test_equal(a, b, false); + test_equal(&a, &b, false); let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]); - let b = b.data(); - test_equal(a, b, false); + test_equal(&a, &b, false); } #[test] fn test_boolean_equal_offset() { let a = BooleanArray::from(vec![false, true, false, true, false, false, true]); - let a = a.data(); let b = BooleanArray::from(vec![true, false, false, false, true, false, true, true]); - let b = b.data(); - assert_ne!(a, b); - assert_ne!(b, a); + test_equal(&a, &b, false); let a_slice = a.slice(2, 3); let b_slice = b.slice(3, 3); - assert_eq!(a_slice, b_slice); - assert_eq!(b_slice, a_slice); + test_equal(&a_slice, &b_slice, true); let a_slice = a.slice(3, 4); let b_slice = b.slice(4, 4); - assert_ne!(a_slice, b_slice); - assert_ne!(b_slice, a_slice); + test_equal(&a_slice, &b_slice, false); // Test the optimization cases where null_count == 0 and starts at 0 and len >= size_of(u8) // Elements fill in `u8`'s exactly. let mut vector = vec![false, false, true, true, true, true, true, true]; let a = BooleanArray::from(vector.clone()); - let a = a.data(); let b = BooleanArray::from(vector.clone()); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); // Elements fill in `u8`s + suffix bits. vector.push(true); let a = BooleanArray::from(vector.clone()); - let a = a.data(); let b = BooleanArray::from(vector); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); } #[test] @@ -151,10 +133,8 @@ fn test_primitive() { for (lhs, rhs, expected) in cases { let lhs = Int32Array::from(lhs); - let lhs = lhs.data(); let rhs = Int32Array::from(rhs); - let rhs = rhs.data(); - test_equal(lhs, rhs, expected); + test_equal(&lhs, &rhs, expected); } } @@ -207,10 +187,8 @@ fn test_primitive_slice() { for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases { let lhs = Int32Array::from(lhs); - let lhs = lhs.data(); let lhs = lhs.slice(slice_lhs.0, slice_lhs.1); let rhs = Int32Array::from(rhs); - let rhs = rhs.data(); let rhs = rhs.slice(slice_rhs.0, slice_rhs.1); test_equal(&lhs, &rhs, expected); @@ -218,7 +196,7 @@ fn test_primitive_slice() { } #[allow(clippy::eq_op)] -fn test_equal(lhs: &ArrayData, rhs: &ArrayData, expected: bool) { +fn test_equal(lhs: &dyn Array, rhs: &dyn Array, expected: bool) { // equality is symmetric assert_eq!(lhs, lhs); assert_eq!(rhs, rhs); @@ -275,10 +253,8 @@ fn test_generic_string_equal() { for (lhs, rhs, expected) in cases { let lhs: GenericStringArray = lhs.into_iter().collect(); - let lhs = lhs.data(); let rhs: GenericStringArray = rhs.into_iter().collect(); - let rhs = rhs.data(); - test_equal(lhs, rhs, expected); + test_equal(&lhs, &rhs, expected); } } @@ -305,10 +281,8 @@ fn test_generic_binary_equal() { .map(|x| x.as_deref().map(|x| x.as_bytes())) .collect(); let lhs = GenericBinaryArray::::from_opt_vec(lhs); - let lhs = lhs.data(); let rhs = GenericBinaryArray::::from_opt_vec(rhs); - let rhs = rhs.data(); - test_equal(lhs, rhs, expected); + test_equal(&lhs, &rhs, expected); } } @@ -326,32 +300,26 @@ fn test_large_binary_equal() { fn test_fixed_size_binary_array() { let a_input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]]; let a = FixedSizeBinaryArray::try_from_iter(a_input_arg.into_iter()).unwrap(); - let a = a.data(); let b_input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]]; let b = FixedSizeBinaryArray::try_from_iter(b_input_arg.into_iter()).unwrap(); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); } #[test] fn test_string_offset() { let a = StringArray::from(vec![Some("a"), None, Some("b")]); - let a = a.data(); let a = a.slice(2, 1); let b = StringArray::from(vec![Some("b")]); - let b = b.data(); - test_equal(&a, b, true); + test_equal(&a, &b, true); } #[test] fn test_string_offset_larger() { let a = StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]); - let a = a.data(); let b = StringArray::from(vec![None, Some("b"), None, Some("c")]); - let b = b.data(); test_equal(&a.slice(2, 2), &b.slice(0, 2), false); test_equal(&a.slice(2, 2), &b.slice(1, 2), true); @@ -361,17 +329,14 @@ fn test_string_offset_larger() { #[test] fn test_null() { let a = NullArray::new(2); - let a = a.data(); let b = NullArray::new(2); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); let b = NullArray::new(1); - let b = b.data(); - test_equal(a, b, false); + test_equal(&a, &b, false); } -fn create_list_array, T: AsRef<[Option]>>(data: T) -> ArrayData { +fn create_list_array, T: AsRef<[Option]>>(data: T) -> ListArray { let mut builder = ListBuilder::new(Int32Builder::with_capacity(10)); for d in data.as_ref() { if let Some(v) = d { @@ -381,7 +346,7 @@ fn create_list_array, T: AsRef<[Option]>>(data: T) -> ArrayDa builder.append(false); } } - builder.finish().into_data() + builder.finish() } #[test] @@ -400,7 +365,7 @@ fn test_empty_offsets_list_equal() { let values = Int32Array::from(empty); let empty_offsets: [u8; 0] = []; - let a = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + let a: ListArray = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( "item", DataType::Int32, true, @@ -410,9 +375,10 @@ fn test_empty_offsets_list_equal() { .add_child_data(values.data().clone()) .null_bit_buffer(Some(Buffer::from(&empty_offsets))) .build() - .unwrap(); + .unwrap() + .into(); - let b = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + let b: ListArray = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( "item", DataType::Int32, true, @@ -422,11 +388,12 @@ fn test_empty_offsets_list_equal() { .add_child_data(values.data().clone()) .null_bit_buffer(Some(Buffer::from(&empty_offsets))) .build() - .unwrap(); + .unwrap() + .into(); test_equal(&a, &b, true); - let c = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + let c: ListArray = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( "item", DataType::Int32, true, @@ -440,7 +407,8 @@ fn test_empty_offsets_list_equal() { ) .null_bit_buffer(Some(Buffer::from(vec![0b00001001]))) .build() - .unwrap(); + .unwrap() + .into(); test_equal(&a, &c, true); } @@ -467,7 +435,7 @@ fn test_list_null() { // a list where the nullness of values is determined by the list's bitmap let c_values = Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]); - let c = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + let c: ListArray = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( "item", DataType::Int32, true, @@ -477,7 +445,8 @@ fn test_list_null() { .add_child_data(c_values.into_data()) .null_bit_buffer(Some(Buffer::from(vec![0b00001001]))) .build() - .unwrap(); + .unwrap() + .into(); let d_values = Int32Array::from(vec![ Some(1), @@ -489,7 +458,7 @@ fn test_list_null() { None, None, ]); - let d = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + let d: ListArray = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( "item", DataType::Int32, true, @@ -499,7 +468,8 @@ fn test_list_null() { .add_child_data(d_values.into_data()) .null_bit_buffer(Some(Buffer::from(vec![0b00001001]))) .build() - .unwrap(); + .unwrap() + .into(); test_equal(&c, &d, true); } @@ -524,7 +494,7 @@ fn test_list_offsets() { fn create_fixed_size_binary_array, T: AsRef<[Option]>>( data: T, -) -> ArrayData { +) -> FixedSizeBinaryArray { let mut builder = FixedSizeBinaryBuilder::with_capacity(data.as_ref().len(), 5); for d in data.as_ref() { @@ -534,7 +504,7 @@ fn create_fixed_size_binary_array, T: AsRef<[Option]>>( builder.append_null(); } } - builder.finish().into_data() + builder.finish() } #[test] @@ -598,12 +568,11 @@ fn test_fixed_size_binary_offsets() { test_equal(&a_slice, &b_slice, false); } -fn create_decimal_array(data: Vec>) -> ArrayData { +fn create_decimal_array(data: Vec>) -> Decimal128Array { data.into_iter() .collect::() .with_precision_and_scale(23, 6) .unwrap() - .into() } #[test] @@ -687,7 +656,7 @@ fn test_decimal_offsets() { /// Create a fixed size list of 2 value lengths fn create_fixed_size_list_array, T: AsRef<[Option]>>( data: T, -) -> ArrayData { +) -> FixedSizeListArray { let mut builder = FixedSizeListBuilder::new(Int32Builder::with_capacity(10), 3); for d in data.as_ref() { @@ -701,7 +670,7 @@ fn create_fixed_size_list_array, T: AsRef<[Option]>>( builder.append(false); } } - builder.finish().into_data() + builder.finish() } #[test] @@ -813,12 +782,10 @@ fn test_struct_equal() { let a = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) .unwrap(); - let a = a.data(); let b = StructArray::try_from(vec![("f1", strings), ("f2", ints)]).unwrap(); - let b = b.data(); - test_equal(a, b, true); + test_equal(&a, &b, true); } #[test] @@ -845,8 +812,8 @@ fn test_struct_equal_null() { ])) .null_bit_buffer(Some(Buffer::from(vec![0b00001011]))) .len(5) - .add_child_data(strings.data_ref().clone()) - .add_child_data(ints.data_ref().clone()) + .add_child_data(strings.to_data()) + .add_child_data(ints.to_data()) .build() .unwrap(); let a = make_array(a); @@ -857,13 +824,13 @@ fn test_struct_equal_null() { ])) .null_bit_buffer(Some(Buffer::from(vec![0b00001011]))) .len(5) - .add_child_data(strings.data_ref().clone()) - .add_child_data(ints_non_null.data_ref().clone()) + .add_child_data(strings.to_data()) + .add_child_data(ints_non_null.to_data()) .build() .unwrap(); let b = make_array(b); - test_equal(a.data_ref(), b.data_ref(), true); + test_equal(&a, &b, true); // test with arrays that are not equal let c_ints_non_null: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 0, 4])); @@ -873,13 +840,13 @@ fn test_struct_equal_null() { ])) .null_bit_buffer(Some(Buffer::from(vec![0b00001011]))) .len(5) - .add_child_data(strings.data_ref().clone()) - .add_child_data(c_ints_non_null.data_ref().clone()) + .add_child_data(strings.to_data()) + .add_child_data(c_ints_non_null.to_data()) .build() .unwrap(); let c = make_array(c); - test_equal(a.data_ref(), c.data_ref(), false); + test_equal(&a, &c, false); // test a nested struct let a = ArrayData::builder(DataType::Struct(vec![Field::new( @@ -908,8 +875,8 @@ fn test_struct_equal_null() { ])) .null_bit_buffer(Some(Buffer::from(vec![0b00001011]))) .len(5) - .add_child_data(strings.data_ref().clone()) - .add_child_data(ints_non_null.data_ref().clone()) + .add_child_data(strings.to_data()) + .add_child_data(ints_non_null.to_data()) .build() .unwrap(); @@ -925,7 +892,7 @@ fn test_struct_equal_null() { .unwrap(); let b = make_array(b); - test_equal(a.data_ref(), b.data_ref(), true); + test_equal(&a, &b, true); } #[test] @@ -970,7 +937,7 @@ fn test_struct_equal_null_variable_size() { .unwrap(); let b = make_array(b); - test_equal(a.data_ref(), b.data_ref(), true); + test_equal(&a, &b, true); // test with arrays that are not equal let strings3: ArrayRef = Arc::new(StringArray::from(vec![ @@ -987,15 +954,18 @@ fn test_struct_equal_null_variable_size() { )])) .null_bit_buffer(Some(Buffer::from(vec![0b00001011]))) .len(5) - .add_child_data(strings3.data_ref().clone()) + .add_child_data(strings3.to_data()) .build() .unwrap(); let c = make_array(c); - test_equal(a.data_ref(), c.data_ref(), false); + test_equal(&a, &c, false); } -fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { +fn create_dictionary_array( + values: &[&str], + keys: &[Option<&str>], +) -> DictionaryArray { let values = StringArray::from(values.to_vec()); let mut builder = StringDictionaryBuilder::::new_with_dictionary(keys.len(), &values) @@ -1007,7 +977,7 @@ fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData builder.append_null() } } - builder.finish().into_data() + builder.finish() } #[test] @@ -1085,36 +1055,24 @@ fn test_dictionary_equal_null() { #[test] fn test_non_null_empty_strings() { - let s = StringArray::from(vec![Some(""), Some(""), Some("")]); - - let string1 = s.data(); - - let string2 = ArrayData::builder(DataType::Utf8) - .len(string1.len()) - .buffers(string1.buffers().to_vec()) - .build() - .unwrap(); + let s1 = StringArray::from(vec![Some(""), Some(""), Some("")]); + let data = s1.to_data().into_builder().nulls(None).build().unwrap(); + let s2 = StringArray::from(data); - // string2 is identical to string1 except that it has no validity buffer but since there - // are no nulls, string1 and string2 are equal - test_equal(string1, &string2, true); + // s2 is identical to s1 except that it has no validity buffer but since there + // are no nulls, s1 and s2 are equal + test_equal(&s1, &s2, true); } #[test] fn test_null_empty_strings() { - let s = StringArray::from(vec![Some(""), None, Some("")]); + let s1 = StringArray::from(vec![Some(""), None, Some("")]); + let data = s1.to_data().into_builder().nulls(None).build().unwrap(); + let s2 = StringArray::from(data); - let string1 = s.data(); - - let string2 = ArrayData::builder(DataType::Utf8) - .len(string1.len()) - .buffers(string1.buffers().to_vec()) - .build() - .unwrap(); - - // string2 is identical to string1 except that it has no validity buffer since string1 has - // nulls in it, string1 and string2 are not equal - test_equal(string1, &string2, false); + // s2 is identical to s1 except that it has no validity buffer since string1 has + // nulls in it, s1 and s2 are not equal + test_equal(&s1, &s2, false); } #[test] @@ -1159,9 +1117,9 @@ fn test_union_equal_dense() { builder.append::("b", 7).unwrap(); let union4 = builder.build().unwrap(); - test_equal(union1.data(), union2.data(), true); - test_equal(union1.data(), union3.data(), false); - test_equal(union1.data(), union4.data(), false); + test_equal(&union1, &union2, true); + test_equal(&union1, &union3, false); + test_equal(&union1, &union4, false); } #[test] @@ -1206,22 +1164,22 @@ fn test_union_equal_sparse() { builder.append::("b", 7).unwrap(); let union4 = builder.build().unwrap(); - test_equal(union1.data(), union2.data(), true); - test_equal(union1.data(), union3.data(), false); - test_equal(union1.data(), union4.data(), false); + test_equal(&union1, &union2, true); + test_equal(&union1, &union3, false); + test_equal(&union1, &union4, false); } #[test] fn test_boolean_slice() { let array = BooleanArray::from(vec![true; 32]); let slice = array.slice(4, 12); - assert_eq!(slice.data(), slice.data()); + assert_eq!(&slice, &slice); let slice = array.slice(8, 12); - assert_eq!(slice.data(), slice.data()); + assert_eq!(&slice, &slice); let slice = array.slice(8, 24); - assert_eq!(slice.data(), slice.data()); + assert_eq!(&slice, &slice); } #[test] @@ -1230,7 +1188,7 @@ fn test_sliced_nullable_boolean_array() { let b = BooleanArray::from(vec![true; 32]); let slice_a = a.slice(1, 12); let slice_b = b.slice(1, 12); - assert_ne!(slice_a.data(), slice_b.data()); + assert_ne!(&slice_a, &slice_b); } #[test] @@ -1333,5 +1291,5 @@ fn test_struct_equal_slice() { ]); assert_eq!(a, &b); - test_equal(a.data(), b.data(), true); + test_equal(&a, &b, true); }