From e7e50dfd8cc71633f77830e247ee4de0dc33079c Mon Sep 17 00:00:00 2001 From: Helgi Kristvin Sigurbjarnarson Date: Tue, 26 Oct 2021 13:29:27 -0700 Subject: [PATCH] fix: fix a bug in offset calculation for unions (#863) The `value_offset` function only read the least significant byte in the offset array, causing issues with unions with more than 255 rows of any given variant. Fix the issue by reading the entire i32 offset and add a unit test. --- arrow/src/array/array_union.rs | 47 ++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs index ba563ec796b4..6460e072646b 100644 --- a/arrow/src/array/array_union.rs +++ b/arrow/src/array/array_union.rs @@ -24,7 +24,6 @@ use crate::error::{ArrowError, Result}; use core::fmt; use std::any::Any; -use std::mem::size_of; /// An Array that can represent slots of varying types. /// @@ -177,7 +176,9 @@ impl UnionArray { Some(b) => b.count_set_bits_offset(0, index), None => index, }; - self.data().buffers()[1].as_slice()[valid_slots * size_of::()] as i32 + // safety: reinterpreting is safe since the offset buffer contains `i32` values and is + // properly aligned. + unsafe { self.data().buffers()[1].typed_data::()[valid_slots] } } else { index as i32 } @@ -334,6 +335,48 @@ mod tests { } } + #[test] + fn test_dense_i32_large() { + let mut builder = UnionBuilder::new_dense(1024); + + let expected_type_ids = vec![0_i8; 1024]; + let expected_value_offsets: Vec<_> = (0..1024).collect(); + let expected_array_values: Vec<_> = (1..=1024).collect(); + + expected_array_values + .iter() + .for_each(|v| builder.append::("a", *v).unwrap()); + + let union = builder.build().unwrap(); + + // Check type ids + assert_eq!( + union.data().buffers()[0], + Buffer::from_slice_ref(&expected_type_ids) + ); + for (i, id) in expected_type_ids.iter().enumerate() { + assert_eq!(id, &union.type_id(i)); + } + + // Check offsets + assert_eq!( + union.data().buffers()[1], + Buffer::from_slice_ref(&expected_value_offsets) + ); + for (i, id) in expected_value_offsets.iter().enumerate() { + assert_eq!(&union.value_offset(i), id); + } + + for (i, expected_value) in expected_array_values.iter().enumerate() { + assert!(!union.is_null(i)); + let slot = union.value(i); + let slot = slot.as_any().downcast_ref::().unwrap(); + assert_eq!(slot.len(), 1); + let value = slot.value(0); + assert_eq!(expected_value, &value); + } + } + #[test] fn test_dense_mixed() { let mut builder = UnionBuilder::new_dense(7);