From 66ea28b175c19e5ac97db0f8717886dc4c368991 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 9 Jun 2022 10:38:16 +0100 Subject: [PATCH 1/2] Replace RawPtrBox with ScalarBuffer (#1811) Fix value_offsets for empty variable length arrays (#1824) --- arrow/src/array/array.rs | 4 +- arrow/src/array/array_binary.rs | 93 ++++++++++++------------------ arrow/src/array/array_boolean.rs | 16 ++--- arrow/src/array/array_list.rs | 53 ++++++++--------- arrow/src/array/array_map.rs | 49 ++++++---------- arrow/src/array/array_primitive.rs | 28 +++------ arrow/src/array/array_string.rs | 75 +++++++++++++----------- arrow/src/array/mod.rs | 1 - arrow/src/array/raw_pointer.rs | 67 --------------------- arrow/src/buffer/scalar.rs | 16 +++++ 10 files changed, 153 insertions(+), 249 deletions(-) delete mode 100644 arrow/src/array/raw_pointer.rs diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index f28aba59d73e..a12e1d29e738 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -731,7 +731,7 @@ mod tests { let array = new_empty_array(&DataType::Utf8); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 0); - assert_eq!(a.value_offsets()[0], 0i32); + assert_eq!(a.value_offsets().len(), 0); } #[test] @@ -741,7 +741,7 @@ mod tests { let array = new_empty_array(&data_type); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 0); - assert_eq!(a.value_offsets()[0], 0i32); + assert_eq!(a.value_offsets().len(), 0); } #[test] diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index af55854a5319..4d34aa58d788 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -22,11 +22,11 @@ use std::{any::Any, iter::FromIterator}; use super::BooleanBufferBuilder; use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, - FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait, + array::print_long_array, Array, ArrayData, FixedSizeListArray, GenericBinaryIter, + GenericListArray, OffsetSizeTrait, }; pub use crate::array::DecimalIter; -use crate::buffer::Buffer; +use crate::buffer::{Buffer, ScalarBuffer}; use crate::datatypes::{ validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE, @@ -39,8 +39,8 @@ use crate::{buffer::MutableBuffer, datatypes::DataType}; /// binary data. pub struct GenericBinaryArray { data: ArrayData, - value_offsets: RawPtrBox, - value_data: RawPtrBox, + value_offsets: ScalarBuffer, + value_data: Buffer, } impl GenericBinaryArray { @@ -64,67 +64,40 @@ impl GenericBinaryArray { /// Returns a clone of the value data buffer pub fn value_data(&self) -> Buffer { - self.data.buffers()[1].clone() + self.value_data.clone() } /// Returns the offset values in the offsets buffer #[inline] pub fn value_offsets(&self) -> &[OffsetSize] { - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the ArrayData instance. - unsafe { - std::slice::from_raw_parts( - self.value_offsets.as_ptr().add(self.data.offset()), - self.len() + 1, - ) - } + &self.value_offsets } /// Returns the element at index `i` as bytes slice /// # Safety /// Caller is responsible for ensuring that the index is within the bounds of the array pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { - let end = *self.value_offsets().get_unchecked(i + 1); - let start = *self.value_offsets().get_unchecked(i); - - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the value_offset invariants - // Safety of `to_isize().unwrap()` // `start` and `end` are &OffsetSize, which is a generic type that implements the // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait, // both of which should cleanly cast to isize on an architecture that supports // 32/64-bit offsets - std::slice::from_raw_parts( - self.value_data.as_ptr().offset(start.to_isize().unwrap()), - (end - start).to_usize().unwrap(), - ) + let start = self.value_offsets.get_unchecked(i).to_isize().unwrap(); + let end = self.value_offsets.get_unchecked(i + 1).to_isize().unwrap(); + + // buffer bounds/offset is ensured by the value_offset invariants + self.value_data.get_unchecked(start as usize..end as usize) } /// Returns the element at index `i` as bytes slice pub fn value(&self, i: usize) -> &[u8] { - assert!(i < self.data.len(), "BinaryArray out of bounds access"); - //Soundness: length checked above, offset buffer length is 1 larger than logical array length - let end = unsafe { self.value_offsets().get_unchecked(i + 1) }; - let start = unsafe { self.value_offsets().get_unchecked(i) }; - - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the value_offset invariants - - // Safety of `to_isize().unwrap()` - // `start` and `end` are &OffsetSize, which is a generic type that implements the - // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait, - // both of which should cleanly cast to isize on an architecture that supports - // 32/64-bit offsets - unsafe { - std::slice::from_raw_parts( - self.value_data.as_ptr().offset(start.to_isize().unwrap()), - (*end - *start).to_usize().unwrap(), - ) - } + assert!( + i < self.data.len(), + "BinaryArray index out of bounds: the len is {} but the index is {}", + self.data.len(), + i + ); + unsafe { self.value_unchecked(i) } } /// Creates a [GenericBinaryArray] from a vector of byte slices @@ -259,12 +232,14 @@ impl From for GenericBinaryArray From> for GenericBinaryArray { /// pub struct FixedSizeBinaryArray { data: ArrayData, - value_data: RawPtrBox, + value_data: Buffer, length: i32, } @@ -662,14 +637,15 @@ impl From for FixedSizeBinaryArray { 1, "FixedSizeBinaryArray data should contain 1 buffer only (values)" ); - let value_data = data.buffers()[0].as_ptr(); + // TODO: Materialize offset into value_data + let value_data = data.buffers()[0].clone(); let length = match data.data_type() { DataType::FixedSizeBinary(len) => *len, _ => panic!("Expected data type to be FixedSizeBinary"), }; Self { data, - value_data: unsafe { RawPtrBox::new(value_data) }, + value_data, length, } } @@ -756,7 +732,7 @@ impl Array for FixedSizeBinaryArray { /// pub struct DecimalArray { data: ArrayData, - value_data: RawPtrBox, + value_data: Buffer, precision: usize, scale: usize, length: i32, @@ -949,15 +925,16 @@ impl From for DecimalArray { 1, "DecimalArray data should contain 1 buffer only (values)" ); - let values = data.buffers()[0].as_ptr(); let (precision, scale) = match data.data_type() { DataType::Decimal(precision, scale) => (*precision, *scale), _ => panic!("Expected data type to be Decimal"), }; + // TODO: Materialize offset into value_data + let value_data = data.buffers()[0].clone(); let length = 16; Self { data, - value_data: unsafe { RawPtrBox::new(values) }, + value_data, precision, scale, length, @@ -1441,7 +1418,9 @@ mod tests { } #[test] - #[should_panic(expected = "BinaryArray out of bounds access")] + #[should_panic( + expected = "BinaryArray index out of bounds access: the len is 3 but the index is 4" + )] fn test_binary_array_get_value_index_out_of_bound() { let values: [u8; 12] = [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116]; diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs index f4e9ce28b733..c630b3b3ae48 100644 --- a/arrow/src/array/array_boolean.rs +++ b/arrow/src/array/array_boolean.rs @@ -20,8 +20,8 @@ use std::convert::From; use std::iter::{FromIterator, IntoIterator}; use std::{any::Any, fmt}; +use super::array::print_long_array; use super::*; -use super::{array::print_long_array, raw_pointer::RawPtrBox}; use crate::buffer::{Buffer, MutableBuffer}; use crate::util::bit_util; @@ -66,9 +66,7 @@ use crate::util::bit_util; /// ``` pub struct BooleanArray { data: ArrayData, - /// Pointer to the value array. The lifetime of this must be <= to the value buffer - /// stored in `data`, so it's safe to store. - raw_values: RawPtrBox, + raw_values: Buffer, } impl fmt::Debug for BooleanArray { @@ -101,7 +99,7 @@ impl BooleanArray { /// /// Note this doesn't take the offset of this array into account. pub fn values(&self) -> &Buffer { - &self.data.buffers()[0] + &self.raw_values } /// Returns the boolean value at index `i`. @@ -186,11 +184,9 @@ impl From for BooleanArray { 1, "BooleanArray data should contain a single buffer only (values buffer)" ); - let ptr = data.buffers()[0].as_ptr(); - Self { - data, - raw_values: unsafe { RawPtrBox::new(ptr) }, - } + // TODO: Hydrate offset into BitMap + let raw_values = data.buffers()[0].clone(); + Self { data, raw_values } } } diff --git a/arrow/src/array/array_list.rs b/arrow/src/array/array_list.rs index 709e4e7ba7d0..ca6bc05d61da 100644 --- a/arrow/src/array/array_list.rs +++ b/arrow/src/array/array_list.rs @@ -21,9 +21,10 @@ use std::fmt; use num::Integer; use super::{ - array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayData, - ArrayRef, BooleanBufferBuilder, GenericListArrayIter, PrimitiveArray, + array::print_long_array, make_array, Array, ArrayData, ArrayRef, + BooleanBufferBuilder, GenericListArrayIter, PrimitiveArray, }; +use crate::buffer::ScalarBuffer; use crate::{ buffer::MutableBuffer, datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType, Field}, @@ -49,10 +50,10 @@ impl OffsetSizeTrait for i64 { /// /// /// For non generic lists, you may wish to consider using [`ListArray`] or [`LargeListArray`]` -pub struct GenericListArray { +pub struct GenericListArray { data: ArrayData, values: ArrayRef, - value_offsets: RawPtrBox, + value_offsets: ScalarBuffer, } impl GenericListArray { @@ -70,32 +71,27 @@ impl GenericListArray { /// # Safety /// Caller must ensure that the index is within the array bounds pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { - let end = *self.value_offsets().get_unchecked(i + 1); - let start = *self.value_offsets().get_unchecked(i); + let end = *self.value_offsets.get_unchecked(i + 1); + let start = *self.value_offsets.get_unchecked(i); self.values .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap()) } /// Returns ith value of this list array. pub fn value(&self, i: usize) -> ArrayRef { - let end = self.value_offsets()[i + 1]; - let start = self.value_offsets()[i]; - self.values - .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap()) + assert!( + i < self.data.len(), + "GenericListArray index out of bounds: the len is {} but the index is {}", + self.data.len(), + i + ); + unsafe { self.value_unchecked(i) } } /// Returns the offset values in the offsets buffer #[inline] pub fn value_offsets(&self) -> &[OffsetSize] { - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the ArrayData instance. - unsafe { - std::slice::from_raw_parts( - self.value_offsets.as_ptr().add(self.data.offset()), - self.len() + 1, - ) - } + &self.value_offsets } /// Returns the length for value at index `i`. @@ -227,8 +223,10 @@ impl GenericListArray { } let values = make_array(values); - let value_offsets = data.buffers()[0].as_ptr(); - let value_offsets = unsafe { RawPtrBox::::new(value_offsets) }; + let offsets = data.buffers()[0].clone(); + let offsets_len = data.is_empty().then(|| 0).unwrap_or(data.len() + 1); + let value_offsets = ScalarBuffer::new(offsets, data.offset(), offsets_len); + Ok(Self { data, values, @@ -940,7 +938,7 @@ mod tests { } #[test] - #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")] + #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")] fn test_list_array_index_out_of_bound() { // Construct a value array let value_data = ArrayData::builder(DataType::Int32) @@ -1142,12 +1140,13 @@ mod tests { } #[test] - #[should_panic(expected = "memory is not aligned")] + #[should_panic(expected = "buffer is not aligned to 4 byte boundary")] fn test_primitive_array_alignment() { let ptr = alloc::allocate_aligned::(8); - let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; + let buf = unsafe { Buffer::from_raw_parts(ptr, 9, 9) }; let buf2 = buf.slice(1); let array_data = ArrayData::builder(DataType::Int32) + .len(2) .add_buffer(buf2) .build() .unwrap(); @@ -1155,18 +1154,19 @@ mod tests { } #[test] - #[should_panic(expected = "memory is not aligned")] + #[should_panic(expected = "buffer is not aligned to 4 byte boundary")] // Different error messages, so skip for now // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] fn test_list_array_alignment() { let ptr = alloc::allocate_aligned::(8); - let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; + let buf = unsafe { Buffer::from_raw_parts(ptr, 9, 9) }; let buf2 = buf.slice(1); let values: [i32; 8] = [0; 8]; let value_data = unsafe { ArrayData::builder(DataType::Int32) + .len(1) .add_buffer(Buffer::from_slice_ref(&values)) .build_unchecked() }; @@ -1175,6 +1175,7 @@ mod tests { DataType::List(Box::new(Field::new("item", DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) + .len(1) .add_buffer(buf2) .add_child_data(value_data) .build_unchecked() diff --git a/arrow/src/array/array_map.rs b/arrow/src/array/array_map.rs index 081362021aa0..ca816d8e8619 100644 --- a/arrow/src/array/array_map.rs +++ b/arrow/src/array/array_map.rs @@ -16,17 +16,15 @@ // under the License. use crate::array::{StringArray, StructArray}; -use crate::buffer::Buffer; +use crate::buffer::{Buffer, ScalarBuffer}; use std::any::Any; use std::fmt; use std::mem; use std::sync::Arc; use super::make_array; -use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayRef, -}; -use crate::datatypes::{ArrowNativeType, DataType, Field, ToByteSlice}; +use super::{array::print_long_array, Array, ArrayData, ArrayRef}; +use crate::datatypes::{DataType, Field, ToByteSlice}; use crate::error::ArrowError; /// A nested array type where each record is a key-value map. @@ -37,7 +35,7 @@ use crate::error::ArrowError; pub struct MapArray { data: ArrayData, values: ArrayRef, - value_offsets: RawPtrBox, + value_offsets: ScalarBuffer, } impl MapArray { @@ -65,31 +63,26 @@ impl MapArray { /// # Safety /// Caller must ensure that the index is within the array bounds pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { - let end = *self.value_offsets().get_unchecked(i + 1); let start = *self.value_offsets().get_unchecked(i); - self.values - .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap()) + let end = *self.value_offsets().get_unchecked(i + 1); + self.values.slice(start as usize, (end - start) as usize) } /// Returns ith value of this map array. pub fn value(&self, i: usize) -> ArrayRef { - let end = self.value_offsets()[i + 1] as usize; - let start = self.value_offsets()[i] as usize; - self.values.slice(start, end - start) + assert!( + i < self.data.len(), + "MapArray index out of bounds: the len is {} but the index is {}", + self.data.len(), + i + ); + unsafe { self.value_unchecked(i) } } /// Returns the offset values in the offsets buffer #[inline] pub fn value_offsets(&self) -> &[i32] { - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the ArrayData instance. - unsafe { - std::slice::from_raw_parts( - self.value_offsets.as_ptr().add(self.data.offset()), - self.len() + 1, - ) - } + &self.value_offsets } /// Returns the length for value at index `i`. @@ -139,16 +132,10 @@ impl MapArray { } let values = make_array(entries); - let value_offsets = data.buffers()[0].as_ptr(); - - let value_offsets = unsafe { RawPtrBox::::new(value_offsets) }; - unsafe { - if (*value_offsets.as_ptr().offset(0)) != 0 { - return Err(ArrowError::InvalidArgumentError(String::from( - "offsets do not start at zero", - ))); - } - } + let offsets = data.buffers()[0].clone(); + let offsets_len = data.is_empty().then(|| 0).unwrap_or(data.len() + 1); + let value_offsets = ScalarBuffer::new(offsets, data.offset(), offsets_len); + Ok(Self { data, values, diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs index 8893703aa853..3c81c6a54bff 100644 --- a/arrow/src/array/array_primitive.rs +++ b/arrow/src/array/array_primitive.rs @@ -24,7 +24,6 @@ use std::mem; use chrono::{prelude::*, Duration}; use super::array::print_long_array; -use super::raw_pointer::RawPtrBox; use super::*; use crate::temporal_conversions; use crate::util::bit_util; @@ -33,6 +32,7 @@ use crate::{ util::trusted_len_unzip, }; +use crate::buffer::ScalarBuffer; use half::f16; /// Array whose elements are of primitive types. @@ -59,7 +59,7 @@ pub struct PrimitiveArray { /// # Safety /// raw_values must have a value equivalent to `data.buffers()[0].raw_data()` /// raw_values must have alignment for type T::NativeType - raw_values: RawPtrBox, + raw_values: ScalarBuffer, } impl PrimitiveArray { @@ -77,15 +77,7 @@ impl PrimitiveArray { /// Returns a slice of the values of this array #[inline] pub fn values(&self) -> &[T::Native] { - // Soundness - // raw_values alignment & location is ensured by fn from(ArrayDataRef) - // buffer bounds/offset is ensured by the ArrayData instance. - unsafe { - std::slice::from_raw_parts( - self.raw_values.as_ptr().add(self.data.offset()), - self.len(), - ) - } + &self.raw_values } // Returns a new primitive array builder @@ -100,8 +92,7 @@ impl PrimitiveArray { /// caller must ensure that the passed in offset is less than the array len() #[inline] pub unsafe fn value_unchecked(&self, i: usize) -> T::Native { - let offset = i + self.offset(); - *self.raw_values.as_ptr().add(offset) + *self.raw_values.get_unchecked(i) } /// Returns the primitive value at index `i`. @@ -109,8 +100,7 @@ impl PrimitiveArray { /// Panics of offset `i` is out of bounds #[inline] pub fn value(&self, i: usize) -> T::Native { - assert!(i < self.len()); - unsafe { self.value_unchecked(i) } + self.raw_values[i] } /// Creates a PrimitiveArray based on an iterator of values without nulls @@ -565,11 +555,9 @@ impl From for PrimitiveArray { "PrimitiveArray data should contain a single buffer only (values buffer)" ); - let ptr = data.buffers()[0].as_ptr(); - Self { - data, - raw_values: unsafe { RawPtrBox::new(ptr) }, - } + let buffer = data.buffers()[0].clone(); + let raw_values = ScalarBuffer::new(buffer, data.offset(), data.len()); + Self { data, raw_values } } } diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index 9e09350f7e68..25e462ee7af0 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -20,10 +20,10 @@ use std::fmt; use std::{any::Any, iter::FromIterator}; use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, GenericListArray, - GenericStringIter, OffsetSizeTrait, + array::print_long_array, Array, ArrayData, GenericListArray, GenericStringIter, + OffsetSizeTrait, }; -use crate::buffer::Buffer; +use crate::buffer::{Buffer, ScalarBuffer}; use crate::util::bit_util; use crate::{buffer::MutableBuffer, datatypes::DataType}; @@ -33,8 +33,8 @@ use crate::{buffer::MutableBuffer, datatypes::DataType}; /// specific string data. pub struct GenericStringArray { data: ArrayData, - value_offsets: RawPtrBox, - value_data: RawPtrBox, + value_offsets: ScalarBuffer, + value_data: Buffer, } impl GenericStringArray { @@ -59,20 +59,12 @@ impl GenericStringArray { /// Returns the offset values in the offsets buffer #[inline] pub fn value_offsets(&self) -> &[OffsetSize] { - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the ArrayData instance. - unsafe { - std::slice::from_raw_parts( - self.value_offsets.as_ptr().add(self.data.offset()), - self.len() + 1, - ) - } + &self.value_offsets } /// Returns a clone of the value data buffer pub fn value_data(&self) -> Buffer { - self.data.buffers()[1].clone() + self.value_data.clone() } /// Returns the number of `Unicode Scalar Value` in the string at index `i`. @@ -89,30 +81,28 @@ impl GenericStringArray { /// caller is responsible for ensuring that index is within the array bounds #[inline] pub unsafe fn value_unchecked(&self, i: usize) -> &str { - let end = self.value_offsets().get_unchecked(i + 1); - let start = self.value_offsets().get_unchecked(i); - - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the value_offset invariants - // ISSUE: utf-8 well formedness is not checked - // Safety of `to_isize().unwrap()` // `start` and `end` are &OffsetSize, which is a generic type that implements the // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait, // both of which should cleanly cast to isize on an architecture that supports // 32/64-bit offsets - let slice = std::slice::from_raw_parts( - self.value_data.as_ptr().offset(start.to_isize().unwrap()), - (*end - *start).to_usize().unwrap(), - ); + let start = self.value_offsets.get_unchecked(i).to_isize().unwrap(); + let end = self.value_offsets.get_unchecked(i + 1).to_isize().unwrap(); + + // buffer bounds/offset is ensured by the value_offset invariants + let slice = self.value_data.get_unchecked(start as usize..end as usize); std::str::from_utf8_unchecked(slice) } /// Returns the element at index `i` as &str #[inline] pub fn value(&self, i: usize) -> &str { - assert!(i < self.data.len(), "StringArray out of bounds access"); + assert!( + i < self.data.len(), + "StringArray index out of bounds: the len is {} but the index is {}", + self.data.len(), + i + ); // Safety: // `i < self.data.len() unsafe { self.value_unchecked(i) } @@ -306,12 +296,14 @@ impl From for GenericStringArray From> for GenericStringArray { #[cfg(test)] mod tests { - - use crate::array::{ListBuilder, StringBuilder}; + use crate::array::{ArrayDataBuilder, ListBuilder, StringBuilder}; use super::*; @@ -465,7 +456,9 @@ mod tests { } #[test] - #[should_panic(expected = "StringArray out of bounds access")] + #[should_panic( + expected = "StringArray index out of bounds access: the len is 3 but the index is 4" + )] fn test_string_array_get_value_index_out_of_bound() { let values: [u8; 12] = [ b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't', @@ -573,6 +566,18 @@ mod tests { .expect("All null array has valid array data"); } + #[test] + fn test_string_array_empty_offsets() { + let data = ArrayDataBuilder::new(DataType::Utf8) + .len(0) + .add_buffer(Buffer::from([])) + .add_buffer(Buffer::from([])) + .build() + .unwrap(); + let array = GenericStringArray::::from(data); + assert_eq!(array.value_offsets().len(), 0); + } + #[cfg(feature = "test_utils")] #[test] fn bad_size_collect_string() { diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 0bd32d347772..1b720e73107d 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -99,7 +99,6 @@ mod ffi; mod iterator; mod null; mod ord; -mod raw_pointer; mod transform; use crate::datatypes::*; diff --git a/arrow/src/array/raw_pointer.rs b/arrow/src/array/raw_pointer.rs deleted file mode 100644 index 1016b808bc5a..000000000000 --- a/arrow/src/array/raw_pointer.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::ptr::NonNull; - -/// This struct is highly `unsafe` and offers the possibility to -/// self-reference a [crate::buffer::Buffer] from -/// [crate::array::ArrayData], as a pointer to the beginning of its -/// contents. -pub(super) struct RawPtrBox { - ptr: NonNull, -} - -impl RawPtrBox { - /// # Safety - /// The user must guarantee that: - /// * the contents where `ptr` points to are never `moved`. This is guaranteed when they are Pinned. - /// * the lifetime of this struct does not outlive the lifetime of `ptr`. - /// Failure to fulfill any the above conditions results in undefined behavior. - /// # Panic - /// This function panics if: - /// * `ptr` is null - /// * `ptr` is not aligned to a slice of type `T`. This is guaranteed if it was built from a slice of type `T`. - pub(super) unsafe fn new(ptr: *const u8) -> Self { - let ptr = NonNull::new(ptr as *mut u8).expect("Pointer cannot be null"); - assert_eq!( - ptr.as_ptr().align_offset(std::mem::align_of::()), - 0, - "memory is not aligned" - ); - Self { ptr: ptr.cast() } - } - - pub(super) fn as_ptr(&self) -> *const T { - self.ptr.as_ptr() - } -} - -unsafe impl Send for RawPtrBox {} -unsafe impl Sync for RawPtrBox {} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - #[should_panic(expected = "memory is not aligned")] - #[cfg_attr(miri, ignore)] // sometimes does not panic as expected - fn test_primitive_array_alignment() { - let bytes = vec![0u8, 1u8]; - unsafe { RawPtrBox::::new(bytes.as_ptr().offset(1)) }; - } -} diff --git a/arrow/src/buffer/scalar.rs b/arrow/src/buffer/scalar.rs index 7d663cd2bf96..e9ee817d85eb 100644 --- a/arrow/src/buffer/scalar.rs +++ b/arrow/src/buffer/scalar.rs @@ -82,6 +82,22 @@ impl AsRef<[T]> for ScalarBuffer { } } +/// [`ScalarBuffer`] is `Send` if `Buffer: Send` and `T: Sync` +unsafe impl Send for ScalarBuffer +where + T: Sync, + Buffer: Send, +{ +} + +/// [`ScalarBuffer`] is `Sync` if `Buffer` and `T` are both Sync +unsafe impl Sync for ScalarBuffer +where + T: Sync, + Buffer: Sync, +{ +} + #[cfg(test)] mod tests { use super::*; From c5ba200224e1f0abb4b4e67faa299b38548b44e9 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 9 Jun 2022 11:11:29 +0100 Subject: [PATCH 2/2] Fix panic message tests --- arrow/src/array/array_binary.rs | 2 +- arrow/src/array/array_string.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 4d34aa58d788..9ac51336982c 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -1419,7 +1419,7 @@ mod tests { #[test] #[should_panic( - expected = "BinaryArray index out of bounds access: the len is 3 but the index is 4" + expected = "BinaryArray index out of bounds: the len is 3 but the index is 4" )] fn test_binary_array_get_value_index_out_of_bound() { let values: [u8; 12] = diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index 25e462ee7af0..59d17f02ddf8 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -457,7 +457,7 @@ mod tests { #[test] #[should_panic( - expected = "StringArray index out of bounds access: the len is 3 but the index is 4" + expected = "StringArray index out of bounds: the len is 3 but the index is 4" )] fn test_string_array_get_value_index_out_of_bound() { let values: [u8; 12] = [