diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 8dd206bd2639..8c2616624c0c 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -21,7 +21,7 @@ use crate::raw_pointer::RawPtrBox; use crate::types::bytes::ByteArrayNativeType; use crate::types::ByteArrayType; use crate::{Array, ArrayAccessor, OffsetSizeTrait}; -use arrow_buffer::{ArrowNativeType, Buffer}; +use arrow_buffer::ArrowNativeType; use arrow_data::ArrayData; use arrow_schema::DataType; use std::any::Any; @@ -55,9 +55,9 @@ impl GenericByteArray { offsets[i + 1] - offsets[i] } - /// Returns a clone of the value data buffer - pub fn value_data(&self) -> Buffer { - self.data.buffers()[1].clone() + /// Returns the raw value data + pub fn value_data(&self) -> &[u8] { + self.data.buffers()[1].as_slice() } /// Returns the offset values in the offsets buffer diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 3bf97cf7ade4..343e845c96e1 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -3390,14 +3390,13 @@ where OffsetSizeFrom: OffsetSizeTrait + ToPrimitive, OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType, { - let str_array = array - .as_any() - .downcast_ref::>() - .unwrap(); - let list_data = array.data(); - let str_values_buf = str_array.value_data(); - - let offsets = list_data.buffers()[0].typed_data::(); + let data = array.data(); + assert_eq!( + data.data_type(), + &GenericStringArray::::DATA_TYPE + ); + let str_values_buf = data.buffers()[1].clone(); + let offsets = data.buffers()[0].typed_data::(); let mut offset_builder = BufferBuilder::::new(offsets.len()); offsets @@ -3414,18 +3413,14 @@ where let offset_buffer = offset_builder.finish(); - let dtype = if matches!(std::mem::size_of::(), 8) { - DataType::LargeUtf8 - } else { - DataType::Utf8 - }; + let dtype = GenericStringArray::::DATA_TYPE; let builder = ArrayData::builder(dtype) .offset(array.offset()) .len(array.len()) .add_buffer(offset_buffer) .add_buffer(str_values_buf) - .null_bit_buffer(list_data.null_buffer().cloned()); + .null_bit_buffer(data.null_buffer().cloned()); let array_data = unsafe { builder.build_unchecked() }; diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index 1c0a0925df74..a908ba9ab5d8 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -50,10 +50,8 @@ pub fn concat_elements_utf8( let left_offsets = left.value_offsets(); let right_offsets = right.value_offsets(); - let left_buffer = left.value_data(); - let right_buffer = right.value_data(); - let left_values = left_buffer.as_slice(); - let right_values = right_buffer.as_slice(); + let left_values = left.value_data(); + let right_values = right.value_data(); let mut output_values = BufferBuilder::::new( left_values.len() + right_values.len() @@ -115,16 +113,11 @@ pub fn concat_elements_utf8_many( size, )?; - let data_buffers = arrays + let data_values = arrays .iter() .map(|array| array.value_data()) .collect::>(); - let data_values = data_buffers - .iter() - .map(|buffer| buffer.as_slice()) - .collect::>(); - let mut offsets = arrays .iter() .map(|a| a.value_offsets().iter().peekable()) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index 76568ae0dac0..23cb2c19fddf 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -253,8 +253,7 @@ fn binary_substring( length: Option, ) -> Result { let offsets = array.value_offsets(); - let values = array.value_data(); - let data = values.as_slice(); + let data = array.value_data(); let zero = OffsetSize::zero(); // start and end offsets of all substrings @@ -364,8 +363,7 @@ fn utf8_substring( length: Option, ) -> Result { let offsets = array.value_offsets(); - let values = array.value_data(); - let data = values.as_slice(); + let data = array.value_data(); let zero = OffsetSize::zero(); // Check if `offset` is at a valid char boundary.