Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return slice from GenericByteArray::value_data #3171

Merged
merged 1 commit into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::raw_pointer::RawPtrBox;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
Expand Down Expand Up @@ -55,9 +55,9 @@ impl<T: ByteArrayType> GenericByteArray<T> {
offsets[i + 1] - offsets[i]
}

/// Returns a clone of the value data buffer
pub fn value_data(&self) -> Buffer {
self.data.buffers()[1].clone()
/// Returns the raw value data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also double-checked that anyone who still needs the underlying buffers can get them directly via ArrayData. 👍

// Conversion that consumes a `GenericByteArray` and yields the `ArrayData` it wraps.
impl<T: ByteArrayType> From<GenericByteArray<T>> for ArrayData {
    // Takes `array` by value and moves its `data` field out; nothing is cloned here.
    fn from(array: GenericByteArray<T>) -> Self {
        array.data
    }
}

pub fn value_data(&self) -> &[u8] {
    // The second buffer holds the value bytes; borrow it as a slice rather
    // than handing out a cloned `Buffer`.
    let values = &self.data.buffers()[1];
    values.as_slice()
}

/// Returns the offset values in the offsets buffer
Expand Down
23 changes: 9 additions & 14 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3390,14 +3390,13 @@ where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType,
{
let str_array = array
.as_any()
.downcast_ref::<GenericStringArray<OffsetSizeFrom>>()
.unwrap();
let list_data = array.data();
let str_values_buf = str_array.value_data();

let offsets = list_data.buffers()[0].typed_data::<OffsetSizeFrom>();
let data = array.data();
assert_eq!(
data.data_type(),
&GenericStringArray::<OffsetSizeFrom>::DATA_TYPE
);
let str_values_buf = data.buffers()[1].clone();
let offsets = data.buffers()[0].typed_data::<OffsetSizeFrom>();

let mut offset_builder = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
offsets
Expand All @@ -3414,18 +3413,14 @@ where

let offset_buffer = offset_builder.finish();

let dtype = if matches!(std::mem::size_of::<OffsetSizeTo>(), 8) {
DataType::LargeUtf8
} else {
DataType::Utf8
};
let dtype = GenericStringArray::<OffsetSizeTo>::DATA_TYPE;

let builder = ArrayData::builder(dtype)
.offset(array.offset())
.len(array.len())
.add_buffer(offset_buffer)
.add_buffer(str_values_buf)
.null_bit_buffer(list_data.null_buffer().cloned());
.null_bit_buffer(data.null_buffer().cloned());

let array_data = unsafe { builder.build_unchecked() };

Expand Down
13 changes: 3 additions & 10 deletions arrow/src/compute/kernels/concat_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
let left_offsets = left.value_offsets();
let right_offsets = right.value_offsets();

let left_buffer = left.value_data();
let right_buffer = right.value_data();
let left_values = left_buffer.as_slice();
let right_values = right_buffer.as_slice();
let left_values = left.value_data();
let right_values = right.value_data();

let mut output_values = BufferBuilder::<u8>::new(
left_values.len() + right_values.len()
Expand Down Expand Up @@ -115,16 +113,11 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
size,
)?;

let data_buffers = arrays
let data_values = arrays
.iter()
.map(|array| array.value_data())
.collect::<Vec<_>>();

let data_values = data_buffers
.iter()
.map(|buffer| buffer.as_slice())
.collect::<Vec<_>>();

let mut offsets = arrays
.iter()
.map(|a| a.value_offsets().iter().peekable())
Expand Down
6 changes: 2 additions & 4 deletions arrow/src/compute/kernels/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,7 @@ fn binary_substring<OffsetSize: OffsetSizeTrait>(
length: Option<OffsetSize>,
) -> Result<ArrayRef> {
let offsets = array.value_offsets();
let values = array.value_data();
let data = values.as_slice();
let data = array.value_data();
let zero = OffsetSize::zero();

// start and end offsets of all substrings
Expand Down Expand Up @@ -364,8 +363,7 @@ fn utf8_substring<OffsetSize: OffsetSizeTrait>(
length: Option<OffsetSize>,
) -> Result<ArrayRef> {
let offsets = array.value_offsets();
let values = array.value_data();
let data = values.as_slice();
let data = array.value_data();
let zero = OffsetSize::zero();

// Check if `offset` is at a valid char boundary.
Expand Down