Skip to content

Commit

Permalink
Provide into_builder for bytearray (#3326)
Browse files Browse the repository at this point in the history
* Provide into_builder for bytearray

* For review

* Remove slices_mut

* Modify test and remove values_slice_mut
  • Loading branch information
viirya authored Dec 31, 2022
1 parent 808a982 commit ec43d6f
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 3 deletions.
86 changes: 86 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use crate::array::{empty_offsets, print_long_array};
use crate::builder::GenericByteBuilder;
use crate::iterator::ArrayIter;
use crate::raw_pointer::RawPtrBox;
use crate::types::bytes::ByteArrayNativeType;
Expand Down Expand Up @@ -139,6 +140,91 @@ impl<T: ByteArrayType> GenericByteArray<T> {
pub fn iter(&self) -> ArrayIter<&Self> {
// Hand a shared borrow of this array to `ArrayIter`; the array itself is not consumed.
ArrayIter::new(self)
}

/// Consumes this array and tries to turn it into a [`GenericByteBuilder`] that reuses the
/// array's buffers, so its values can be mutated or extended in place.
///
/// Returns `Ok(builder)` only when the null buffer (if any), the offset buffer and the value
/// buffer can all be converted with `into_mutable`, i.e. when they are not shared by others.
///
/// # Errors
///
/// If any of the buffers cannot be made mutable, an array rebuilt from the same buffers is
/// handed back as `Err(array)`.
pub fn into_builder(self) -> Result<GenericByteBuilder<T>, Self> {
let len = self.len();
// Validity bits restricted to this array's `offset .. offset + len` window.
let null_bit_buffer = self
.data
.null_buffer()
.map(|b| b.bit_slice(self.data.offset(), len));

// Offsets: `len + 1` entries of `T::Offset`, starting at this array's offset.
let element_len = std::mem::size_of::<T::Offset>();
let offset_buffer = self.data.buffers()[0]
.slice_with_length(self.data.offset() * element_len, (len + 1) * element_len);

// Values: as many bytes as the span between the first and the last value offset.
// NOTE(review): the slice starts at `self.data.offset()` bytes, not at
// `self.value_offsets()[0]`, and the offsets above are not rebased. These agree when the
// array offset is 0 and the first value offset is 0; confirm the intended behavior for
// sliced arrays.
let element_len = std::mem::size_of::<u8>();
let value_len =
T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
let value_buffer = self.data.buffers()[1]
.slice_with_length(self.data.offset() * element_len, value_len * element_len);

// Release this array's own handles on the buffers before the `into_mutable` attempts
// below; presumably they would otherwise count as sharing and make every attempt fail.
drop(self.data);

let try_mutable_null_buffer = match null_bit_buffer {
None => Ok(None),
Some(null_buffer) => {
// A null buffer exists: try to make it mutable.
null_buffer.into_mutable().map(Some)
}
};

// `Ok(builder)` on full success, otherwise `Err((offsets, values, nulls))` carrying
// immutable buffers from which the array can be rebuilt.
let try_mutable_buffers = match try_mutable_null_buffer {
Ok(mutable_null_buffer) => {
// Got a mutable null buffer (or there is none); now try the offset and value buffers.
let try_mutable_offset_buffer = offset_buffer.into_mutable();
let try_mutable_value_buffer = value_buffer.into_mutable();

// Chaining `try_mutable_offset_buffer.map(...).map_err(...)` does not compile because
// `mutable_null_buffer` would be moved into the `map` closure, hence the explicit match.
match (try_mutable_offset_buffer, try_mutable_value_buffer) {
// SAFETY: all three buffers were taken from this array, which is what
// `new_from_buffer` assumes of its inputs.
(Ok(mutable_offset_buffer), Ok(mutable_value_buffer)) => unsafe {
Ok(GenericByteBuilder::<T>::new_from_buffer(
mutable_offset_buffer,
mutable_value_buffer,
mutable_null_buffer,
))
},
// In the three failure arms below, whatever did become mutable is converted back
// with `into()` so the error tuple holds only immutable buffers.
(Ok(mutable_offset_buffer), Err(value_buffer)) => Err((
mutable_offset_buffer.into(),
value_buffer,
mutable_null_buffer.map(|b| b.into()),
)),
(Err(offset_buffer), Ok(mutable_value_buffer)) => Err((
offset_buffer,
mutable_value_buffer.into(),
mutable_null_buffer.map(|b| b.into()),
)),
(Err(offset_buffer), Err(value_buffer)) => Err((
offset_buffer,
value_buffer,
mutable_null_buffer.map(|b| b.into()),
)),
}
}
Err(mutable_null_buffer) => {
// Unable to get a mutable null buffer. Despite its name, the binding holds the
// buffer handed back by the failed `into_mutable` call; the offset and value
// buffers are passed through without attempting a conversion.
Err((offset_buffer, value_buffer, Some(mutable_null_buffer)))
}
};

match try_mutable_buffers {
Ok(builder) => Ok(builder),
Err((offset_buffer, value_buffer, null_bit_buffer)) => {
// Reassemble an array of the same length from the buffers that were handed back.
// No offset is set on the builder; the buffers were already sliced above.
let builder = ArrayData::builder(T::DATA_TYPE)
.len(len)
.add_buffer(offset_buffer)
.add_buffer(value_buffer)
.null_bit_buffer(null_bit_buffer);

// NOTE(review): `build_unchecked` presumably skips validation; this relies on the
// buffers having come from an existing array.
let array_data = unsafe { builder.build_unchecked() };
let array = GenericByteArray::<T>::from(array_data);

Err(array)
}
}
}
}

impl<T: ByteArrayType> std::fmt::Debug for GenericByteArray<T> {
Expand Down
25 changes: 25 additions & 0 deletions arrow-array/src/array/string_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -697,4 +697,29 @@ mod tests {
assert_eq!(string.len(), 0);
assert_eq!(string.value_offsets(), &[0]);
}

#[test]
fn test_into_builder() {
    // A freshly built array has no other handle on its buffers, so the
    // conversion into a builder is expected to succeed.
    let source = StringArray::from(vec!["hello", "arrow"]);
    let mut builder = source.into_builder().unwrap();

    // Extend the recovered builder with one more value.
    builder.append_value("rust");
    let actual = builder.finish();

    let expected = StringArray::from(vec!["hello", "arrow", "rust"]);
    assert_eq!(expected, actual);
}

#[test]
fn test_into_builder_err() {
    let original = StringArray::from(vec!["hello", "arrow"]);

    // A live clone keeps a second handle on the array, so a mutable builder
    // cannot be obtained and the array must come back in the error.
    let other_handle = original.clone();

    let returned = original.into_builder().unwrap_err();
    assert_eq!(&returned, &other_handle);
}
}
40 changes: 39 additions & 1 deletion arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
use arrow_data::ArrayDataBuilder;
use std::any::Any;
use std::sync::Arc;
Expand Down Expand Up @@ -53,6 +53,34 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
}
}

/// Builds a [`GenericByteBuilder`] directly on top of existing offset, value and
/// (optional) validity buffers.
///
/// # Safety
/// The buffer contents are not verified: they are assumed to come from an existing,
/// valid [`GenericByteArray`].
pub unsafe fn new_from_buffer(
    offsets_buffer: MutableBuffer,
    value_buffer: MutableBuffer,
    null_buffer: Option<MutableBuffer>,
) -> Self {
    let offsets_builder = BufferBuilder::<T::Offset>::new_from_buffer(offsets_buffer);
    let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);

    // The element count is taken to be one less than the number of offsets.
    let element_count = offsets_builder.len() - 1;
    let null_buffer_builder = match null_buffer {
        Some(validity) => NullBufferBuilder::new_from_buffer(validity, element_count),
        None => NullBufferBuilder::new_with_len(element_count),
    };

    Self {
        offsets_builder,
        value_builder,
        null_buffer_builder,
    }
}

/// Appends a value into the builder.
#[inline]
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
Expand Down Expand Up @@ -122,6 +150,16 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
pub fn offsets_slice(&self) -> &[T::Offset] {
// Borrow the offsets held by the internal offsets builder; nothing is copied.
self.offsets_builder.as_slice()
}

/// Returns the current null (validity) buffer as a byte slice, as reported by the
/// internal null buffer builder.
///
/// NOTE(review): `None` presumably means no validity buffer has been materialized
/// yet; confirm against `NullBufferBuilder::as_slice`.
pub fn validity_slice(&self) -> Option<&[u8]> {
self.null_buffer_builder.as_slice()
}

/// Returns the current null (validity) buffer as a mutable byte slice, as reported by
/// the internal null buffer builder.
///
/// NOTE(review): `None` presumably means no validity buffer has been materialized
/// yet; confirm against `NullBufferBuilder::as_slice_mut`.
pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
self.null_buffer_builder.as_slice_mut()
}
}

impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/builder/primitive_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,12 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
self.values_builder.as_slice_mut()
}

/// Returns the current values buffer as a slice
/// Returns the current null (validity) buffer as a byte slice, as reported by the
/// internal null buffer builder.
pub fn validity_slice(&self) -> Option<&[u8]> {
self.null_buffer_builder.as_slice()
}

/// Returns the current values buffer as a mutable slice
/// Returns the current null (validity) buffer as a mutable byte slice, as reported by
/// the internal null buffer builder.
pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
self.null_buffer_builder.as_slice_mut()
}
Expand Down

0 comments on commit ec43d6f

Please sign in to comment.