From e9ace5e51cd46e70d9bff0dbd109fb9f0a7b5660 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 10 Jun 2022 21:54:15 +0100 Subject: [PATCH] Strongly typed UnionBuilder --- arrow/src/array/builder.rs | 150 +++++++++++------------------------- arrow/src/buffer/mutable.rs | 2 +- 2 files changed, 46 insertions(+), 106 deletions(-) diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs index 041b7a92c33f..4d96ec66ebb4 100644 --- a/arrow/src/array/builder.rs +++ b/arrow/src/array/builder.rs @@ -34,29 +34,6 @@ use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::util::bit_util; -/// Converts a `MutableBuffer` to a `BufferBuilder`. -/// -/// `slots` is the number of array slots currently represented in the `MutableBuffer`. -pub(crate) fn mutable_buffer_to_builder( - mutable_buffer: MutableBuffer, - slots: usize, -) -> BufferBuilder { - BufferBuilder:: { - buffer: mutable_buffer, - len: slots, - _marker: PhantomData, - } -} - -/// Converts a `BufferBuilder` into its underlying `MutableBuffer`. -/// -/// `From` is not implemented because associated type bounds are unstable. -pub(crate) fn builder_to_mutable_buffer( - builder: BufferBuilder, -) -> MutableBuffer { - builder.buffer -} - /// Builder for creating a [`Buffer`](crate::buffer::Buffer) object. /// /// A [`Buffer`](crate::buffer::Buffer) is the underlying data @@ -1909,101 +1886,65 @@ struct FieldData { /// The Arrow data type represented in the `values_buffer`, which is untyped data_type: DataType, /// A buffer containing the values for this field in raw bytes - values_buffer: Option, + values_buffer: Box, /// The number of array slots represented by the buffer slots: usize, /// A builder for the null bitmap bitmap_builder: BooleanBufferBuilder, } +/// A type-erased [`BufferBuilder`] used by [`FieldData`] +trait FieldDataValues: std::fmt::Debug { + fn as_mut_any(&mut self) -> &mut dyn Any; + + fn append_null(&mut self); + + fn finish(&mut self) -> Buffer; +} + +impl FieldDataValues for BufferBuilder { + fn as_mut_any(&mut self) -> &mut dyn Any { + self + } + + fn append_null(&mut self) { + self.advance(1) + } + + fn finish(&mut self) -> Buffer { + self.finish() + } +} + impl FieldData { /// Creates a new `FieldData`. - fn new(type_id: i8, data_type: DataType) -> Self { + fn new(type_id: i8, data_type: DataType) -> Self { Self { type_id, data_type, - values_buffer: Some(MutableBuffer::new(1)), slots: 0, + values_buffer: Box::new(BufferBuilder::::new(1)), bitmap_builder: BooleanBufferBuilder::new(1), } } /// Appends a single value to this `FieldData`'s `values_buffer`. - #[allow(clippy::unnecessary_wraps)] - fn append_to_values_buffer( - &mut self, - v: T::Native, - ) -> Result<()> { - let values_buffer = self - .values_buffer - .take() - .expect("Values buffer was never created"); - let mut builder: BufferBuilder = - mutable_buffer_to_builder(values_buffer, self.slots); - builder.append(v); - let mutable_buffer = builder_to_mutable_buffer(builder); - self.values_buffer = Some(mutable_buffer); + fn append_value(&mut self, v: T::Native) -> () { + self.values_buffer + .as_mut_any() + .downcast_mut::>() + .unwrap() + .append(v); - self.slots += 1; self.bitmap_builder.append(true); - Ok(()) + self.slots += 1; } /// Appends a null to this `FieldData`. - #[allow(clippy::unnecessary_wraps)] - fn append_null(&mut self) -> Result<()> { - let values_buffer = self - .values_buffer - .take() - .expect("Values buffer was never created"); - - let mut builder: BufferBuilder = - mutable_buffer_to_builder(values_buffer, self.slots); - - builder.advance(1); - let mutable_buffer = builder_to_mutable_buffer(builder); - self.values_buffer = Some(mutable_buffer); - self.slots += 1; + fn append_null(&mut self) { + self.values_buffer.append_null(); self.bitmap_builder.append(false); - Ok(()) - } - - /// Appends a null to this `FieldData` when the type is not known at compile time. - /// - /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null - /// slots to the fields that are not being appended to in the case of sparse unions. This - /// method solves this problem by appending dynamically based on `DataType`. - /// - /// Note, this method does **not** update the length of the `UnionArray` (this is done by the - /// main append operation) and assumes that it is called from a method that is generic over `T` - /// where `T` satisfies the bound `ArrowPrimitiveType`. - fn append_null_dynamic(&mut self) -> Result<()> { - match self.data_type { - DataType::Null => unimplemented!(), - DataType::Int8 => self.append_null::()?, - DataType::Int16 => self.append_null::()?, - DataType::Int32 - | DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { - self.append_null::()? - } - DataType::Int64 - | DataType::Timestamp(_, _) - | DataType::Date64 - | DataType::Time64(_) - | DataType::Interval(IntervalUnit::DayTime) - | DataType::Duration(_) => self.append_null::()?, - DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::()?, - DataType::UInt8 => self.append_null::()?, - DataType::UInt16 => self.append_null::()?, - DataType::UInt32 => self.append_null::()?, - DataType::UInt64 => self.append_null::()?, - DataType::Float32 => self.append_null::()?, - DataType::Float64 => self.append_null::()?, - _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."), - }; - Ok(()) + self.slots += 1; } } @@ -2119,11 +2060,12 @@ impl UnionBuilder { data } None => match self.value_offset_builder { - Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE), + Some(_) => FieldData::new::(self.fields.len() as i8, T::DATA_TYPE), None => { - let mut fd = FieldData::new(self.fields.len() as i8, T::DATA_TYPE); + let mut fd = + FieldData::new::(self.fields.len() as i8, T::DATA_TYPE); for _ in 0..self.len { - fd.append_null::()?; + fd.append_null(); } fd } @@ -2140,14 +2082,14 @@ impl UnionBuilder { None => { for (_, fd) in self.fields.iter_mut() { // Append to all bar the FieldData currently being appended to - fd.append_null_dynamic()?; + fd.append_null(); } } } match v { - Some(v) => field_data.append_to_values_buffer::(v)?, - None => field_data.append_null::()?, + Some(v) => field_data.append_value::(v), + None => field_data.append_null(), } self.fields.insert(type_name, field_data); @@ -2165,15 +2107,13 @@ impl UnionBuilder { FieldData { type_id, data_type, - values_buffer, + mut values_buffer, slots, mut bitmap_builder, }, ) in self.fields.into_iter() { - let buffer = values_buffer - .expect("The `values_buffer` should only ever be None inside the `append` method.") - .into(); + let buffer = values_buffer.finish(); let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) .add_buffer(buffer) .len(slots) diff --git a/arrow/src/buffer/mutable.rs b/arrow/src/buffer/mutable.rs index 709973b4401b..5cbfb27fd589 100644 --- a/arrow/src/buffer/mutable.rs +++ b/arrow/src/buffer/mutable.rs @@ -273,7 +273,7 @@ impl MutableBuffer { Buffer::from_bytes(bytes) } - /// View this buffer asa slice of a specific type. + /// View this buffer as a slice of a specific type. /// /// # Safety ///