From 1dc5efc68acb56e224e52feb3da79d8e8a381ee8 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Fri, 1 Nov 2024 14:37:20 +0100 Subject: [PATCH] refactor(rust): Remove MutableStructArray (#19587) --- crates/polars-arrow/src/array/mod.rs | 2 +- crates/polars-arrow/src/array/struct_/mod.rs | 2 - .../polars-arrow/src/array/struct_/mutable.rs | 240 ------------------ 3 files changed, 1 insertion(+), 243 deletions(-) delete mode 100644 crates/polars-arrow/src/array/struct_/mutable.rs diff --git a/crates/polars-arrow/src/array/mod.rs b/crates/polars-arrow/src/array/mod.rs index a2acd7164f6a..60d7cba0d53d 100644 --- a/crates/polars-arrow/src/array/mod.rs +++ b/crates/polars-arrow/src/array/mod.rs @@ -679,7 +679,7 @@ use polars_error::PolarsResult; pub use primitive::*; pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray}; pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype}; -pub use struct_::{MutableStructArray, StructArray}; +pub use struct_::StructArray; pub use union::UnionArray; pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter}; pub use values::ValueSize; diff --git a/crates/polars-arrow/src/array/struct_/mod.rs b/crates/polars-arrow/src/array/struct_/mod.rs index eeaac519bb0d..e64e49bb02f9 100644 --- a/crates/polars-arrow/src/array/struct_/mod.rs +++ b/crates/polars-arrow/src/array/struct_/mod.rs @@ -5,8 +5,6 @@ use crate::datatypes::{ArrowDataType, Field}; mod ffi; pub(super) mod fmt; mod iterator; -mod mutable; -pub use mutable::*; use polars_error::{polars_bail, polars_ensure, PolarsResult}; use crate::compute::utils::combine_validities_and; diff --git a/crates/polars-arrow/src/array/struct_/mutable.rs b/crates/polars-arrow/src/array/struct_/mutable.rs deleted file mode 100644 index e066d7b6aef2..000000000000 --- a/crates/polars-arrow/src/array/struct_/mutable.rs +++ /dev/null @@ -1,240 +0,0 @@ -use std::sync::Arc; - -use polars_error::{polars_bail, PolarsResult}; - -use super::StructArray; -use crate::array::{Array, MutableArray}; -use crate::bitmap::MutableBitmap; -use crate::datatypes::ArrowDataType; - -/// Converting a [`MutableStructArray`] into a [`StructArray`] is `O(1)`. -#[derive(Debug)] -pub struct MutableStructArray { - dtype: ArrowDataType, - length: usize, - values: Vec>, - validity: Option, -} - -fn check( - dtype: &ArrowDataType, - length: usize, - values: &[Box], - validity: Option, -) -> PolarsResult<()> { - let fields = StructArray::try_get_fields(dtype)?; - - if fields.len() != values.len() { - polars_bail!(ComputeError: "a StructArray must have a number of fields in its DataType equal to the number of child values") - } - - fields - .iter().map(|a| &a.dtype) - .zip(values.iter().map(|a| a.dtype())) - .enumerate() - .try_for_each(|(index, (dtype, child))| { - if dtype != child { - polars_bail!(ComputeError: "The children DataTypes of a StructArray must equal the children data types.\nHowever, the field {index} has data type {dtype:?} but the value has data type {child:?}") - } else { - Ok(()) - } - })?; - - values - .iter() - .map(|f| f.len()) - .enumerate() - .try_for_each(|(index, f_length)| { - if f_length != length { - polars_bail!(ComputeError: "The children must have the given number of values.\nHowever, the values at index {index} have a length of {f_length}, which is different from given length {length}.") - } else { - Ok(()) - } - })?; - - if validity.map_or(false, |validity| validity != length) { - polars_bail!(ComputeError: - "the validity length of a StructArray must match its number of elements", - ) - } - Ok(()) -} - -impl From for StructArray { - fn from(other: MutableStructArray) -> Self { - let validity = if other.validity.as_ref().map(|x| x.unset_bits()).unwrap_or(0) > 0 { - other.validity.map(|x| x.into()) - } else { - None - }; - - StructArray::new( - other.dtype, - other.length, - other.values.into_iter().map(|mut v| v.as_box()).collect(), - validity, - ) - } -} - -impl MutableStructArray { - /// Creates a new [`MutableStructArray`]. - pub fn new(dtype: ArrowDataType, length: usize, values: Vec>) -> Self { - Self::try_new(dtype, length, values, None).unwrap() - } - - /// Create a [`MutableStructArray`] out of low-end APIs. - /// # Errors - /// This function errors iff: - /// * `dtype` is not [`ArrowDataType::Struct`] - /// * The inner types of `dtype` are not equal to those of `values` - /// * `validity` is not `None` and its length is different from the `values`'s length - pub fn try_new( - dtype: ArrowDataType, - length: usize, - values: Vec>, - validity: Option, - ) -> PolarsResult { - check(&dtype, length, &values, validity.as_ref().map(|x| x.len()))?; - Ok(Self { - dtype, - length, - values, - validity, - }) - } - - /// Extract the low-end APIs from the [`MutableStructArray`]. - pub fn into_inner( - self, - ) -> ( - ArrowDataType, - usize, - Vec>, - Option, - ) { - (self.dtype, self.length, self.values, self.validity) - } - - /// The values - pub fn values(&self) -> &Vec> { - &self.values - } -} - -impl MutableStructArray { - /// Reserves `additional` entries. - pub fn reserve(&mut self, additional: usize) { - for v in &mut self.values { - v.reserve(additional); - } - if let Some(x) = self.validity.as_mut() { - x.reserve(additional) - } - } - - /// Call this once for each "row" of children you push. - pub fn push(&mut self, valid: bool) { - match &mut self.validity { - Some(validity) => validity.push(valid), - None => match valid { - true => (), - false => self.init_validity(), - }, - }; - self.length += 1; - } - - fn push_null(&mut self) { - for v in &mut self.values { - v.push_null(); - } - self.push(false); - } - - fn init_validity(&mut self) { - let mut validity = MutableBitmap::with_capacity(self.values.capacity()); - let len = self.len(); - if len > 0 { - validity.extend_constant(len, true); - validity.set(len - 1, false); - } - self.validity = Some(validity) - } - - /// Converts itself into an [`Array`]. - pub fn into_arc(self) -> Arc { - let a: StructArray = self.into(); - Arc::new(a) - } - - /// Shrinks the capacity of the [`MutableStructArray`] to fit its current length. - pub fn shrink_to_fit(&mut self) { - for v in &mut self.values { - v.shrink_to_fit(); - } - if let Some(validity) = self.validity.as_mut() { - validity.shrink_to_fit() - } - } -} - -impl MutableArray for MutableStructArray { - fn len(&self) -> usize { - self.length - } - - fn validity(&self) -> Option<&MutableBitmap> { - self.validity.as_ref() - } - - fn as_box(&mut self) -> Box { - StructArray::new( - self.dtype.clone(), - self.length, - std::mem::take(&mut self.values) - .into_iter() - .map(|mut v| v.as_box()) - .collect(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - .boxed() - } - - fn as_arc(&mut self) -> Arc { - StructArray::new( - self.dtype.clone(), - self.length, - std::mem::take(&mut self.values) - .into_iter() - .map(|mut v| v.as_box()) - .collect(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - .arced() - } - - fn dtype(&self) -> &ArrowDataType { - &self.dtype - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn std::any::Any { - self - } - - fn push_null(&mut self) { - self.push_null() - } - - fn shrink_to_fit(&mut self) { - self.shrink_to_fit() - } - - fn reserve(&mut self, additional: usize) { - self.reserve(additional) - } -}