Skip to content

Commit

Permalink
AnonymousBuilder push null
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa committed Dec 25, 2023
1 parent 55774d2 commit 8411e1f
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 47 deletions.
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/array/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl NullArray {
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`].
pub fn try_new(data_type: ArrowDataType, length: usize) -> PolarsResult<Self> {
if data_type.to_physical_type() != PhysicalType::Null {
polars_bail!(ComputeError: "NullArray can only be initialized with a DataType whose physical type is Boolean");
polars_bail!(ComputeError: "NullArray can only be initialized with a DataType whose physical type is Null");
}

Ok(Self { data_type, length })
Expand Down
33 changes: 30 additions & 3 deletions crates/polars-arrow/src/legacy/array/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use polars_error::PolarsResult;

use crate::array::{ArrayRef, FixedSizeListArray};
use crate::array::{ArrayRef, FixedSizeListArray, NullArray};
use crate::bitmap::MutableBitmap;
use crate::datatypes::ArrowDataType;
use crate::legacy::array::{convert_inner_type, is_nested_null};
use crate::legacy::kernels::concatenate::concatenate_owned_unchecked;

#[derive(Default)]
Expand Down Expand Up @@ -34,6 +35,8 @@ impl AnonymousBuilder {
}

pub fn push_null(&mut self) {
self.arrays
.push(NullArray::new(ArrowDataType::Null, self.width).boxed());
match &mut self.validity {
Some(validity) => validity.push(false),
None => self.init_validity(),
Expand All @@ -48,8 +51,32 @@ impl AnonymousBuilder {
}

pub fn finish(self, inner_dtype: Option<&ArrowDataType>) -> PolarsResult<FixedSizeListArray> {
let values = concatenate_owned_unchecked(&self.arrays)?;
let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].data_type());
let mut inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].data_type());

if is_nested_null(inner_dtype) {
for arr in &self.arrays {
if !is_nested_null(arr.data_type()) {
inner_dtype = arr.data_type();
break;
}
}
};

// convert nested null arrays to the correct dtype.
let arrays = self
.arrays
.iter()
.map(|arr| {
if is_nested_null(arr.data_type()) {
convert_inner_type(&**arr, inner_dtype)
} else {
arr.to_boxed()
}
})
.collect::<Vec<_>>();

let values = concatenate_owned_unchecked(&arrays)?;

let data_type = FixedSizeListArray::default_datatype(inner_dtype.clone(), self.width);
Ok(FixedSizeListArray::new(
data_type,
Expand Down
44 changes: 2 additions & 42 deletions crates/polars-arrow/src/legacy/array/list.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use polars_error::PolarsResult;

use crate::array::{new_null_array, Array, ArrayRef, ListArray, NullArray, StructArray};
use crate::array::{new_null_array, Array, ArrayRef, ListArray, NullArray};
use crate::bitmap::MutableBitmap;
use crate::compute::concatenate;
use crate::datatypes::ArrowDataType;
use crate::legacy::array::is_nested_null;
use crate::legacy::kernels::concatenate::concatenate_owned_unchecked;
use crate::legacy::prelude::*;
use crate::offset::Offsets;
Expand Down Expand Up @@ -162,44 +163,3 @@ impl<'a> AnonymousBuilder<'a> {
))
}
}

/// Reports whether `data_type` bottoms out exclusively in `Null`.
///
/// The answer is `true` for `Null` itself, for a `LargeList` whose element
/// type is nested-null, and for a `Struct` in which every field is
/// nested-null (a struct with no fields therefore also yields `true`).
/// Every other data type yields `false`.
fn is_nested_null(data_type: &ArrowDataType) -> bool {
    // Walk down chains of list wrappers iteratively; only structs fan out.
    let mut current = data_type;
    loop {
        match current {
            ArrowDataType::Null => return true,
            ArrowDataType::LargeList(child) => current = child.data_type(),
            ArrowDataType::Struct(children) => {
                return children
                    .iter()
                    .all(|child| is_nested_null(child.data_type()));
            },
            _ => return false,
        }
    }
}

/// Rebuilds `array` so that it carries `dtype`, swapping its innermost
/// values for null arrays of the requested type.
///
/// * `LargeList`: the values are converted recursively; the original offsets
///   and validity are reused unchanged.
/// * `Struct`: each child array is converted against the field at the same
///   position; the original validity is reused.
/// * anything else: a fresh null array of `dtype` with `array.len()` entries.
///
/// # Panics
///
/// Panics if `dtype` is a `LargeList` or `Struct` but `array` is not the
/// matching array type.
pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
    let source = array.as_any();
    match dtype {
        ArrowDataType::LargeList(child) => {
            let list = source.downcast_ref::<LargeListArray>().unwrap();
            let values = convert_inner_type(list.values().as_ref(), child.data_type());
            let list_dtype = LargeListArray::default_datatype(values.data_type().clone());
            let offsets = list.offsets().clone();
            let validity = list.validity().cloned();
            LargeListArray::new(list_dtype, offsets, values, validity).boxed()
        },
        ArrowDataType::Struct(children) => {
            let record = source.downcast_ref::<StructArray>().unwrap();
            let columns = record
                .values()
                .iter()
                .zip(children.iter())
                .map(|(column, child)| convert_inner_type(column.as_ref(), child.data_type()))
                .collect::<Vec<_>>();
            let validity = record.validity().cloned();
            StructArray::new(dtype.clone(), columns, validity).boxed()
        },
        // Leaf level: nothing to preserve except the length.
        _ => new_null_array(dtype.clone(), array.len()),
    }
}
57 changes: 56 additions & 1 deletion crates/polars-arrow/src/legacy/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::array::{Array, BinaryArray, BooleanArray, ListArray, PrimitiveArray, Utf8Array};
use crate::array::{
new_null_array, Array, BinaryArray, BooleanArray, FixedSizeListArray, ListArray,
PrimitiveArray, StructArray, Utf8Array,
};
use crate::bitmap::MutableBitmap;
use crate::datatypes::ArrowDataType;
use crate::legacy::utils::CustomIterTools;
Expand All @@ -16,6 +19,8 @@ pub mod utf8;

pub use slice::*;

use crate::legacy::prelude::LargeListArray;

macro_rules! iter_to_values {
($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
$iterator
Expand Down Expand Up @@ -206,3 +211,53 @@ pub trait PolarsArray: Array {
}

impl<A: Array + ?Sized> PolarsArray for A {}

/// Reports whether `data_type` bottoms out exclusively in `Null`.
///
/// The answer is `true` for `Null` itself, for a `LargeList` or
/// `FixedSizeList` whose element type is nested-null, and for a `Struct` in
/// which every field is nested-null (a struct with no fields therefore also
/// yields `true`). Every other data type yields `false`.
fn is_nested_null(data_type: &ArrowDataType) -> bool {
    // Walk down chains of list wrappers iteratively; only structs fan out.
    let mut current = data_type;
    loop {
        match current {
            ArrowDataType::Null => return true,
            ArrowDataType::LargeList(child) => current = child.data_type(),
            ArrowDataType::FixedSizeList(child, _) => current = child.data_type(),
            ArrowDataType::Struct(children) => {
                return children
                    .iter()
                    .all(|child| is_nested_null(child.data_type()));
            },
            _ => return false,
        }
    }
}

/// Rebuilds `array` so that it carries `dtype`, swapping its innermost
/// values for null arrays of the requested type.
///
/// * `LargeList`: the values are converted recursively; the original offsets
///   and validity are reused unchanged.
/// * `FixedSizeList`: the values are converted recursively and re-wrapped
///   with the width taken from `dtype`; the original validity is reused.
/// * `Struct`: each child array is converted against the field at the same
///   position; the original validity is reused.
/// * anything else: a fresh null array of `dtype` with `array.len()` entries.
///
/// # Panics
///
/// Panics if `dtype` is a `LargeList`, `FixedSizeList` or `Struct` but
/// `array` is not the matching array type.
pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
    let source = array.as_any();
    match dtype {
        ArrowDataType::LargeList(child) => {
            let list = source.downcast_ref::<LargeListArray>().unwrap();
            let values = convert_inner_type(list.values().as_ref(), child.data_type());
            let list_dtype = LargeListArray::default_datatype(values.data_type().clone());
            let offsets = list.offsets().clone();
            let validity = list.validity().cloned();
            LargeListArray::new(list_dtype, offsets, values, validity).boxed()
        },
        ArrowDataType::FixedSizeList(child, width) => {
            let list = source.downcast_ref::<FixedSizeListArray>().unwrap();
            let values = convert_inner_type(list.values().as_ref(), child.data_type());
            let list_dtype =
                FixedSizeListArray::default_datatype(values.data_type().clone(), *width);
            let validity = list.validity().cloned();
            FixedSizeListArray::new(list_dtype, values, validity).boxed()
        },
        ArrowDataType::Struct(children) => {
            let record = source.downcast_ref::<StructArray>().unwrap();
            let columns = record
                .values()
                .iter()
                .zip(children.iter())
                .map(|(column, child)| convert_inner_type(column.as_ref(), child.data_type()))
                .collect::<Vec<_>>();
            let validity = record.validity().cloned();
            StructArray::new(dtype.clone(), columns, validity).boxed()
        },
        // Leaf level: nothing to preserve except the length.
        _ => new_null_array(dtype.clone(), array.len()),
    }
}

0 comments on commit 8411e1f

Please sign in to comment.