Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Use Buffer<T> in ObjectSeries, fixes variety of offset bugs #18637

Merged
merged 2 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions crates/polars-core/src/chunked_array/from_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use std::borrow::{Borrow, Cow};

#[cfg(feature = "object")]
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::bitmap::MutableBitmap;

use crate::chunked_array::builder::{get_list_builder, AnonymousOwnedListBuilder};
#[cfg(feature = "object")]
Expand Down Expand Up @@ -268,17 +268,7 @@ impl<T: PolarsObject> FromIterator<Option<T>> for ObjectChunked<T> {
})
.collect();

let null_bit_buffer: Option<Bitmap> = null_mask_builder.into();
let null_bitmap = null_bit_buffer;

let len = values.len();

let arr = Box::new(ObjectArray {
values: Arc::new(values),
null_bitmap,
offset: 0,
len,
});
let arr = Box::new(ObjectArray::from(values).with_validity(null_mask_builder.into()));
ChunkedArray::new_with_compute_len(
Arc::new(Field::new(PlSmallStr::EMPTY, get_object_type::<T>())),
vec![arr],
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ impl<T: PolarsObject> ObjectChunked<T> {
// we know that we only iterate over length == self.len()
unsafe {
self.downcast_iter()
.flat_map(|arr| arr.values().iter())
.flat_map(|arr| arr.values_iter())
.trust_my_length(self.len())
}
}
Expand Down
18 changes: 6 additions & 12 deletions crates/polars-core/src/chunked_array/object/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,8 @@ where
.unwrap_or(0) as IdxSize;

let arr = Box::new(ObjectArray {
values: Arc::new(self.values),
null_bitmap,
offset: 0,
len,
values: self.values.into(),
validity: null_bitmap,
});

self.field.dtype = get_object_type::<T>();
Expand Down Expand Up @@ -140,10 +138,8 @@ where
let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
let len = v.len();
let arr = Box::new(ObjectArray {
values: Arc::new(v),
null_bitmap: None,
offset: 0,
len,
values: v.into(),
validity: None,
});

unsafe { ObjectChunked::new_with_dims(field, vec![arr], len as IdxSize, 0) }
Expand All @@ -154,10 +150,8 @@ where
let len = v.len();
let null_count = validity.unset_bits();
let arr = Box::new(ObjectArray {
values: Arc::new(v),
null_bitmap: Some(validity),
offset: 0,
len,
values: v.into(),
validity: Some(validity),
});

unsafe {
Expand Down
63 changes: 30 additions & 33 deletions crates/polars-core/src/chunked_array/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::hash::Hash;

use arrow::bitmap::utils::{BitmapIter, ZipValidity};
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::buffer::Buffer;
use polars_utils::total_ord::TotalHash;

use crate::prelude::*;
Expand All @@ -22,10 +23,8 @@ pub struct ObjectArray<T>
where
T: PolarsObject,
{
pub(crate) values: Arc<Vec<T>>,
pub(crate) null_bitmap: Option<Bitmap>,
pub(crate) offset: usize,
pub(crate) len: usize,
values: Buffer<T>,
validity: Option<Bitmap>,
}

/// Trimmed down object safe polars object
Expand Down Expand Up @@ -80,23 +79,18 @@ impl<T> ObjectArray<T>
where
T: PolarsObject,
{
/// Get a reference to the underlying data
pub fn values(&self) -> &Arc<Vec<T>> {
&self.values
}

pub fn values_iter(&self) -> ObjectValueIter<'_, T> {
self.values.iter()
}

/// Returns an iterator of `Option<&T>` over every element of this array.
pub fn iter(&self) -> ZipValidity<&T, ObjectValueIter<'_, T>, BitmapIter> {
ZipValidity::new_with_validity(self.values_iter(), self.null_bitmap.as_ref())
ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
}

/// Get a value at a certain index location
pub fn value(&self, index: usize) -> &T {
&self.values[self.offset + index]
&self.values[index]
}

pub fn get(&self, index: usize) -> Option<&T> {
Expand All @@ -123,7 +117,7 @@ where
/// No bounds checks
#[inline]
pub unsafe fn is_valid_unchecked(&self, i: usize) -> bool {
if let Some(b) = &self.null_bitmap {
if let Some(b) = &self.validity {
b.get_bit_unchecked(i)
} else {
true
Expand Down Expand Up @@ -157,7 +151,7 @@ where
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
panic!("validity must be equal to the array's length")
}
self.null_bitmap = validity;
self.validity = validity;
}
}

Expand All @@ -182,14 +176,12 @@ where
}

unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
let len = std::cmp::min(self.len - offset, length);
self.null_bitmap = self
.null_bitmap
self.validity = self
.validity
.take()
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
.filter(|bitmap| bitmap.unset_bits() > 0);
self.len = len;
self.offset = offset;
self.values.slice_unchecked(offset, length);
}

fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
Expand All @@ -203,11 +195,11 @@ where
}

fn len(&self) -> usize {
self.len
self.values.len()
}

fn validity(&self) -> Option<&Bitmap> {
self.null_bitmap.as_ref()
self.validity.as_ref()
}

fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Expand All @@ -223,7 +215,7 @@ where
}

fn null_count(&self) -> usize {
match &self.null_bitmap {
match &self.validity {
None => 0,
Some(validity) => validity.unset_bits(),
}
Expand All @@ -236,18 +228,16 @@ impl<T: PolarsObject> Splitable for ObjectArray<T> {
}

unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
let (left_values, right_values) = unsafe { self.values.split_at_unchecked(offset) };
let (left_validity, right_validity) = unsafe { self.validity.split_at_unchecked(offset) };
(
Self {
values: self.values.clone(),
null_bitmap: self.null_bitmap.clone(),
len: offset,
offset: self.offset,
values: left_values,
validity: left_validity,
},
Self {
values: self.values.clone(),
null_bitmap: self.null_bitmap.clone(),
len: self.len() - offset,
offset: self.offset + offset,
values: right_values,
validity: right_validity,
},
)
}
Expand Down Expand Up @@ -277,10 +267,8 @@ impl<T: PolarsObject> StaticArray for ObjectArray<T> {

fn full_null(length: usize, _dtype: ArrowDataType) -> Self {
ObjectArray {
values: Arc::new(vec![T::default(); length]),
null_bitmap: Some(Bitmap::new_with_value(false, length)),
offset: 0,
len: length,
values: vec![T::default(); length].into(),
validity: Some(Bitmap::new_with_value(false, length)),
}
}
}
Expand Down Expand Up @@ -328,3 +316,12 @@ where
}
}
}

impl<T: PolarsObject> From<Vec<T>> for ObjectArray<T> {
fn from(values: Vec<T>) -> Self {
Self {
values: values.into(),
validity: None,
}
}
}
13 changes: 1 addition & 12 deletions crates/polars-core/src/datatypes/static_array_collect.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use std::sync::Arc;

use arrow::array::ArrayFromIter;
use arrow::bitmap::Bitmap;

use crate::chunked_array::object::{ObjectArray, PolarsObject};

Expand Down Expand Up @@ -41,14 +38,6 @@ impl<'a, T: PolarsObject> ArrayFromIter<Option<&'a T>> for ObjectArray<T> {
})
.collect::<Result<Vec<T>, E>>()?;

let null_bit_buffer: Option<Bitmap> = null_mask_builder.into();
let null_bitmap = null_bit_buffer;
let len = values.len();
Ok(ObjectArray {
values: Arc::new(values),
null_bitmap,
offset: 0,
len,
})
Ok(ObjectArray::from(values).with_validity(null_mask_builder.into()))
}
}