Skip to content

Commit

Permalink
perf: Purge ChunkedArray Metadata (#20371)
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Dec 20, 2024
1 parent ff00869 commit 5d2d550
Show file tree
Hide file tree
Showing 43 changed files with 294 additions and 1,521 deletions.
14 changes: 6 additions & 8 deletions crates/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use polars_compute::cast::CastOptionsImpl;
#[cfg(feature = "serde-lazy")]
use serde::{Deserialize, Serialize};

use crate::chunked_array::metadata::MetadataProperties;
use super::flags::StatisticsFlags;
#[cfg(feature = "timezones")]
use crate::chunked_array::temporal::validate_time_zone;
#[cfg(feature = "dtype-datetime")]
Expand Down Expand Up @@ -380,15 +380,14 @@ impl BinaryChunked {
pub unsafe fn to_string_unchecked(&self) -> StringChunked {
let chunks = self
.downcast_iter()
.map(|arr| arr.to_utf8view_unchecked().boxed())
.map(|arr| unsafe { arr.to_utf8view_unchecked() }.boxed())
.collect();
let field = Arc::new(Field::new(self.name().clone(), DataType::String));

let mut ca = StringChunked::new_with_compute_len(field, chunks);

use MetadataProperties as P;
ca.copy_metadata_cast(self, P::SORTED | P::FAST_EXPLODE_LIST);

use StatisticsFlags as F;
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
ca
}
}
Expand All @@ -403,9 +402,8 @@ impl StringChunked {

let mut ca = BinaryChunked::new_with_compute_len(field, chunks);

use MetadataProperties as P;
ca.copy_metadata_cast(self, P::SORTED | P::FAST_EXPLODE_LIST);

use StatisticsFlags as F;
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
ca
}
}
Expand Down
116 changes: 116 additions & 0 deletions crates/polars-core/src/chunked_array/flags.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use std::sync::atomic::{AtomicU32, Ordering};

use crate::series::IsSorted;

/// An interior mutable version of [`StatisticsFlags`]
///
/// The raw flag bits are kept in an [`AtomicU32`], which is what allows them to
/// be read and replaced through a shared reference.
pub struct StatisticsFlagsIM {
// Raw `u32` bit representation of a `StatisticsFlags` value.
inner: AtomicU32,
}

bitflags::bitflags! {
// Bit set of statistics flags, stored in a `u32`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct StatisticsFlags: u32 {
// Mask covering both sort-direction bits (0x01 | 0x02). It is a mask rather
// than a state of its own: `is_sorted` asserts that the two direction bits
// are never set at the same time.
const IS_SORTED_ANY = 0x03;

// Reported by `is_sorted` as `IsSorted::Ascending`.
const IS_SORTED_ASC = 0x01;
// Reported by `is_sorted` as `IsSorted::Descending`.
const IS_SORTED_DSC = 0x02;
// NOTE(review): presumably marks list data that can take a fast explode
// path — confirm against the code that reads this flag.
const CAN_FAST_EXPLODE_LIST = 0x04;
}
}

// Debug output is a tuple-struct rendering of the currently stored flags.
impl std::fmt::Debug for StatisticsFlagsIM {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Label the output with this type's real name; the previous label
        // ("ChunkedArrayFlagsIM") did not correspond to any type in this file.
        // The flags are read with `get`, i.e. a snapshot at formatting time.
        f.debug_tuple("StatisticsFlagsIM")
            .field(&self.get())
            .finish()
    }
}

// Cloning reads the current flags once and wraps that snapshot in a fresh
// value, so the clone does not share storage with the original.
impl Clone for StatisticsFlagsIM {
    fn clone(&self) -> Self {
        let snapshot = self.get();
        Self::new(snapshot)
    }
}

// Two wrappers compare equal when the flags they currently hold are equal.
impl PartialEq for StatisticsFlagsIM {
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.get();
        let rhs = other.get();
        lhs == rhs
    }
}

impl Eq for StatisticsFlagsIM {}

impl From<StatisticsFlags> for StatisticsFlagsIM {
fn from(value: StatisticsFlags) -> Self {
Self {
inner: AtomicU32::new(value.bits()),
}
}
}

impl StatisticsFlagsIM {
    /// Wraps `value` by storing its raw bits in a new atomic.
    pub fn new(value: StatisticsFlags) -> Self {
        let bits = value.bits();
        Self {
            inner: AtomicU32::new(bits),
        }
    }

    /// Creates a wrapper holding no flags.
    pub fn empty() -> Self {
        let no_flags = StatisticsFlags::empty();
        Self::new(no_flags)
    }

    /// Reads the flags through exclusive access, without an atomic load.
    ///
    /// Despite the name this returns the flags by value, not a mutable
    /// reference. The result of `from_bits` is unwrapped, so this panics if
    /// that conversion yields `None`.
    pub fn get_mut(&mut self) -> StatisticsFlags {
        let bits = *self.inner.get_mut();
        StatisticsFlags::from_bits(bits).unwrap()
    }

    /// Overwrites the flags through exclusive access, without an atomic store.
    pub fn set_mut(&mut self, value: StatisticsFlags) {
        let slot = self.inner.get_mut();
        *slot = value.bits();
    }

    /// Reads the flags through a shared reference using a relaxed load.
    ///
    /// The result of `from_bits` is unwrapped, so this panics if that
    /// conversion yields `None`.
    pub fn get(&self) -> StatisticsFlags {
        let bits = self.inner.load(Ordering::Relaxed);
        StatisticsFlags::from_bits(bits).unwrap()
    }

    /// Overwrites the flags through a shared reference using a relaxed store.
    pub fn set(&self, value: StatisticsFlags) {
        let bits = value.bits();
        self.inner.store(bits, Ordering::Relaxed);
    }
}

impl StatisticsFlags {
    /// Translates the two sort-direction bits into an [`IsSorted`] value.
    ///
    /// # Panics
    ///
    /// Panics if both the ascending and the descending bit are set.
    pub fn is_sorted(&self) -> IsSorted {
        let is_sorted_asc = self.contains(Self::IS_SORTED_ASC);
        let is_sorted_dsc = self.contains(Self::IS_SORTED_DSC);

        // Both directions at once is an invalid state.
        assert!(!is_sorted_asc || !is_sorted_dsc);

        match (is_sorted_asc, is_sorted_dsc) {
            (true, _) => IsSorted::Ascending,
            (false, true) => IsSorted::Descending,
            (false, false) => IsSorted::Not,
        }
    }

    /// Replaces the sort-direction bits with the ones matching `is_sorted`,
    /// leaving every other flag untouched.
    pub fn set_sorted(&mut self, is_sorted: IsSorted) {
        // Clear both direction bits first (`IS_SORTED_ANY` is exactly
        // `IS_SORTED_ASC | IS_SORTED_DSC`), then set at most one of them.
        self.remove(Self::IS_SORTED_ANY);
        match is_sorted {
            IsSorted::Not => {},
            IsSorted::Ascending => self.insert(Self::IS_SORTED_ASC),
            IsSorted::Descending => self.insert(Self::IS_SORTED_DSC),
        }
    }

    /// Returns `true` if either sort-direction bit is set.
    pub fn is_sorted_any(&self) -> bool {
        self.is_sorted_ascending() || self.is_sorted_descending()
    }

    /// Returns `true` if the ascending bit is set.
    pub fn is_sorted_ascending(&self) -> bool {
        self.contains(Self::IS_SORTED_ASC)
    }

    /// Returns `true` if the descending bit is set.
    pub fn is_sorted_descending(&self) -> bool {
        self.contains(Self::IS_SORTED_DSC)
    }

    /// Returns `true` if the `CAN_FAST_EXPLODE_LIST` bit is set.
    pub fn can_fast_explode_list(&self) -> bool {
        self.contains(Self::CAN_FAST_EXPLODE_LIST)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use revmap::*;

use super::*;
use crate::chunked_array::cast::CastOptions;
use crate::chunked_array::metadata::MetadataFlags;
use crate::chunked_array::flags::StatisticsFlags;
use crate::prelude::*;
use crate::series::IsSorted;
use crate::using_string_cache;
Expand Down Expand Up @@ -174,15 +174,15 @@ impl CategoricalChunked {
}
}

pub(crate) fn get_flags(&self) -> MetadataFlags {
pub(crate) fn get_flags(&self) -> StatisticsFlags {
self.physical().get_flags()
}

/// Set flags for the Chunked Array
pub(crate) fn set_flags(&mut self, mut flags: MetadataFlags) {
pub(crate) fn set_flags(&mut self, mut flags: StatisticsFlags) {
// We should not set the sorted flag if we are sorting in lexical order
if self.uses_lexical_ordering() {
flags.set_sorted_flag(IsSorted::Not)
flags.set_sorted(IsSorted::Not)
}
self.physical_mut().set_flags(flags)
}
Expand Down
46 changes: 0 additions & 46 deletions crates/polars-core/src/chunked_array/metadata/collect.rs

This file was deleted.

122 changes: 0 additions & 122 deletions crates/polars-core/src/chunked_array/metadata/env.rs

This file was deleted.

23 changes: 0 additions & 23 deletions crates/polars-core/src/chunked_array/metadata/guard.rs

This file was deleted.

Loading

0 comments on commit 5d2d550

Please sign in to comment.