Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Dispatch Parquet Primitive PLAIN decoding to faster kernels when possible #19611

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 99 additions & 1 deletion crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::sync::LazyLock;
use either::Either;
use polars_error::{polars_bail, PolarsResult};

use super::utils::{count_zeros, fmt, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::utils::{self, count_zeros, fmt, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::{chunk_iter_to_vec, intersects_with, num_intersections_with, IntoIter, MutableBitmap};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
Expand Down Expand Up @@ -532,6 +532,104 @@ impl Bitmap {
pub fn num_edges(&self) -> usize {
    // Delegates to the shared bitmap_ops implementation. An "edge" is
    // presumably a 0<->1 transition between adjacent bits — confirm in
    // `bitmap_ops::num_edges`.
    super::bitmap_ops::num_edges(self)
}

/// Returns the number of zero bits from the start before a one bit is seen.
///
/// Returns `self.len()` when the bitmap contains no one bit (including when empty).
pub fn leading_zeros(&self) -> usize {
    utils::leading_zeros(&self.storage, self.offset, self.length)
}
/// Returns the number of one bits from the start before a zero bit is seen.
///
/// Returns `self.len()` when the bitmap contains no zero bit (including when empty).
pub fn leading_ones(&self) -> usize {
    utils::leading_ones(&self.storage, self.offset, self.length)
}
/// Returns the number of zero bits from the back before a one bit is seen.
///
/// Returns `self.len()` when the bitmap contains no one bit (including when empty).
pub fn trailing_zeros(&self) -> usize {
    utils::trailing_zeros(&self.storage, self.offset, self.length)
}
/// Returns the number of one bits from the back before a zero bit is seen.
///
/// Returns `self.len()` when the bitmap contains no zero bit (including when empty).
pub fn trailing_ones(&self) -> usize {
    // NOTE: previously took `&mut self`, although nothing is mutated; `&self`
    // matches the three sibling accessors and no longer forces callers to hold
    // a mutable borrow. `&mut self` callers still compile (reborrow).
    utils::trailing_ones(&self.storage, self.offset, self.length)
}

/// Take all `0` bits at the start of the [`Bitmap`] before a `1` is seen, returning how many
/// bits were taken.
pub fn take_leading_zeros(&mut self) -> usize {
    // Cheap path: when a cached unset-bit count equals the length, every bit
    // is zero and the whole bitmap can be drained without scanning.
    if matches!(self.lazy_unset_bits(), Some(unset) if unset == self.length) {
        let taken = self.length;
        self.offset += taken;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return taken;
    }

    let taken = self.leading_zeros();
    self.offset += taken;
    self.length -= taken;
    let cache = self.unset_bit_count_cache.get_mut();
    // Every removed bit was a zero, so a valid cache shrinks by exactly `taken`.
    if has_cached_unset_bit_count(*cache) {
        *cache -= taken as u64;
    }
    taken
}
/// Take all `1` bits at the start of the [`Bitmap`] before a `0` is seen, returning how many
/// bits were taken.
pub fn take_leading_ones(&mut self) -> usize {
    // Cheap path: a cached unset-bit count of zero means every bit is one.
    if matches!(self.lazy_unset_bits(), Some(unset) if unset == 0) {
        let taken = self.length;
        self.offset += taken;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return taken;
    }

    let taken = self.leading_ones();
    self.offset += taken;
    self.length -= taken;
    // Only one bits were removed, so the unset-bit count cache stays valid.
    taken
}
/// Take all `0` bits at the back of the [`Bitmap`] before a `1` is seen, returning how many
/// bits were taken.
pub fn take_trailing_zeros(&mut self) -> usize {
    // Cheap path: when a cached unset-bit count equals the length, every bit
    // is zero and the whole bitmap can be drained without scanning.
    // (The offset stays put: bits are dropped from the back.)
    if matches!(self.lazy_unset_bits(), Some(unset) if unset == self.length) {
        let taken = self.length;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return taken;
    }

    let taken = self.trailing_zeros();
    self.length -= taken;
    let cache = self.unset_bit_count_cache.get_mut();
    // Every removed bit was a zero, so a valid cache shrinks by exactly `taken`.
    if has_cached_unset_bit_count(*cache) {
        *cache -= taken as u64;
    }
    taken
}
/// Take all `1` bits at the back of the [`Bitmap`] before a `0` is seen, returning how many
/// bits were taken.
pub fn take_trailing_ones(&mut self) -> usize {
    // Cheap path: a cached unset-bit count of zero means every bit is one.
    // (The offset stays put: bits are dropped from the back.)
    if matches!(self.lazy_unset_bits(), Some(unset) if unset == 0) {
        let taken = self.length;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return taken;
    }

    let taken = self.trailing_ones();
    self.length -= taken;
    // Only one bits were removed, so the unset-bit count cache stays valid.
    taken
}
}

impl<P: AsRef<[bool]>> From<P> for Bitmap {
Expand Down
210 changes: 210 additions & 0 deletions crates/polars-arrow/src/bitmap/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,213 @@ pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
let ones_in_suffix = aligned.suffix().count_ones() as usize;
len - ones_in_prefix - ones_in_bulk - ones_in_suffix
}

/// Returns the number of zero bits before seeing a one bit in the slice offsetted by `offset`
/// and a length of `len`. Returns `len` when no one bit is present.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn leading_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    assert!(8 * slice.len() >= offset + len);

    // View the bit range as an unaligned prefix word, a run of full 64-bit
    // words (the bulk) and an unaligned suffix word.
    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
    // Bits are LSB-first within a word (see the `testcase!` expectations in
    // the tests below), so the bitmap's *leading* bits are a word's *trailing*
    // (low) bits. `.min(prefix_bitlen)` clamps away the zero padding above the
    // prefix, which would otherwise extend the zero run.
    let leading_zeros_in_prefix =
        (aligned.prefix().trailing_zeros() as usize).min(aligned.prefix_bitlen());
    if leading_zeros_in_prefix < aligned.prefix_bitlen() {
        return leading_zeros_in_prefix;
    }
    // `position` of the first non-zero bulk word equals the number of all-zero
    // bulk words that precede it.
    if let Some(full_zero_bulk_words) = aligned.bulk_iter().position(|w| w != 0) {
        return aligned.prefix_bitlen()
            + full_zero_bulk_words * 64
            + aligned.bulk()[full_zero_bulk_words].trailing_zeros() as usize;
    }

    // Prefix and bulk were all zeros; finish the count in the suffix, again
    // clamped so the suffix's zero padding is not counted.
    aligned.prefix_bitlen()
        + aligned.bulk_bitlen()
        + (aligned.suffix().trailing_zeros() as usize).min(aligned.suffix_bitlen())
}

/// Returns the number of one bits before seeing a zero bit in the slice offsetted by `offset`
/// and a length of `len`. Returns `len` when no zero bit is present.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn leading_ones(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    assert!(8 * slice.len() >= offset + len);

    // View the bit range as an unaligned prefix word, a run of full 64-bit
    // words (the bulk) and an unaligned suffix word.
    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
    // Bits are LSB-first within a word, so leading bits are a word's trailing
    // (low) bits. No `.min` clamp is needed here: the padding bits above the
    // prefix are zero, so they terminate a run of ones by themselves.
    let leading_ones_in_prefix = aligned.prefix().trailing_ones() as usize;
    if leading_ones_in_prefix < aligned.prefix_bitlen() {
        return leading_ones_in_prefix;
    }
    // `position` of the first not-all-ones bulk word equals the number of
    // all-ones bulk words that precede it.
    if let Some(full_one_bulk_words) = aligned.bulk_iter().position(|w| w != u64::MAX) {
        return aligned.prefix_bitlen()
            + full_one_bulk_words * 64
            + aligned.bulk()[full_one_bulk_words].trailing_ones() as usize;
    }

    // Prefix and bulk were all ones; finish the count in the suffix (the zero
    // padding above the suffix again terminates the run on its own).
    aligned.prefix_bitlen() + aligned.bulk_bitlen() + aligned.suffix().trailing_ones() as usize
}

/// Returns the number of zero bits at the *back* before seeing a one bit in the slice offsetted
/// by `offset` and a length of `len`. Returns `len` when no one bit is present.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn trailing_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    assert!(8 * slice.len() >= offset + len);

    // View the bit range as an unaligned prefix word, a run of full 64-bit
    // words (the bulk) and an unaligned suffix word. Scanning from the back
    // means suffix first, then bulk in reverse, then prefix.
    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
    // Shift the suffix's valid bits up to the top of the word so that
    // `leading_zeros` scans the bitmap from its last bit backwards. The
    // `% 64` avoids an overflowing (UB-in-debug) shift when bitlen == 0, and
    // `.min(suffix_bitlen)` clamps the count when the whole (possibly empty)
    // suffix is zero.
    let trailing_zeros_in_suffix = ((aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64))
        .leading_zeros() as usize)
        .min(aligned.suffix_bitlen());
    if trailing_zeros_in_suffix < aligned.suffix_bitlen() {
        return trailing_zeros_in_suffix;
    }
    // Reverse `position` of the first non-zero bulk word equals the number of
    // all-zero bulk words after it; index it back from the end of the bulk.
    if let Some(full_zero_bulk_words) = aligned.bulk_iter().rev().position(|w| w != 0) {
        return aligned.suffix_bitlen()
            + full_zero_bulk_words * 64
            + aligned.bulk()[aligned.bulk().len() - full_zero_bulk_words - 1].leading_zeros()
                as usize;
    }

    // Suffix and bulk were all zeros; finish the count in the prefix with the
    // same shift-to-the-top trick.
    let trailing_zeros_in_prefix = ((aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64))
        .leading_zeros() as usize)
        .min(aligned.prefix_bitlen());
    aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_zeros_in_prefix
}

/// Returns the number of one bits at the *back* before seeing a zero bit in the slice offsetted
/// by `offset` and a length of `len`. Returns `len` when no zero bit is present.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn trailing_ones(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    assert!(8 * slice.len() >= offset + len);

    // View the bit range as an unaligned prefix word, a run of full 64-bit
    // words (the bulk) and an unaligned suffix word. Scanning from the back
    // means suffix first, then bulk in reverse, then prefix.
    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
    // Shift the suffix's valid bits up to the top of the word so that
    // `leading_ones` scans the bitmap from its last bit backwards; `% 64`
    // avoids an overflowing shift when bitlen == 0. No `.min` clamp is needed
    // for ones: the zero-filled low bits terminate the run by themselves.
    let trailing_ones_in_suffix =
        (aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64)).leading_ones() as usize;
    if trailing_ones_in_suffix < aligned.suffix_bitlen() {
        return trailing_ones_in_suffix;
    }
    // Reverse `position` of the first not-all-ones bulk word equals the number
    // of all-ones bulk words after it; index it back from the end of the bulk.
    if let Some(full_one_bulk_words) = aligned.bulk_iter().rev().position(|w| w != u64::MAX) {
        return aligned.suffix_bitlen()
            + full_one_bulk_words * 64
            + aligned.bulk()[aligned.bulk().len() - full_one_bulk_words - 1].leading_ones()
                as usize;
    }

    // Suffix and bulk were all ones; finish the count in the prefix with the
    // same shift-to-the-top trick.
    let trailing_ones_in_prefix =
        (aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64)).leading_ones() as usize;
    aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_ones_in_prefix
}

#[cfg(test)]
mod tests {
use rand::Rng;

use super::*;
use crate::bitmap::Bitmap;

#[test]
fn leading_trailing() {
    // Checks all four scan directions on the same input in one invocation;
    // expectations are hand-computed.
    macro_rules! testcase {
        ($slice:expr, $offset:expr, $length:expr => lz=$lz:expr,lo=$lo:expr,tz=$tz:expr,to=$to:expr) => {
            assert_eq!(
                leading_zeros($slice, $offset, $length),
                $lz,
                "leading_zeros"
            );
            assert_eq!(leading_ones($slice, $offset, $length), $lo, "leading_ones");
            assert_eq!(
                trailing_zeros($slice, $offset, $length),
                $tz,
                "trailing_zeros"
            );
            assert_eq!(
                trailing_ones($slice, $offset, $length),
                $to,
                "trailing_ones"
            );
        };
    }

    // Degenerate and single-bit cases.
    testcase!(&[], 0, 0 => lz=0,lo=0,tz=0,to=0);
    testcase!(&[0], 0, 1 => lz=1,lo=0,tz=1,to=0);
    testcase!(&[1], 0, 1 => lz=0,lo=1,tz=0,to=1);

    // Three-bit patterns at offset 0 (bit order is LSB-first).
    testcase!(&[0b010], 0, 3 => lz=1,lo=0,tz=1,to=0);
    testcase!(&[0b101], 0, 3 => lz=0,lo=1,tz=0,to=1);
    testcase!(&[0b100], 0, 3 => lz=2,lo=0,tz=0,to=1);
    testcase!(&[0b110], 0, 3 => lz=1,lo=0,tz=0,to=2);
    testcase!(&[0b001], 0, 3 => lz=0,lo=1,tz=2,to=0);
    testcase!(&[0b011], 0, 3 => lz=0,lo=2,tz=1,to=0);

    // The same patterns viewed through a non-zero bit offset.
    testcase!(&[0b010], 1, 2 => lz=0,lo=1,tz=1,to=0);
    testcase!(&[0b101], 1, 2 => lz=1,lo=0,tz=0,to=1);
    testcase!(&[0b100], 1, 2 => lz=1,lo=0,tz=0,to=1);
    testcase!(&[0b110], 1, 2 => lz=0,lo=2,tz=0,to=2);
    testcase!(&[0b001], 1, 2 => lz=2,lo=0,tz=2,to=0);
    testcase!(&[0b011], 1, 2 => lz=0,lo=1,tz=1,to=0);
}

#[ignore = "Fuzz test. Too slow"]
#[test]
fn leading_trailing_fuzz() {
    // Compares the word-at-a-time kernels against a naive bit-by-bit
    // reference computed on a `Vec<bool>`, over random windows.
    let mut rng = rand::thread_rng();

    const SIZE: usize = 1000;
    const REPEATS: usize = 10_000;

    let mut v = Vec::<bool>::with_capacity(SIZE);

    for _ in 0..REPEATS {
        v.clear();
        // Random window [offset, offset + length) plus some slack past its
        // end, so out-of-window bits exist and must be ignored.
        let offset = rng.gen_range(0..SIZE);
        let length = rng.gen_range(0..SIZE - offset);
        let extra_padding = rng.gen_range(0..64);

        // Fill `v` with random bits, in randomly sized chunks.
        let mut num_remaining = usize::min(SIZE, offset + length + extra_padding);
        while num_remaining > 0 {
            let chunk_size = rng.gen_range(1..=num_remaining);
            v.extend(
                rng.clone()
                    .sample_iter(rand::distributions::Slice::new(&[false, true]).unwrap())
                    .take(chunk_size),
            );
            num_remaining -= chunk_size;
        }

        // Reference answers from the naive scan of the bool window.
        let v_slice = &v[offset..offset + length];
        let lz = v_slice.iter().take_while(|&v| !*v).count();
        let lo = v_slice.iter().take_while(|&v| *v).count();
        let tz = v_slice.iter().rev().take_while(|&v| !*v).count();
        let to = v_slice.iter().rev().take_while(|&v| *v).count();

        // Pack the bools into a Bitmap and run the kernels on its raw bytes.
        let bm = Bitmap::from_iter(v.iter().copied());
        let (slice, _, _) = bm.as_slice();

        assert_eq!(leading_zeros(slice, offset, length), lz);
        assert_eq!(leading_ones(slice, offset, length), lo);
        assert_eq!(trailing_zeros(slice, offset, length), tz);
        assert_eq!(trailing_ones(slice, offset, length), to);
    }
}
}
Loading