From 2147b6fe20b666a02d321f3f6f173f03ddd4984b Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 16:57:02 +0000 Subject: [PATCH 1/7] Add BooleanArray::from_unary and BooleanArray::from_binary --- arrow-array/src/array/boolean_array.rs | 72 +++++- arrow-data/src/bit_mask.rs | 216 +++++++++++++++++ arrow/src/compute/kernels/arithmetic.rs | 4 +- arrow/src/compute/kernels/arity.rs | 2 +- arrow/src/compute/kernels/boolean.rs | 2 +- arrow/src/compute/kernels/comparison.rs | 53 +--- arrow/src/compute/kernels/concat_elements.rs | 2 +- arrow/src/compute/mod.rs | 2 - arrow/src/compute/util.rs | 243 ------------------- 9 files changed, 295 insertions(+), 301 deletions(-) delete mode 100644 arrow/src/compute/util.rs diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index e166f467a70c..71ae2c625533 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -20,8 +20,9 @@ use crate::iterator::BooleanIter; use crate::raw_pointer::RawPtrBox; use crate::{print_long_array, Array, ArrayAccessor}; use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_data::bit_mask::combine_option_bitmap; use arrow_data::ArrayData; -use arrow_schema::DataType; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; /// Array of bools @@ -173,6 +174,75 @@ impl BooleanArray { ) -> impl Iterator> + 'a { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) } + + /// Create a [`BooleanArray`] by applying evaluating the operation for + /// each element of the provided array + pub fn from_unary(left: T, op: F) -> Result + where + F: Fn(T::Item) -> bool, + { + let null_bit_buffer = left + .data() + .null_buffer() + .map(|b| b.bit_slice(left.offset(), left.len())); + + let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe { + // SAFETY: i in range 0..len + op(left.value_unchecked(i)) + }); + + let data = unsafe { + ArrayData::new_unchecked( + DataType::Boolean, + left.len(), + None, + null_bit_buffer, + 0, + vec![Buffer::from(buffer)], + vec![], + ) + }; + Ok(Self::from(data)) + } + + /// Create a [`BooleanArray`] by evaluating the binary operation for + /// each element of the provided arrays + pub fn from_binary( + left: T, + right: S, + op: F, + ) -> Result + where + F: Fn(T::Item, S::Item) -> bool, + { + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform binary operation on arrays of different length" + .to_string(), + )); + } + + let null_bit_buffer = + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; + + let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe { + // SAFETY: i in range 0..len + op(left.value_unchecked(i), right.value_unchecked(i)) + }); + + let data = unsafe { + ArrayData::new_unchecked( + DataType::Boolean, + left.len(), + None, + null_bit_buffer, + 0, + vec![Buffer::from(buffer)], + vec![], + ) + }; + Ok(Self::from(data)) + } } impl Array for BooleanArray { diff --git a/arrow-data/src/bit_mask.rs b/arrow-data/src/bit_mask.rs index 6a0a46038992..9b276a62b7a8 100644 --- a/arrow-data/src/bit_mask.rs +++ b/arrow-data/src/bit_mask.rs @@ -17,8 +17,12 @@ //! Utils for working with packed bit masks +use crate::ArrayData; use arrow_buffer::bit_chunk_iterator::BitChunks; use arrow_buffer::bit_util::{ceil, get_bit, set_bit}; +use arrow_buffer::buffer::buffer_bin_and; +use arrow_buffer::Buffer; +use arrow_schema::ArrowError; /// Sets all bits on `write_data` in the range `[offset_write..offset_write+len]` to be equal to the /// bits in `data` in the range `[offset_read..offset_read+len]` @@ -62,9 +66,48 @@ pub fn set_bits( null_count as usize } +/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation. +/// +/// This function is useful when implementing operations on higher level arrays. +pub fn combine_option_bitmap( + arrays: &[&ArrayData], + len_in_bits: usize, +) -> Result, ArrowError> { + arrays + .iter() + .map(|array| (array.null_buffer().cloned(), array.offset())) + .reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) { + ((None, _), (None, _)) => (None, 0), + ((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => { + (Some(buffer), offset) + } + ((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => ( + Some(buffer_bin_and( + &buffer_left, + offset_left, + &buffer_right, + offset_right, + len_in_bits, + )), + 0, + ), + }) + .map_or( + Err(ArrowError::ComputeError( + "Arrays must not be empty".to_string(), + )), + |(buffer, offset)| { + Ok(buffer.map(|buffer| buffer.bit_slice(offset, len_in_bits))) + }, + ) +} + #[cfg(test)] mod tests { use super::*; + use arrow_buffer::buffer::buffer_bin_or; + use arrow_schema::DataType; + use std::sync::Arc; #[test] fn test_set_bits_aligned() { @@ -187,4 +230,177 @@ mod tests { assert_eq!(destination, expected_data); assert_eq!(result, expected_null_count); } + + /// Compares the null bitmaps of two arrays using a bitwise `or` operation. + /// + /// This function is useful when implementing operations on higher level arrays. + pub(super) fn compare_option_bitmap( + left_data: &ArrayData, + right_data: &ArrayData, + len_in_bits: usize, + ) -> Result, ArrowError> { + let left_offset_in_bits = left_data.offset(); + let right_offset_in_bits = right_data.offset(); + + let left = left_data.null_buffer(); + let right = right_data.null_buffer(); + + match left { + None => match right { + None => Ok(None), + Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))), + }, + Some(l) => match right { + None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))), + + Some(r) => Ok(Some(buffer_bin_or( + l, + left_offset_in_bits, + r, + right_offset_in_bits, + len_in_bits, + ))), + }, + } + } + + fn make_data_with_null_bit_buffer( + len: usize, + offset: usize, + null_bit_buffer: Option, + ) -> Arc { + let buffer = Buffer::from(&vec![11; len + offset]); + + Arc::new( + ArrayData::try_new( + DataType::UInt8, + len, + null_bit_buffer, + offset, + vec![buffer], + vec![], + ) + .unwrap(), + ) + } + + #[test] + fn test_combine_option_bitmap() { + let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); + let some_bitmap = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); + let inverse_bitmap = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); + let some_other_bitmap = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111]))); + assert_eq!( + combine_option_bitmap(&[], 8).unwrap_err().to_string(), + "Compute error: Arrays must not be empty", + ); + assert_eq!( + Some(Buffer::from([0b01001010])), + combine_option_bitmap(&[&some_bitmap], 8).unwrap() + ); + assert_eq!( + None, + combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01001010])), + combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b11010111])), + combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01001010])), + combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b0])), + combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01000010])), + combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,) + .unwrap() + ); + assert_eq!( + Some(Buffer::from([0b00001001])), + combine_option_bitmap( + &[ + &some_bitmap.slice(3, 5), + &inverse_bitmap.slice(2, 5), + &some_other_bitmap.slice(1, 5) + ], + 5, + ) + .unwrap() + ); + } + + #[test] + fn test_combine_option_bitmap_with_offsets() { + let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); + let bitmap0 = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010]))); + let bitmap1 = + make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1]))); + let bitmap2 = + make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10]))); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap1], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap2], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap1, &none_bitmap], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&none_bitmap, &bitmap2], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap0, &bitmap1], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap1, &bitmap2], 8).unwrap() + ); + } + + #[test] + fn test_compare_option_bitmap() { + let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); + let some_bitmap = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); + let inverse_bitmap = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); + assert_eq!( + None, + compare_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01001010])), + compare_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01001010])), + compare_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01001010])), + compare_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b11111111])), + compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap() + ); + } } diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index c57e27095c23..9119ae2e67c3 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -310,7 +310,7 @@ where } // Create the combined `Bitmap` - let null_bit_buffer = crate::compute::util::combine_option_bitmap( + let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap( &[left.data_ref(), right.data_ref()], left.len(), )?; @@ -652,7 +652,7 @@ where ))); } - let null_bit_buffer = crate::compute::util::combine_option_bitmap( + let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap( &[left.data_ref(), right.data_ref()], left.len(), )?; diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index d0f18cf5866d..94d7d83a886d 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -22,12 +22,12 @@ use crate::array::{ PrimitiveArray, }; use crate::buffer::Buffer; -use crate::compute::util::combine_option_bitmap; use crate::datatypes::{ArrowNumericType, ArrowPrimitiveType}; use crate::downcast_dictionary_array; use crate::error::{ArrowError, Result}; use crate::util::bit_iterator::try_for_each_valid_idx; use arrow_buffer::MutableBuffer; +use arrow_data::bit_mask::combine_option_bitmap; use std::sync::Arc; #[inline] diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow/src/compute/kernels/boolean.rs index 1b33fa19ea02..b263af10d8ba 100644 --- a/arrow/src/compute/kernels/boolean.rs +++ b/arrow/src/compute/kernels/boolean.rs @@ -29,10 +29,10 @@ use crate::buffer::{ bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer, }; -use crate::compute::util::combine_option_bitmap; use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; use crate::util::bit_util::ceil; +use arrow_data::bit_mask::combine_option_bitmap; /// Updates null buffer based on data buffer and null buffer of the operand at other side /// in boolean AND kernel with Kleene logic. In short, because for AND kernel, null AND false diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 33a24500aabd..6e74ba4f5b05 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -25,12 +25,12 @@ use crate::array::*; use crate::buffer::{buffer_unary_not, Buffer, MutableBuffer}; -use crate::compute::util::combine_option_bitmap; use crate::datatypes::*; #[allow(unused_imports)] use crate::downcast_dictionary_array; use crate::error::{ArrowError, Result}; use crate::util::bit_util; +use arrow_data::bit_mask::combine_option_bitmap; use arrow_select::take::take; use num::ToPrimitive; use regex::Regex; @@ -46,33 +46,7 @@ fn compare_op( where F: Fn(T::Item, S::Item) -> bool, { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform comparison operation on arrays of different length" - .to_string(), - )); - } - - let null_bit_buffer = - combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; - - let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe { - // SAFETY: i in range 0..len - op(left.value_unchecked(i), right.value_unchecked(i)) - }); - - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - left.len(), - None, - null_bit_buffer, - 0, - vec![Buffer::from(buffer)], - vec![], - ) - }; - Ok(BooleanArray::from(data)) + BooleanArray::from_binary(left, right, op) } /// Helper function to perform boolean lambda function on values from array accessor, this @@ -81,28 +55,7 @@ fn compare_op_scalar(left: T, op: F) -> Result bool, { - let null_bit_buffer = left - .data() - .null_buffer() - .map(|b| b.bit_slice(left.offset(), left.len())); - - let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe { - // SAFETY: i in range 0..len - op(left.value_unchecked(i)) - }); - - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - left.len(), - None, - null_bit_buffer, - 0, - vec![Buffer::from(buffer)], - vec![], - ) - }; - Ok(BooleanArray::from(data)) + BooleanArray::from_unary(left, op) } /// Evaluate `op(left, right)` for [`PrimitiveArray`]s using a specified diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index a908ba9ab5d8..7212cfa6064e 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -16,8 +16,8 @@ // under the License. use crate::array::*; -use crate::compute::util::combine_option_bitmap; use crate::error::{ArrowError, Result}; +use arrow_data::bit_mask::combine_option_bitmap; /// Returns the elementwise concatenation of a [`StringArray`]. /// diff --git a/arrow/src/compute/mod.rs b/arrow/src/compute/mod.rs index 28e5e6b520bc..c0b10afe48a6 100644 --- a/arrow/src/compute/mod.rs +++ b/arrow/src/compute/mod.rs @@ -19,8 +19,6 @@ pub mod kernels; -mod util; - pub use self::kernels::aggregate::*; pub use self::kernels::arithmetic::*; pub use self::kernels::arity::*; diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs deleted file mode 100644 index 9ddc535017ff..000000000000 --- a/arrow/src/compute/util.rs +++ /dev/null @@ -1,243 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Common utilities for computation kernels. - -use crate::array::*; -use crate::buffer::{buffer_bin_and, Buffer}; -use crate::error::{ArrowError, Result}; - -/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation. -/// -/// This function is useful when implementing operations on higher level arrays. -#[allow(clippy::unnecessary_wraps)] -pub(super) fn combine_option_bitmap( - arrays: &[&ArrayData], - len_in_bits: usize, -) -> Result> { - arrays - .iter() - .map(|array| (array.null_buffer().cloned(), array.offset())) - .reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) { - ((None, _), (None, _)) => (None, 0), - ((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => { - (Some(buffer), offset) - } - ((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => ( - Some(buffer_bin_and( - &buffer_left, - offset_left, - &buffer_right, - offset_right, - len_in_bits, - )), - 0, - ), - }) - .map_or( - Err(ArrowError::ComputeError( - "Arrays must not be empty".to_string(), - )), - |(buffer, offset)| { - Ok(buffer.map(|buffer| buffer.bit_slice(offset, len_in_bits))) - }, - ) -} - -#[cfg(test)] -pub(super) mod tests { - use super::*; - - use std::sync::Arc; - - use crate::array::ArrayData; - use crate::buffer::buffer_bin_or; - use crate::datatypes::DataType; - - /// Compares the null bitmaps of two arrays using a bitwise `or` operation. - /// - /// This function is useful when implementing operations on higher level arrays. - pub(super) fn compare_option_bitmap( - left_data: &ArrayData, - right_data: &ArrayData, - len_in_bits: usize, - ) -> Result> { - let left_offset_in_bits = left_data.offset(); - let right_offset_in_bits = right_data.offset(); - - let left = left_data.null_buffer(); - let right = right_data.null_buffer(); - - match left { - None => match right { - None => Ok(None), - Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))), - }, - Some(l) => match right { - None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))), - - Some(r) => Ok(Some(buffer_bin_or( - l, - left_offset_in_bits, - r, - right_offset_in_bits, - len_in_bits, - ))), - }, - } - } - - fn make_data_with_null_bit_buffer( - len: usize, - offset: usize, - null_bit_buffer: Option, - ) -> Arc { - let buffer = Buffer::from(&vec![11; len + offset]); - - Arc::new( - ArrayData::try_new( - DataType::UInt8, - len, - null_bit_buffer, - offset, - vec![buffer], - vec![], - ) - .unwrap(), - ) - } - - #[test] - fn test_combine_option_bitmap() { - let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); - let some_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); - let inverse_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); - let some_other_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111]))); - assert_eq!( - combine_option_bitmap(&[], 8).unwrap_err().to_string(), - "Compute error: Arrays must not be empty", - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - combine_option_bitmap(&[&some_bitmap], 8).unwrap() - ); - assert_eq!( - None, - combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b11010111])), - combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b0])), - combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b01000010])), - combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,) - .unwrap() - ); - assert_eq!( - Some(Buffer::from([0b00001001])), - combine_option_bitmap( - &[ - &some_bitmap.slice(3, 5), - &inverse_bitmap.slice(2, 5), - &some_other_bitmap.slice(1, 5) - ], - 5, - ) - .unwrap() - ); - } - - #[test] - fn test_combine_option_bitmap_with_offsets() { - let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); - let bitmap0 = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010]))); - let bitmap1 = - make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1]))); - let bitmap2 = - make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10]))); - assert_eq!( - Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap1], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap2], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap1, &none_bitmap], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&none_bitmap, &bitmap2], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap0, &bitmap1], 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap1, &bitmap2], 8).unwrap() - ); - } - - #[test] - fn test_compare_option_bitmap() { - let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); - let some_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); - let inverse_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); - assert_eq!( - None, - compare_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - compare_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - compare_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - compare_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap() - ); - assert_eq!( - Some(Buffer::from([0b11111111])), - compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap() - ); - } -} From 5e9a7bc3381e457c41371c440f944cca57285273 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 17:12:23 +0000 Subject: [PATCH 2/7] Add docs --- arrow-array/src/array/boolean_array.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 71ae2c625533..df4c9da7a14a 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -175,8 +175,16 @@ impl BooleanArray { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) } - /// Create a [`BooleanArray`] by applying evaluating the operation for + /// Create a [`BooleanArray`] by evaluating the operation for /// each element of the provided array + /// + /// ``` + /// # use arrow_array::{BooleanArray, Int32Array}; + /// + /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); + /// let r = BooleanArray::from_unary(&array, |x| x > 2).unwrap(); + /// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true])); + /// ``` pub fn from_unary(left: T, op: F) -> Result where F: Fn(T::Item) -> bool, @@ -207,6 +215,15 @@ impl BooleanArray { /// Create a [`BooleanArray`] by evaluating the binary operation for /// each element of the provided arrays + /// + /// ``` + /// # use arrow_array::{BooleanArray, Int32Array}; + /// + /// let a = Int32Array::from(vec![1, 2, 3, 4, 5]); + /// let b = Int32Array::from(vec![1, 2, 0, 2, 5]); + /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b).unwrap(); + /// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true])); + /// ``` pub fn from_binary( left: T, right: S, From 12ad98bc69e876189bf1332316c0703c7789d7d4 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 17:29:09 +0000 Subject: [PATCH 3/7] Tweak signatures --- arrow-array/src/array/boolean_array.rs | 12 ++++++------ arrow/src/compute/kernels/comparison.rs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index df4c9da7a14a..b98d43a82e6d 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -182,12 +182,12 @@ impl BooleanArray { /// # use arrow_array::{BooleanArray, Int32Array}; /// /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); - /// let r = BooleanArray::from_unary(&array, |x| x > 2).unwrap(); + /// let r = BooleanArray::from_unary(&array, |x| x > 2); /// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true])); /// ``` - pub fn from_unary(left: T, op: F) -> Result + pub fn from_unary(left: T, mut op: F) -> Self where - F: Fn(T::Item) -> bool, + F: FnMut(T::Item) -> bool, { let null_bit_buffer = left .data() @@ -210,7 +210,7 @@ impl BooleanArray { vec![], ) }; - Ok(Self::from(data)) + Self::from(data) } /// Create a [`BooleanArray`] by evaluating the binary operation for @@ -227,10 +227,10 @@ impl BooleanArray { pub fn from_binary( left: T, right: S, - op: F, + mut op: F, ) -> Result where - F: Fn(T::Item, S::Item) -> bool, + F: FnMut(T::Item, S::Item) -> bool, { if left.len() != right.len() { return Err(ArrowError::ComputeError( diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 6e74ba4f5b05..d705d850f8ef 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -55,7 +55,7 @@ fn compare_op_scalar(left: T, op: F) -> Result bool, { - BooleanArray::from_unary(left, op) + Ok(BooleanArray::from_unary(left, op)) } /// Evaluate `op(left, right)` for [`PrimitiveArray`]s using a specified From d6f6755710858d126f19149fa4d22d5b64fc67ee Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 18:07:04 +0000 Subject: [PATCH 4/7] Remove fallibility from combine_option_bitmap --- arrow-array/src/array/boolean_array.rs | 2 +- arrow-data/src/bit_mask.rs | 61 ++++++++------------ arrow/src/compute/kernels/arithmetic.rs | 4 +- arrow/src/compute/kernels/arity.rs | 8 +-- arrow/src/compute/kernels/boolean.rs | 2 +- arrow/src/compute/kernels/comparison.rs | 10 ++-- arrow/src/compute/kernels/concat_elements.rs | 4 +- 7 files changed, 38 insertions(+), 53 deletions(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index b98d43a82e6d..5d1d5506f548 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -240,7 +240,7 @@ impl BooleanArray { } let null_bit_buffer = - combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len()); let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe { // SAFETY: i in range 0..len diff --git a/arrow-data/src/bit_mask.rs b/arrow-data/src/bit_mask.rs index 9b276a62b7a8..5235b358a42e 100644 --- a/arrow-data/src/bit_mask.rs +++ b/arrow-data/src/bit_mask.rs @@ -22,7 +22,6 @@ use arrow_buffer::bit_chunk_iterator::BitChunks; use arrow_buffer::bit_util::{ceil, get_bit, set_bit}; use arrow_buffer::buffer::buffer_bin_and; use arrow_buffer::Buffer; -use arrow_schema::ArrowError; /// Sets all bits on `write_data` in the range `[offset_write..offset_write+len]` to be equal to the /// bits in `data` in the range `[offset_read..offset_read+len]` @@ -72,8 +71,8 @@ pub fn set_bits( pub fn combine_option_bitmap( arrays: &[&ArrayData], len_in_bits: usize, -) -> Result, ArrowError> { - arrays +) -> Option { + let (buffer, offset) = arrays .iter() .map(|array| (array.null_buffer().cloned(), array.offset())) .reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) { @@ -91,15 +90,9 @@ pub fn combine_option_bitmap( )), 0, ), - }) - .map_or( - Err(ArrowError::ComputeError( - "Arrays must not be empty".to_string(), - )), - |(buffer, offset)| { - Ok(buffer.map(|buffer| buffer.bit_slice(offset, len_in_bits))) - }, - ) + })?; + + Some(buffer?.bit_slice(offset, len_in_bits)) } #[cfg(test)] @@ -293,38 +286,34 @@ mod tests { make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); let some_other_bitmap = make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111]))); - assert_eq!( - combine_option_bitmap(&[], 8).unwrap_err().to_string(), - "Compute error: Arrays must not be empty", - ); + assert_eq!(None, combine_option_bitmap(&[], 8)); assert_eq!( Some(Buffer::from([0b01001010])), - combine_option_bitmap(&[&some_bitmap], 8).unwrap() + combine_option_bitmap(&[&some_bitmap], 8) ); assert_eq!( None, - combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8).unwrap() + combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8) ); assert_eq!( Some(Buffer::from([0b01001010])), - combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8).unwrap() + combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8) ); assert_eq!( Some(Buffer::from([0b11010111])), - combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8).unwrap() + combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8) ); assert_eq!( Some(Buffer::from([0b01001010])), - combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,).unwrap() + combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,) ); assert_eq!( Some(Buffer::from([0b0])), - combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,).unwrap() + combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,) ); assert_eq!( Some(Buffer::from([0b01000010])), combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,) - .unwrap() ); assert_eq!( Some(Buffer::from([0b00001001])), @@ -336,7 +325,6 @@ mod tests { ], 5, ) - .unwrap() ); } @@ -351,27 +339,27 @@ mod tests { make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10]))); assert_eq!( Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap1], 8).unwrap() + combine_option_bitmap(&[&bitmap1], 8) ); assert_eq!( Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap2], 8).unwrap() + combine_option_bitmap(&[&bitmap2], 8) ); assert_eq!( Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap1, &none_bitmap], 8).unwrap() + combine_option_bitmap(&[&bitmap1, &none_bitmap], 8) ); assert_eq!( Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&none_bitmap, &bitmap2], 8).unwrap() + combine_option_bitmap(&[&none_bitmap, &bitmap2], 8) ); assert_eq!( Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap0, &bitmap1], 8).unwrap() + combine_option_bitmap(&[&bitmap0, &bitmap1], 8) ); assert_eq!( Some(Buffer::from([0b10101010])), - combine_option_bitmap(&[&bitmap1, &bitmap2], 8).unwrap() + combine_option_bitmap(&[&bitmap1, &bitmap2], 8) ); } @@ -382,25 +370,22 @@ mod tests { make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); let inverse_bitmap = make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); - assert_eq!( - None, - compare_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap() - ); + assert_eq!(None, compare_option_bitmap(&none_bitmap, &none_bitmap, 8)); assert_eq!( Some(Buffer::from([0b01001010])), - compare_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap() + compare_option_bitmap(&some_bitmap, &none_bitmap, 8) ); assert_eq!( Some(Buffer::from([0b01001010])), - compare_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap() + compare_option_bitmap(&none_bitmap, &some_bitmap, 8) ); assert_eq!( Some(Buffer::from([0b01001010])), - compare_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap() + compare_option_bitmap(&some_bitmap, &some_bitmap, 8) ); assert_eq!( Some(Buffer::from([0b11111111])), - compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap() + compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8) ); } } diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 9119ae2e67c3..588df6cee183 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -313,7 +313,7 @@ where let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap( &[left.data_ref(), right.data_ref()], left.len(), - )?; + ); let lanes = T::lanes(); let buffer_size = left.len() * std::mem::size_of::(); @@ -655,7 +655,7 @@ where let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap( &[left.data_ref(), right.data_ref()], left.len(), - )?; + ); // Safety justification: Since the inputs are valid Arrow arrays, all values are // valid indexes into the dictionary (which is verified during construction) diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index 94d7d83a886d..6207ab63935d 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -215,7 +215,7 @@ where return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE))); } - let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap(); + let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len); let null_count = null_buffer .as_ref() .map(|x| len - x.count_set_bits_offset(0, len)) @@ -275,7 +275,7 @@ where let len = a.len(); - let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap(); + let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len); let null_count = null_buffer .as_ref() .map(|x| len - x.count_set_bits_offset(0, len)) @@ -333,7 +333,7 @@ where if a.null_count() == 0 && b.null_count() == 0 { try_binary_no_nulls(len, a, b, op) } else { - let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap(); + let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len); let null_count = null_buffer .as_ref() @@ -401,7 +401,7 @@ where if a.null_count() == 0 && b.null_count() == 0 { try_binary_no_nulls_mut(len, a, b, op) } else { - let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap(); + let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len); let null_count = null_buffer .as_ref() .map(|x| len - x.count_set_bits_offset(0, len)) diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow/src/compute/kernels/boolean.rs index b263af10d8ba..aa42f3d20c03 100644 --- a/arrow/src/compute/kernels/boolean.rs +++ b/arrow/src/compute/kernels/boolean.rs @@ -108,7 +108,7 @@ pub(crate) fn build_null_buffer_for_and_or( len_in_bits: usize, ) -> Option { // `arrays` are not empty, so safely do `unwrap` directly. - combine_option_bitmap(&[left_data, right_data], len_in_bits).unwrap() + combine_option_bitmap(&[left_data, right_data], len_in_bits) } /// Updates null buffer based on data buffer and null buffer of the operand at other side diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index d705d850f8ef..d80bd8367bc7 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -111,7 +111,7 @@ where } let null_bit_buffer = - combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len()); let mut result = BooleanBufferBuilder::new(left.len()); for i in 0..left.len() { @@ -1125,7 +1125,7 @@ pub fn regexp_is_match_utf8( )); } let null_bit_buffer = - combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len())?; + combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len()); let mut patterns: HashMap = HashMap::new(); let mut result = BooleanBufferBuilder::new(array.len()); @@ -2247,7 +2247,7 @@ where } let null_bit_buffer = - combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], len); // we process the data in chunks so that each iteration results in one u64 of comparison result bits const CHUNK_SIZE: usize = 64; @@ -3654,7 +3654,7 @@ where let num_bytes = bit_util::ceil(left_len, 8); let not_both_null_bit_buffer = - match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? { + match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) { Some(buff) => buff, None => new_all_set_buffer(num_bytes), }; @@ -3711,7 +3711,7 @@ where let num_bytes = bit_util::ceil(left_len, 8); let not_both_null_bit_buffer = - match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? { + match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) { Some(buff) => buff, None => new_all_set_buffer(num_bytes), }; diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index 7212cfa6064e..25c8f60de3f6 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -45,7 +45,7 @@ pub fn concat_elements_utf8( ))); } - let output_bitmap = combine_option_bitmap(&[left.data(), right.data()], left.len())?; + let output_bitmap = combine_option_bitmap(&[left.data(), right.data()], left.len()); let left_offsets = left.value_offsets(); let right_offsets = right.value_offsets(); @@ -111,7 +111,7 @@ pub fn concat_elements_utf8_many( .collect::>() .as_slice(), size, - )?; + ); let data_values = arrays .iter() From a2d6809dbd109f979b30e3e6aa43814064258cc8 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 18:10:26 +0000 Subject: [PATCH 5/7] Remove unused compare_option_bitmap --- arrow-data/src/bit_mask.rs | 59 -------------------------------------- 1 file changed, 59 deletions(-) diff --git a/arrow-data/src/bit_mask.rs b/arrow-data/src/bit_mask.rs index 5235b358a42e..a09382bd4bfd 100644 --- a/arrow-data/src/bit_mask.rs +++ b/arrow-data/src/bit_mask.rs @@ -224,39 +224,6 @@ mod tests { assert_eq!(result, expected_null_count); } - /// Compares the null bitmaps of two arrays using a bitwise `or` operation. - /// - /// This function is useful when implementing operations on higher level arrays. - pub(super) fn compare_option_bitmap( - left_data: &ArrayData, - right_data: &ArrayData, - len_in_bits: usize, - ) -> Result, ArrowError> { - let left_offset_in_bits = left_data.offset(); - let right_offset_in_bits = right_data.offset(); - - let left = left_data.null_buffer(); - let right = right_data.null_buffer(); - - match left { - None => match right { - None => Ok(None), - Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))), - }, - Some(l) => match right { - None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))), - - Some(r) => Ok(Some(buffer_bin_or( - l, - left_offset_in_bits, - r, - right_offset_in_bits, - len_in_bits, - ))), - }, - } - } - fn make_data_with_null_bit_buffer( len: usize, offset: usize, @@ -362,30 +329,4 @@ mod tests { combine_option_bitmap(&[&bitmap1, &bitmap2], 8) ); } - - #[test] - fn test_compare_option_bitmap() { - let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); - let some_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); - let inverse_bitmap = - make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); - assert_eq!(None, compare_option_bitmap(&none_bitmap, &none_bitmap, 8)); - assert_eq!( - Some(Buffer::from([0b01001010])), - compare_option_bitmap(&some_bitmap, &none_bitmap, 8) - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - compare_option_bitmap(&none_bitmap, &some_bitmap, 8) - ); - assert_eq!( - Some(Buffer::from([0b01001010])), - compare_option_bitmap(&some_bitmap, &some_bitmap, 8) - ); - assert_eq!( - Some(Buffer::from([0b11111111])), - compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8) - ); - } } From b99f7d763cf95aec7d3601f3a114a93a76328c7e Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 18:12:50 +0000 Subject: [PATCH 6/7] Remove fallibility --- arrow-array/src/array/boolean_array.rs | 18 +++++++++--------- arrow-data/src/bit_mask.rs | 1 - arrow/src/compute/kernels/comparison.rs | 9 ++++++++- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 5d1d5506f548..202f0e658e21 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -22,7 +22,7 @@ use crate::{print_long_array, Array, ArrayAccessor}; use arrow_buffer::{bit_util, Buffer, MutableBuffer}; use arrow_data::bit_mask::combine_option_bitmap; use arrow_data::ArrayData; -use arrow_schema::{ArrowError, DataType}; +use arrow_schema::DataType; use std::any::Any; /// Array of bools @@ -224,20 +224,20 @@ impl BooleanArray { /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b).unwrap(); /// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true])); /// ``` + /// + /// # Panics + /// + /// This function panics if left and right are not the same length + /// pub fn from_binary( left: T, right: S, mut op: F, - ) -> Result + ) -> Self where F: FnMut(T::Item, S::Item) -> bool, { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform binary operation on arrays of different length" - .to_string(), - )); - } + assert_eq!(left.len(), right.len()); let null_bit_buffer = combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len()); @@ -258,7 +258,7 @@ impl BooleanArray { vec![], ) }; - Ok(Self::from(data)) + Self::from(data) } } diff --git a/arrow-data/src/bit_mask.rs b/arrow-data/src/bit_mask.rs index a09382bd4bfd..ed8e65257788 100644 --- a/arrow-data/src/bit_mask.rs +++ b/arrow-data/src/bit_mask.rs @@ -98,7 +98,6 @@ pub fn combine_option_bitmap( #[cfg(test)] mod tests { use super::*; - use arrow_buffer::buffer::buffer_bin_or; use arrow_schema::DataType; use std::sync::Arc; diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index d80bd8367bc7..b672410fec15 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -46,7 +46,14 @@ fn compare_op( where F: Fn(T::Item, S::Item) -> bool, { - BooleanArray::from_binary(left, right, op) + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform comparison operation on arrays of different length" + .to_string(), + )); + } + + Ok(BooleanArray::from_binary(left, right, op)) } /// Helper function to perform boolean lambda function on values from array accessor, this From 86c295c4c80a39d81188f56a7845b3d1784ad155 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 2 Dec 2022 18:19:41 +0000 Subject: [PATCH 7/7] Fix doc --- arrow-array/src/array/boolean_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 202f0e658e21..920fdabc2c71 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -221,7 +221,7 @@ impl BooleanArray { /// /// let a = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let b = Int32Array::from(vec![1, 2, 0, 2, 5]); - /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b).unwrap(); + /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b); /// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true])); /// ``` ///