Skip to content

Commit

Permalink
Add Array::logical_null_count for inspecting number of null values
Browse files Browse the repository at this point in the history
Add a counterpart of `Array::null_count` that counts the logical null
values. This will be useful in DataFusion. The current alternative is to
compute the null mask (via `Array::logical_nulls()`) and count the nulls in it.
Given that this might be expensive and verbose, callers may naturally be
steered towards `Array::null_count`, which may or may not be applicable,
depending on the context.
  • Loading branch information
findepi committed Oct 21, 2024
1 parent 4d4c0c6 commit bf12f4e
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 3 deletions.
1 change: 1 addition & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,7 @@ mod tests {
assert_eq!(array.values().data_type(), &DataType::Utf8);

assert_eq!(array.null_count(), 1);
assert_eq!(array.logical_null_count(), 1);

assert!(array.keys().is_valid(0));
assert!(array.keys().is_valid(1));
Expand Down
1 change: 1 addition & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,7 @@ mod tests {
let array = FixedSizeBinaryArray::from(values);
assert_eq!(array.len(), 4);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);
assert_eq!(array.value(0), b"one");
assert_eq!(array.value(1), b"two");
assert_eq!(array.value(2), b"six");
Expand Down
27 changes: 26 additions & 1 deletion arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,27 @@ pub trait Array: std::fmt::Debug + Send + Sync {
self.nulls().map(|n| n.null_count()).unwrap_or_default()
}

/// Returns the number of logically null elements in this array.
///
/// The count is taken from the null mask produced by [`Array::logical_nulls`];
/// when that method yields `None`, the result is `0`. To count only the nulls
/// recorded in the array's own null buffer, use [`Array::null_count`] instead.
///
/// # Example:
///
/// ```
/// use arrow_array::{Array, Int32Array};
///
/// // Construct an array with values [1, NULL, NULL]
/// let array = Int32Array::from(vec![Some(1), None, None]);
///
/// assert_eq!(array.logical_null_count(), 2);
/// ```
fn logical_null_count(&self) -> usize {
    match self.logical_nulls() {
        Some(mask) => mask.null_count(),
        // No logical null mask means there are no logical nulls to count.
        None => 0,
    }
}

/// Returns `false` if the array is guaranteed to not contain any logical nulls
///
/// In general this will be equivalent to `Array::null_count() != 0` but may differ in the
Expand All @@ -289,7 +310,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key,
/// and therefore would not appear in [`Array::logical_nulls`].
fn is_nullable(&self) -> bool {
self.null_count() != 0
self.logical_null_count() != 0
}

/// Returns the total number of bytes of memory pointed to by this array.
Expand Down Expand Up @@ -959,11 +980,13 @@ mod tests {
let array = as_union_array(array.as_ref());
assert_eq!(array.len(), 4);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);

for i in 0..4 {
let a = array.value(i);
assert_eq!(a.len(), 1);
assert_eq!(a.null_count(), 1);
assert_eq!(a.logical_null_count(), 1);
assert!(a.is_null(0))
}

Expand All @@ -987,6 +1010,7 @@ mod tests {
array => {
assert_eq!(array.len(), 4);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);
assert_eq!(array.values().len(), 1);
assert_eq!(array.values().null_count(), 1);
assert_eq!(array.run_ends().len(), 4);
Expand All @@ -1012,6 +1036,7 @@ mod tests {

assert_eq!(array.len(), 6);
assert_eq!(array.null_count(), 6);
assert_eq!(array.logical_null_count(), 6);
array.iter().for_each(|x| assert!(x.is_none()));
}
}
Expand Down
7 changes: 5 additions & 2 deletions arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use std::sync::Arc;
/// assert!(array.is_nullable());
/// assert_eq!(array.len(), 10);
/// assert_eq!(array.null_count(), 0);
/// assert_eq!(array.logical_null_count(), 10);
/// assert_eq!(array.logical_nulls().unwrap().null_count(), 10);
/// ```
#[derive(Clone)]
Expand Down Expand Up @@ -116,8 +117,8 @@ impl Array for NullArray {
(self.len != 0).then(|| NullBuffer::new_null(self.len))
}

fn is_nullable(&self) -> bool {
!self.is_empty()
/// Returns the array length: the logical null mask built for a `NullArray`
/// marks every element as null, so the count always equals `len`.
fn logical_null_count(&self) -> usize {
self.len
}

fn get_buffer_memory_size(&self) -> usize {
Expand Down Expand Up @@ -172,6 +173,7 @@ mod tests {

assert_eq!(null_arr.len(), 32);
assert_eq!(null_arr.null_count(), 0);
assert_eq!(null_arr.logical_null_count(), 32);
assert_eq!(null_arr.logical_nulls().unwrap().null_count(), 32);
assert!(null_arr.is_valid(0));
assert!(null_arr.is_nullable());
Expand All @@ -184,6 +186,7 @@ mod tests {
let array2 = array1.slice(8, 16);
assert_eq!(array2.len(), 16);
assert_eq!(array2.null_count(), 0);
assert_eq!(array2.logical_null_count(), 16);
assert_eq!(array2.logical_nulls().unwrap().null_count(), 16);
assert!(array2.is_valid(0));
assert!(array2.is_nullable());
Expand Down
6 changes: 6 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@ mod tests {

assert_eq!(array.len(), 20);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);

assert_eq!(
"RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
Expand All @@ -799,6 +800,7 @@ mod tests {

assert_eq!(array.len(), 4);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 1);

let array: RunArray<Int16Type> = test.into_iter().collect();
assert_eq!(
Expand All @@ -814,6 +816,7 @@ mod tests {

assert_eq!(array.len(), 4);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);

let run_ends = array.run_ends();
assert_eq!(&[1, 2, 3, 4], run_ends.values());
Expand All @@ -826,6 +829,7 @@ mod tests {

assert_eq!(array.len(), 6);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 3);

let run_ends = array.run_ends();
assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
Expand All @@ -842,6 +846,7 @@ mod tests {

assert_eq!(array.len(), 3);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 3);

let run_ends = array.run_ends();
assert_eq!(3, run_ends.len());
Expand All @@ -862,6 +867,7 @@ mod tests {
assert_eq!(array.values().data_type(), &DataType::Utf8);

assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 1);
assert_eq!(array.len(), 4);
assert_eq!(array.values().null_count(), 1);

Expand Down
3 changes: 3 additions & 0 deletions arrow-array/src/builder/generic_byte_run_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ mod tests {

assert_eq!(array.len(), 11);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 2);

assert_eq!(array.run_ends().values(), &[3, 5, 7, 11]);

Expand Down Expand Up @@ -448,6 +449,7 @@ mod tests {

assert_eq!(array.len(), 5);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 1);

assert_eq!(array.run_ends().values(), &[1, 2, 4, 5]);

Expand All @@ -469,6 +471,7 @@ mod tests {

assert_eq!(array.len(), 8);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 1);

assert_eq!(array.run_ends().values(), &[1, 2, 4, 7, 8]);

Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/builder/generic_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,10 +545,12 @@ mod tests {
let array = builder.finish();
assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
assert_eq!(array.null_count(), 1);
assert_eq!(array.logical_null_count(), 1);
assert!(array.is_null(3));
let elements = array.values().as_primitive::<Int32Type>();
assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
assert_eq!(elements.null_count(), 1);
assert_eq!(elements.logical_null_count(), 1);
assert!(elements.is_null(3));
}

Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/builder/primitive_run_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ mod tests {
let array = builder.finish();

assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 1);
assert_eq!(array.len(), 6);

assert_eq!(array.run_ends().values(), &[3, 4, 6]);
Expand All @@ -302,6 +303,7 @@ mod tests {

assert_eq!(array.len(), 11);
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);
assert_eq!(array.run_ends().values(), &[1, 3, 5, 9, 10, 11]);
assert_eq!(
array.values().as_primitive::<Int16Type>().values(),
Expand Down
1 change: 1 addition & 0 deletions arrow-select/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -849,5 +849,6 @@ mod tests {
let dict_b = DictionaryArray::new(keys, Arc::new(values));
let array = concat(&[&dict_a, &dict_b]).unwrap();
assert_eq!(array.null_count(), 10);
assert_eq!(array.logical_null_count(), 10);
}
}
2 changes: 2 additions & 0 deletions arrow/src/util/data_gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ mod tests {
assert_eq!(batch.num_columns(), schema_ref.fields().len());
for array in batch.columns() {
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);
}
// Test that the list's child values are non-null
let b_array = batch.column(1);
Expand Down Expand Up @@ -710,6 +711,7 @@ mod tests {
assert_eq!(array.len(), 100);
// Map field is not null
assert_eq!(array.null_count(), 0);
assert_eq!(array.logical_null_count(), 0);
// Maps have multiple values like a list, so internal arrays are longer
assert!(array.as_map().keys().len() > array.len());
assert!(array.as_map().values().len() > array.len());
Expand Down
2 changes: 2 additions & 0 deletions parquet/src/arrow/array_reader/byte_array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,7 @@ mod tests {

assert_eq!(array.len(), 8);
assert_eq!(array.null_count(), 8);
assert_eq!(array.logical_null_count(), 8);
}

for (encoding, page) in pages {
Expand All @@ -667,6 +668,7 @@ mod tests {

assert_eq!(array.len(), 8);
assert_eq!(array.null_count(), 8);
assert_eq!(array.logical_null_count(), 8);
}
}
}

0 comments on commit bf12f4e

Please sign in to comment.