Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add try_unary, binary, try_binary kernels ~90% faster #2666

Merged
merged 1 commit into from
Sep 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 49 additions & 94 deletions arrow/benches/arithmetic_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,107 +20,62 @@ extern crate criterion;
use criterion::Criterion;
use rand::Rng;

use std::sync::Arc;

extern crate arrow;

use arrow::datatypes::Float32Type;
use arrow::util::bench_util::*;
use arrow::{array::*, datatypes::Float32Type};
use arrow::{compute::kernels::arithmetic::*, util::test_util::seedable_rng};

/// Builds a reference-counted `Float32Type` primitive array holding `size` elements.
///
/// `with_nulls` selects the null density handed to `create_primitive_array`:
/// `0.5` when set, `0.0` otherwise.
fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
    let values = create_primitive_array::<Float32Type>(
        size,
        if with_nulls { 0.5 } else { 0.0 },
    );
    Arc::new(values)
}

/// Runs the `add` kernel on two arrays and feeds the result to `criterion::black_box`.
///
/// Both inputs are downcast to `Float32Array`; the function panics if either
/// downcast fails or if unwrapping the kernel's result fails.
fn bench_add(arr_a: &ArrayRef, arr_b: &ArrayRef) {
    let lhs: &Float32Array = arr_a.as_any().downcast_ref().unwrap();
    let rhs: &Float32Array = arr_b.as_any().downcast_ref().unwrap();
    let sum = add(lhs, rhs).unwrap();
    criterion::black_box(sum);
}

/// Runs the `subtract` kernel on two arrays and feeds the result to `criterion::black_box`.
///
/// Both inputs are downcast to `Float32Array`; the function panics if either
/// downcast fails or if unwrapping the kernel's result fails.
fn bench_subtract(arr_a: &ArrayRef, arr_b: &ArrayRef) {
    let lhs: &Float32Array = arr_a.as_any().downcast_ref().unwrap();
    let rhs: &Float32Array = arr_b.as_any().downcast_ref().unwrap();
    let difference = subtract(lhs, rhs).unwrap();
    criterion::black_box(difference);
}

/// Runs the `multiply` kernel on two arrays and feeds the result to `criterion::black_box`.
///
/// Both inputs are downcast to `Float32Array`; the function panics if either
/// downcast fails or if unwrapping the kernel's result fails.
fn bench_multiply(arr_a: &ArrayRef, arr_b: &ArrayRef) {
    let lhs: &Float32Array = arr_a.as_any().downcast_ref().unwrap();
    let rhs: &Float32Array = arr_b.as_any().downcast_ref().unwrap();
    let product = multiply(lhs, rhs).unwrap();
    criterion::black_box(product);
}

/// Runs the `divide_checked` kernel on two arrays and feeds the result to
/// `criterion::black_box`.
///
/// Both inputs are downcast to `Float32Array`; the function panics if either
/// downcast fails or if unwrapping the kernel's result fails.
fn bench_divide(arr_a: &ArrayRef, arr_b: &ArrayRef) {
    let dividend: &Float32Array = arr_a.as_any().downcast_ref().unwrap();
    let divisor: &Float32Array = arr_b.as_any().downcast_ref().unwrap();
    let quotient = divide_checked(dividend, divisor).unwrap();
    criterion::black_box(quotient);
}

/// Runs the `divide` kernel on two arrays and feeds the result to `criterion::black_box`.
///
/// Both inputs are downcast to `Float32Array`; the function panics if either
/// downcast fails or if unwrapping the kernel's result fails.
fn bench_divide_unchecked(arr_a: &ArrayRef, arr_b: &ArrayRef) {
    let dividend: &Float32Array = arr_a.as_any().downcast_ref().unwrap();
    let divisor: &Float32Array = arr_b.as_any().downcast_ref().unwrap();
    let quotient = divide(dividend, divisor).unwrap();
    criterion::black_box(quotient);
}

/// Runs the `divide_scalar` kernel on `array` with `divisor` and feeds the result to
/// `criterion::black_box`.
///
/// The input is downcast to `Float32Array`; the function panics if the downcast
/// fails or if unwrapping the kernel's result fails.
fn bench_divide_scalar(array: &ArrayRef, divisor: f32) {
    let values: &Float32Array = array.as_any().downcast_ref().unwrap();
    let quotient = divide_scalar(values, divisor).unwrap();
    criterion::black_box(quotient);
}

/// Runs the `modulus` kernel on two arrays and feeds the result to `criterion::black_box`.
///
/// Both inputs are downcast to `Float32Array`; the function panics if either
/// downcast fails or if unwrapping the kernel's result fails.
fn bench_modulo(arr_a: &ArrayRef, arr_b: &ArrayRef) {
    let dividend: &Float32Array = arr_a.as_any().downcast_ref().unwrap();
    let divisor: &Float32Array = arr_b.as_any().downcast_ref().unwrap();
    let remainder = modulus(dividend, divisor).unwrap();
    criterion::black_box(remainder);
}

/// Runs the `modulus_scalar` kernel on `array` with `divisor` and feeds the result to
/// `criterion::black_box`.
///
/// The input is downcast to `Float32Array`; the function panics if the downcast
/// fails or if unwrapping the kernel's result fails.
fn bench_modulo_scalar(array: &ArrayRef, divisor: f32) {
    let values: &Float32Array = array.as_any().downcast_ref().unwrap();
    let remainder = modulus_scalar(values, divisor).unwrap();
    criterion::black_box(remainder);
}

/// Registers the `Float32Type` arithmetic kernel benchmarks on the given Criterion instance.
///
/// NOTE(review): this body looks like the removed and added sides of a diff shown
/// together (fixed-input benchmarks followed by a loop over null densities) —
/// confirm which half is meant to remain.
fn add_benchmark(c: &mut Criterion) {
// Number of elements in every benchmark input array.
const BATCH_SIZE: usize = 64 * 1024;
// Inputs without nulls: `create_array` maps `false` to a null density of 0.0.
let arr_a = create_array(BATCH_SIZE, false);
let arr_b = create_array(BATCH_SIZE, false);
// Scalar operand drawn from the seedable RNG; it is passed as the `f32` divisor
// of the scalar benchmarks below.
let scalar = seedable_rng().gen();

// Benchmarks over the null-free inputs, one Criterion entry per kernel helper.
c.bench_function("add", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
c.bench_function("subtract", |b| b.iter(|| bench_subtract(&arr_a, &arr_b)));
c.bench_function("multiply", |b| b.iter(|| bench_multiply(&arr_a, &arr_b)));
// "divide" goes through `bench_divide`, which calls `divide_checked`.
c.bench_function("divide", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
// "divide_unchecked" goes through `bench_divide_unchecked`, which calls `divide`.
c.bench_function("divide_unchecked", |b| {
b.iter(|| bench_divide_unchecked(&arr_a, &arr_b))
});
c.bench_function("divide_scalar", |b| {
b.iter(|| bench_divide_scalar(&arr_a, scalar))
});
c.bench_function("modulo", |b| b.iter(|| bench_modulo(&arr_a, &arr_b)));
c.bench_function("modulo_scalar", |b| {
b.iter(|| bench_modulo_scalar(&arr_a, scalar))
});

// Same helpers again, this time on inputs generated with a null density of 0.5
// (`create_array` with `true`).
let arr_a_nulls = create_array(BATCH_SIZE, true);
let arr_b_nulls = create_array(BATCH_SIZE, true);
c.bench_function("add_nulls", |b| {
b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls))
});
c.bench_function("divide_nulls", |b| {
b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls))
});
c.bench_function("divide_nulls_unchecked", |b| {
b.iter(|| bench_divide_unchecked(&arr_a_nulls, &arr_b_nulls))
});
c.bench_function("divide_scalar_nulls", |b| {
b.iter(|| bench_divide_scalar(&arr_a_nulls, scalar))
});
c.bench_function("modulo_nulls", |b| {
b.iter(|| bench_modulo(&arr_a_nulls, &arr_b_nulls))
});
c.bench_function("modulo_scalar_nulls", |b| {
b.iter(|| bench_modulo_scalar(&arr_a_nulls, scalar))
});
// Run every kernel once per null density; the density is embedded in each
// benchmark name, e.g. "add(0.5)".
for null_density in [0., 0.1, 0.5, 0.9, 1.0] {
// These bindings shadow the outer `arr_a`, `arr_b` and `scalar` inside the loop body.
let arr_a = create_primitive_array::<Float32Type>(BATCH_SIZE, null_density);
let arr_b = create_primitive_array::<Float32Type>(BATCH_SIZE, null_density);
let scalar = seedable_rng().gen();

// Each closure calls the kernel directly on the primitive arrays, unwraps the
// result, and passes it through `black_box`.
c.bench_function(&format!("add({})", null_density), |b| {
b.iter(|| criterion::black_box(add(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("add_checked({})", null_density), |b| {
b.iter(|| criterion::black_box(add_checked(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("add_scalar({})", null_density), |b| {
b.iter(|| criterion::black_box(add_scalar(&arr_a, scalar).unwrap()))
});
c.bench_function(&format!("subtract({})", null_density), |b| {
b.iter(|| criterion::black_box(subtract(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("subtract_checked({})", null_density), |b| {
b.iter(|| criterion::black_box(subtract_checked(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("subtract_scalar({})", null_density), |b| {
b.iter(|| criterion::black_box(subtract_scalar(&arr_a, scalar).unwrap()))
});
c.bench_function(&format!("multiply({})", null_density), |b| {
b.iter(|| criterion::black_box(multiply(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("multiply_checked({})", null_density), |b| {
b.iter(|| criterion::black_box(multiply_checked(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("multiply_scalar({})", null_density), |b| {
b.iter(|| criterion::black_box(multiply_scalar(&arr_a, scalar).unwrap()))
});
// Unlike the fixed-input section above, the names here match the kernels:
// "divide" calls `divide` and "divide_checked" calls `divide_checked`.
c.bench_function(&format!("divide({})", null_density), |b| {
b.iter(|| criterion::black_box(divide(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("divide_checked({})", null_density), |b| {
b.iter(|| criterion::black_box(divide_checked(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("divide_scalar({})", null_density), |b| {
b.iter(|| criterion::black_box(divide_scalar(&arr_a, scalar).unwrap()))
});
// The "modulo" benchmarks exercise the `modulus` / `modulus_scalar` kernels.
c.bench_function(&format!("modulo({})", null_density), |b| {
b.iter(|| criterion::black_box(modulus(&arr_a, &arr_b).unwrap()))
});
c.bench_function(&format!("modulo_scalar({})", null_density), |b| {
b.iter(|| criterion::black_box(modulus_scalar(&arr_a, scalar).unwrap()))
});
}
}

criterion_group!(benches, add_benchmark);
Expand Down
47 changes: 45 additions & 2 deletions arrow/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,51 @@ use super::{
PrimitiveArray,
};

/// an iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
// Note: This implementation is based on std's [Vec]s' [IntoIter].
/// An iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good explanation for this API. 👍

///
/// # Performance
///
/// [`ArrayIter`] provides an idiomatic way to iterate over an array; however, this
/// comes at the cost of performance. In particular, the interleaved handling of
/// the null mask is often sub-optimal.
///
/// If performing an infallible operation, it is typically faster to perform the operation
/// on every index of the array, and handle the null mask separately. For [`PrimitiveArray`]
/// this functionality is provided by [`compute::unary`].
///
/// ```
/// # use arrow::array::PrimitiveArray;
/// # use arrow::compute::unary;
/// # use arrow::datatypes::Int32Type;
///
/// fn add(a: &PrimitiveArray<Int32Type>, b: i32) -> PrimitiveArray<Int32Type> {
/// unary(a, |a| a + b)
/// }
/// ```
///
/// If performing a fallible operation, it isn't possible to perform the operation independently
/// of the null mask, as this might result in a spurious failure on a null index. However,
/// there are more efficient ways to iterate over just the non-null indices; this functionality
/// is provided by [`compute::try_unary`].
///
/// ```
/// # use arrow::array::PrimitiveArray;
/// # use arrow::compute::try_unary;
/// # use arrow::datatypes::Int32Type;
/// # use arrow::error::{ArrowError, Result};
///
/// fn checked_add(a: &PrimitiveArray<Int32Type>, b: i32) -> Result<PrimitiveArray<Int32Type>> {
/// try_unary(a, |a| {
/// a.checked_add(b).ok_or_else(|| {
/// ArrowError::CastError(format!("overflow adding {} to {}", a, b))
/// })
/// })
/// }
/// ```
///
/// [`PrimitiveArray`]: crate::array::PrimitiveArray
/// [`compute::unary`]: crate::compute::unary
/// [`compute::try_unary`]: crate::compute::try_unary
#[derive(Debug)]
pub struct ArrayIter<T: ArrayAccessor> {
array: T,
Expand Down
Loading