Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add eq benchmark for StringArray/StringViewArray #5924

Merged
merged 4 commits into from
Jun 21, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions arrow/benches/comparison_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#[macro_use]
extern crate criterion;
use arrow::util::test_util::seedable_rng;
use criterion::Criterion;

extern crate arrow;
Expand All @@ -27,6 +28,8 @@ use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type};
use arrow_buffer::IntervalMonthDayNano;
use arrow_string::like::*;
use arrow_string::regexp::regexp_is_match_utf8_scalar;
use rand::rngs::StdRng;
use rand::Rng;

const SIZE: usize = 65536;

Expand Down Expand Up @@ -55,6 +58,14 @@ fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, value_b: &str) {
.unwrap();
}

/// Returns a lazy iterator over `size` randomly generated strings, each wrapped in `Some`.
///
/// Every string has a length drawn from `0..64` and is built from bytes drawn
/// from `0..128`. The iterator mutably borrows `rng` for as long as it is
/// alive, so successive calls sharing one `rng` continue the same random stream.
fn make_string_array(size: usize, rng: &mut StdRng) -> impl Iterator<Item = Option<String>> + '_ {
    (0..size).map(move |_| {
        // Draw the string length first, then one random byte per position.
        let char_count: i32 = rng.gen_range(0..64);
        let mut ascii: Vec<u8> = Vec::new();
        for _ in 0..char_count {
            ascii.push(rng.gen_range(0..128));
        }
        // All bytes are below 128, so the buffer is always valid UTF-8.
        Some(String::from_utf8(ascii).unwrap())
    })
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 42);
let arr_b = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 43);
Expand All @@ -63,6 +74,7 @@ fn add_benchmark(c: &mut Criterion) {
let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 0.0, 43);

let arr_string = create_string_array::<i32>(SIZE, 0.0);

let scalar = Float32Array::from(vec![1.0]);

c.bench_function("eq Float32", |b| b.iter(|| eq(&arr_a, &arr_b)));
Expand Down Expand Up @@ -138,6 +150,45 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| eq(&arr_month_day_nano_b, &scalar).unwrap())
});

let mut rng = seedable_rng();
let mut array_gen = make_string_array(1024 * 1024 * 8, &mut rng);
let string_left = StringArray::from_iter(array_gen);
let string_view_left = StringViewArray::from_iter(string_left.iter());

// Reuse the same rng (rather than a freshly seeded one) so that we generate **different** array data,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

// otherwise the left and right arrays will be identical
array_gen = make_string_array(1024 * 1024 * 8, &mut rng);
let string_right = StringArray::from_iter(array_gen);
let string_view_right = StringViewArray::from_iter(string_right.iter());

c.bench_function("eq scalar StringArray", |b| {
b.iter(|| {
eq(
&Scalar::new(StringArray::from_iter_values(["xxxx"])),
&string_left,
)
.unwrap()
})
});

c.bench_function("eq scalar StringViewArray", |b| {
b.iter(|| {
eq(
&Scalar::new(StringViewArray::from_iter_values(["xxxx"])),
&string_view_left,
)
.unwrap()
})
});

c.bench_function("eq StringArray StringArray", |b| {
b.iter(|| eq(&string_left, &string_right).unwrap())
});

c.bench_function("eq StringViewArray StringViewArray", |b| {
b.iter(|| eq(&string_view_left, &string_view_right).unwrap())
});

c.bench_function("like_utf8 scalar equals", |b| {
b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx"))
});
Expand Down
Loading