Skip to content

Commit

Permalink
apacheGH-39740: [C++] Fix filter kernel for month_day_nano intervals
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Jan 25, 2024
1 parent 2e8bd8d commit 9be84f9
Show file tree
Hide file tree
Showing 6 changed files with 336 additions and 141 deletions.
68 changes: 66 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ struct TakeBenchmark {
Bench(values);
}

// Runs the Take benchmark on a randomly generated FixedSizeBinary array.
// The element width comes from the benchmark's third argument
// (state.range(2)) and is also published as the "byte_width" counter.
void FixedSizeBinary() {
  const auto width = static_cast<int32_t>(state.range(2));
  auto fsb_values = rand.FixedSizeBinary(args.size, width, args.null_proportion);
  Bench(fsb_values);
  // Record the width once the run is done so it shows up next to the timings.
  state.counters["byte_width"] = width;
}

void String() {
int32_t string_min_length = 0, string_max_length = 32;
auto values = std::static_pointer_cast<StringArray>(rand.String(
Expand All @@ -149,6 +156,7 @@ struct TakeBenchmark {
for (auto _ : state) {
ABORT_NOT_OK(Take(values, indices).status());
}
state.SetItemsProcessed(state.iterations() * values->length());
}
};

Expand All @@ -166,8 +174,7 @@ struct FilterBenchmark {

// Runs the Filter benchmark on a randomly generated Int64 array.
//
// args.size is divided by sizeof(int64_t) to obtain the element count, and
// the bounds -100 and 100 are forwarded to rand.Int64 together with
// args.values_null_proportion.
void Int64() {
  const int64_t array_size = args.size / sizeof(int64_t);
  // `values` is declared exactly once: the earlier static_pointer_cast to
  // NumericArray<Int64Type> was superseded by this plain declaration, and
  // keeping both would redefine `values` in the same scope.
  auto values = rand.Int64(array_size, -100, 100, args.values_null_proportion);
  Bench(values);
}

Expand All @@ -181,6 +188,14 @@ struct FilterBenchmark {
Bench(values);
}

// Runs the Filter benchmark on a randomly generated FixedSizeBinary array.
// The element width comes from the benchmark's third argument
// (state.range(2)); args.size is divided by that width to get the number of
// elements. The width is also published as the "byte_width" counter.
void FixedSizeBinary() {
  const auto width = static_cast<int32_t>(state.range(2));
  const int64_t num_values = args.size / width;
  auto fsb_values =
      rand.FixedSizeBinary(num_values, width, args.values_null_proportion);
  Bench(fsb_values);
  // Record the width once the run is done so it shows up next to the timings.
  state.counters["byte_width"] = width;
}

void String() {
int32_t string_min_length = 0, string_max_length = 32;
int32_t string_mean_length = (string_max_length + string_min_length) / 2;
Expand All @@ -202,6 +217,7 @@ struct FilterBenchmark {
for (auto _ : state) {
ABORT_NOT_OK(Filter(values, filter).status());
}
state.SetItemsProcessed(state.iterations() * values->length());
}

void BenchRecordBatch() {
Expand Down Expand Up @@ -236,6 +252,7 @@ struct FilterBenchmark {
for (auto _ : state) {
ABORT_NOT_OK(Filter(batch, filter).status());
}
state.SetItemsProcessed(state.iterations() * num_rows);
}
};

Expand All @@ -255,6 +272,14 @@ static void FilterFSLInt64FilterWithNulls(benchmark::State& state) {
FilterBenchmark(state, true).FSLInt64();
}

// Benchmark entry point: FixedSizeBinary Filter benchmark with the
// FilterBenchmark boolean flag set to false.
// NOTE(review): per the function name the flag presumably means "filter has
// nulls" -- confirm against the FilterBenchmark constructor.
static void FilterFixedSizeBinaryFilterNoNulls(benchmark::State& state) {
  FilterBenchmark filter_bench(state, false);
  filter_bench.FixedSizeBinary();
}

// Benchmark entry point: FixedSizeBinary Filter benchmark with the
// FilterBenchmark boolean flag set to true.
// NOTE(review): per the function name the flag presumably means "filter has
// nulls" -- confirm against the FilterBenchmark constructor.
static void FilterFixedSizeBinaryFilterWithNulls(benchmark::State& state) {
  FilterBenchmark filter_bench(state, true);
  filter_bench.FixedSizeBinary();
}

static void FilterStringFilterNoNulls(benchmark::State& state) {
FilterBenchmark(state, false).String();
}
Expand Down Expand Up @@ -283,6 +308,18 @@ static void TakeInt64MonotonicIndices(benchmark::State& state) {
TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true).Int64();
}

// Benchmark entry point: FixedSizeBinary Take benchmark with
// indices_with_nulls disabled.
static void TakeFixedSizeBinaryRandomIndicesNoNulls(benchmark::State& state) {
  TakeBenchmark take_bench(state, /*indices_with_nulls=*/false);
  take_bench.FixedSizeBinary();
}

// Benchmark entry point: FixedSizeBinary Take benchmark with
// indices_with_nulls enabled.
static void TakeFixedSizeBinaryRandomIndicesWithNulls(benchmark::State& state) {
  TakeBenchmark take_bench(state, /*indices_with_nulls=*/true);
  take_bench.FixedSizeBinary();
}

// Benchmark entry point: FixedSizeBinary Take benchmark with
// indices_with_nulls disabled and monotonic enabled.
static void TakeFixedSizeBinaryMonotonicIndices(benchmark::State& state) {
  TakeBenchmark take_bench(state,
                           /*indices_with_nulls=*/false,
                           /*monotonic=*/true);
  take_bench.FixedSizeBinary();
}

// Benchmark entry point: FSLInt64 Take benchmark with indices_with_nulls
// disabled.
static void TakeFSLInt64RandomIndicesNoNulls(benchmark::State& state) {
  TakeBenchmark take_bench(state, /*indices_with_nulls=*/false);
  take_bench.FSLInt64();
}
Expand Down Expand Up @@ -315,8 +352,22 @@ void FilterSetArgs(benchmark::internal::Benchmark* bench) {
}
}

// Argument generator for the FixedSizeBinary Filter benchmarks.
//
// Registers one argument tuple {data size, filter-parameter index, byte width}
// for every combination of an entry in g_data_sizes, an index into
// g_filter_params, and a byte width of 8 or 9.
void FilterFSBSetArgs(benchmark::internal::Benchmark* bench) {
  const int num_filter_params = static_cast<int>(g_filter_params.size());
  for (int64_t data_size : g_data_sizes) {
    for (int param_index = 0; param_index < num_filter_params; ++param_index) {
      // FixedSizeBinary of primitive sizes (powers of two up to 32)
      // have a faster path, so benchmark one width that is a power of two (8)
      // and one that is not (9).
      for (int32_t width : {8, 9}) {
        bench->Args({static_cast<ArgsType>(data_size), param_index, width});
      }
    }
  }
}

// Filter benchmark registrations. Each benchmark is paired, via Apply(), with
// the function that enumerates its argument tuples.
BENCHMARK(FilterInt64FilterNoNulls)->Apply(FilterSetArgs);
BENCHMARK(FilterInt64FilterWithNulls)->Apply(FilterSetArgs);
// The FixedSizeBinary variants use FilterFSBSetArgs, which supplies a byte
// width as an additional third argument.
BENCHMARK(FilterFixedSizeBinaryFilterNoNulls)->Apply(FilterFSBSetArgs);
BENCHMARK(FilterFixedSizeBinaryFilterWithNulls)->Apply(FilterFSBSetArgs);
BENCHMARK(FilterFSLInt64FilterNoNulls)->Apply(FilterSetArgs);
BENCHMARK(FilterFSLInt64FilterWithNulls)->Apply(FilterSetArgs);
BENCHMARK(FilterStringFilterNoNulls)->Apply(FilterSetArgs);
Expand All @@ -340,9 +391,22 @@ void TakeSetArgs(benchmark::internal::Benchmark* bench) {
}
}

// Argument generator for the FixedSizeBinary Take benchmarks.
//
// Registers one argument tuple {data size, nulls setting, byte width} for
// every combination of an entry in g_data_sizes, a value from
// {1000, 10, 2, 1, 0}, and a byte width of 8 or 9.
// NOTE(review): the second argument presumably controls the proportion of
// nulls -- confirm against the code that parses the benchmark arguments.
void TakeFSBSetArgs(benchmark::internal::Benchmark* bench) {
  const std::vector<ArgsType> null_settings{1000, 10, 2, 1, 0};
  for (int64_t data_size : g_data_sizes) {
    for (const ArgsType null_setting : null_settings) {
      for (int32_t width : {8, 9}) {
        bench->Args({static_cast<ArgsType>(data_size), null_setting, width});
      }
    }
  }
}

// Take benchmark registrations. Each benchmark is paired, via Apply(), with
// the function that enumerates its argument tuples.
BENCHMARK(TakeInt64RandomIndicesNoNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeInt64RandomIndicesWithNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeInt64MonotonicIndices)->Apply(TakeSetArgs);
// The FixedSizeBinary variants use TakeFSBSetArgs, which supplies a byte
// width as an additional third argument.
BENCHMARK(TakeFixedSizeBinaryRandomIndicesNoNulls)->Apply(TakeFSBSetArgs);
BENCHMARK(TakeFixedSizeBinaryRandomIndicesWithNulls)->Apply(TakeFSBSetArgs);
BENCHMARK(TakeFixedSizeBinaryMonotonicIndices)->Apply(TakeFSBSetArgs);
BENCHMARK(TakeFSLInt64RandomIndicesNoNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeFSLInt64RandomIndicesWithNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeFSLInt64MonotonicIndices)->Apply(TakeSetArgs);
Expand Down
Loading

0 comments on commit 9be84f9

Please sign in to comment.