From 786bd67d9575e168b7b34a84df3932655bcba57d Mon Sep 17 00:00:00 2001 From: Chendi Xue Date: Wed, 18 Dec 2019 01:57:17 +0800 Subject: [PATCH] [C++] Only create tmp buffer once Signed-off-by: Chendi Xue --- .../compute/kernels/sort_arrays_to_indices.cc | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/sort_arrays_to_indices.cc b/cpp/src/arrow/compute/kernels/sort_arrays_to_indices.cc index 2e11dddeead62..3ab25bd888475 100644 --- a/cpp/src/arrow/compute/kernels/sort_arrays_to_indices.cc +++ b/cpp/src/arrow/compute/kernels/sort_arrays_to_indices.cc @@ -103,20 +103,22 @@ class SortArraysToIndicesKernelImpl : public SortArraysToIndicesKernel { private: Comparator compare_; std::vector> typed_arrays_; + uint64_t merge_time = 0; - std::pair merge( - std::vector>::iterator - arrays_valid_range_begin, - std::vector>::iterator - arrays_valid_range_end) { + std::pair merge( + ArrayItemIndex* arrow_buffer, ArrayItemIndex* tmp_buffer, + std::vector>::iterator arrays_valid_range_begin, + std::vector>::iterator arrays_valid_range_end) { auto size = arrays_valid_range_end - arrays_valid_range_begin; - std::pair left; - std::pair right; + std::pair left; + std::pair right; if (size > 2) { auto half_size = size / 2; auto arrays_valid_range_middle = arrays_valid_range_begin + half_size; - left = merge(arrays_valid_range_begin, arrays_valid_range_middle); - right = merge(arrays_valid_range_middle, arrays_valid_range_end); + left = merge(arrow_buffer, tmp_buffer, arrays_valid_range_begin, + arrays_valid_range_middle); + right = merge(arrow_buffer, tmp_buffer, arrays_valid_range_middle, + arrays_valid_range_end); } else if (size == 2) { left = *arrays_valid_range_begin; right = *(arrays_valid_range_end - 1); @@ -127,20 +129,19 @@ class SortArraysToIndicesKernelImpl : public SortArraysToIndicesKernel { auto left_size = left.second - left.first; auto right_size = right.second - right.first; - ArrayItemIndex* left_tmp = new ArrayItemIndex[left_size]; - memcpy(left_tmp, left.first, left_size * sizeof(ArrayItemIndex)); - ArrayItemIndex* right_tmp = new ArrayItemIndex[right_size]; - memcpy(right_tmp, right.first, right_size * sizeof(ArrayItemIndex)); + memcpy(tmp_buffer + left.first, arrow_buffer + left.first, + left_size * sizeof(ArrayItemIndex)); + memcpy(tmp_buffer + right.first, arrow_buffer + right.first, + right_size * sizeof(ArrayItemIndex)); - std::set_union(left_tmp, left_tmp + left_size, right_tmp, right_tmp + right_size, - left.first, [this](ArrayItemIndex left, ArrayItemIndex right) { + std::set_union(tmp_buffer + left.first, tmp_buffer + left.second, + tmp_buffer + right.first, tmp_buffer + right.second, + arrow_buffer + left.first, + [this](ArrayItemIndex left, ArrayItemIndex right) { return typed_arrays_[left.array_id]->GetView(left.id) < typed_arrays_[right.array_id]->GetView(right.id); }); - delete[] left_tmp; - delete[] right_tmp; - assert((left.first + left_size + right_size) == right.second); return std::make_pair(left.first, right.second); } @@ -161,16 +162,17 @@ class SortArraysToIndicesKernelImpl : public SortArraysToIndicesKernel { ArrayItemIndex* indices_begin = reinterpret_cast(indices_buf->mutable_data()); ArrayItemIndex* indices_end = indices_begin + items_total; - std::vector> arrays_valid_range; + std::vector> arrays_valid_range; int64_t array_id = 0; int64_t null_count_total = 0; int64_t indices_i = 0; + uint64_t array_sort = 0; for (auto array : values) { auto typed_array = std::dynamic_pointer_cast(array); typed_arrays_.push_back(typed_array); - auto array_begin = indices_begin + indices_i; + auto array_begin = indices_i; for (int64_t i = 0; i < array->length(); i++) { if (!array->IsNull(i)) { (indices_begin + indices_i)->array_id = array_id; @@ -183,8 +185,8 @@ class SortArraysToIndicesKernelImpl : public SortArraysToIndicesKernel { } } // first round sort - auto array_end = indices_begin + indices_i; - std::stable_sort(array_begin, array_end, + auto array_end = indices_i; + std::stable_sort(indices_begin + array_begin, indices_begin + array_end, [typed_array, this](ArrayItemIndex left, ArrayItemIndex right) { return typed_array->GetView(left.id) < typed_array->GetView(right.id); @@ -194,7 +196,10 @@ class SortArraysToIndicesKernelImpl : public SortArraysToIndicesKernel { } // merge sort - merge(arrays_valid_range.begin(), arrays_valid_range.end()); + ArrayItemIndex* tmp_buffer_begin = new ArrayItemIndex[indices_i](); + merge(indices_begin, tmp_buffer_begin, arrays_valid_range.begin(), + arrays_valid_range.end()); + delete[] tmp_buffer_begin; *offsets = std::make_shared( std::make_shared(sizeof(ArrayItemIndex) / sizeof(int32_t)),