From 196255c7e359e05ea7e5be8375f987bca3352d0a Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 7 Dec 2023 18:09:20 +0100 Subject: [PATCH] GH-39126: [C++][CI] Fix Valgrind failures --- cpp/src/arrow/array/array_dict_test.cc | 2 +- cpp/src/arrow/array/array_test.cc | 1 + cpp/src/arrow/array/builder_binary.cc | 9 ++++---- cpp/src/arrow/array/builder_binary.h | 31 +++++++++++++++++--------- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 2f3ee6e2d49a5..a4c03b5db6371 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) { arr = std::make_shared(dict_type, indices, MakeArray(invalid_data)); ASSERT_RAISES(Invalid, arr->ValidateFull()); -#if !defined(__APPLE__) +#if !defined(__APPLE__) && !defined(ARROW_VALGRIND) // GH-35712: ASSERT_DEATH would make testing slow on MacOS. ASSERT_DEATH( { diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 974eb54d2caca..e9d478f108584 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -728,6 +728,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { } for (auto scalar : scalars) { + ARROW_SCOPED_TRACE("scalar type: ", scalar->type->ToString()); AssertAppendScalar(pool_, scalar); } } diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index 3ff22d4a3feeb..97fbc72b9aaa2 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -80,10 +80,11 @@ Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offse Status BinaryViewBuilder::FinishInternal(std::shared_ptr* out) { ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_)); ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_)); - BufferVector buffers = {null_bitmap, data}; - for (auto&& buffer : data_heap_builder_.Finish()) { - buffers.push_back(std::move(buffer)); - } + ARROW_ASSIGN_OR_RAISE(auto byte_buffers, data_heap_builder_.Finish()); + BufferVector buffers; + buffers.reserve(byte_buffers.size() + 2); + buffers.insert(buffers.end(), {null_bitmap, data}); + buffers.insert(buffers.end(), byte_buffers.begin(), byte_buffers.end()); *out = ArrayData::Make(type(), length_, std::move(buffers), null_count_); Reset(); return Status::OK(); diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index 3e87cf2403610..d825f7d32520a 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -524,16 +524,11 @@ class ARROW_EXPORT StringHeapBuilder { "strings larger than 2GB"); } if (num_bytes > current_remaining_bytes_) { - // Ensure the buffer is fully overwritten to avoid leaking uninitialized - // bytes from the allocator - if (current_remaining_bytes_ > 0) { - std::memset(current_out_buffer_, 0, current_remaining_bytes_); - blocks_.back() = SliceBuffer(blocks_.back(), 0, - blocks_.back()->size() - current_remaining_bytes_); - } + ARROW_RETURN_NOT_OK(FinishLastBlock()); current_remaining_bytes_ = num_bytes > blocksize_ ? num_bytes : blocksize_; - ARROW_ASSIGN_OR_RAISE(std::shared_ptr new_block, - AllocateBuffer(current_remaining_bytes_, alignment_, pool_)); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr new_block, + AllocateResizableBuffer(current_remaining_bytes_, alignment_, pool_)); current_offset_ = 0; current_out_buffer_ = new_block->mutable_data(); blocks_.emplace_back(std::move(new_block)); @@ -550,7 +545,10 @@ class ARROW_EXPORT StringHeapBuilder { int64_t current_remaining_bytes() const { return current_remaining_bytes_; } - std::vector> Finish() { + Result>> Finish() { + if (!blocks_.empty()) { + ARROW_RETURN_NOT_OK(FinishLastBlock()); + } current_offset_ = 0; current_out_buffer_ = NULLPTR; current_remaining_bytes_ = 0; @@ -558,10 +556,21 @@ class ARROW_EXPORT StringHeapBuilder { } private: + Status FinishLastBlock() { + if (current_remaining_bytes_ > 0) { + // Avoid leaking uninitialized bytes from the allocator + ARROW_RETURN_NOT_OK( + blocks_.back()->Resize(blocks_.back()->size() - current_remaining_bytes_, + /*shrink_to_fit=*/true)); + blocks_.back()->ZeroPadding(); + } + return Status::OK(); + } + MemoryPool* pool_; int64_t alignment_; int64_t blocksize_ = kDefaultBlocksize; - std::vector> blocks_; + std::vector> blocks_; int32_t current_offset_ = 0; uint8_t* current_out_buffer_ = NULLPTR;