From 57cf183767c0b03b859a1faf998cba3930ab5657 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 25 Mar 2024 16:09:08 +0100 Subject: [PATCH] Fix benchmark & tune heuristic --- cpp/src/arrow/memory_pool.cc | 6 +++--- cpp/src/arrow/memory_pool_benchmark.cc | 38 +++++++++++++++++++------- cpp/src/arrow/memory_pool_jemalloc.cc | 7 ++++- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 2f416602237d2..35f9e422033ba 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -545,13 +545,13 @@ class BaseMemoryPoolImpl : public MemoryPool { } // First try resizing in place if (!Allocator::ResizeInPlace(old_size, new_size, *ptr)) { - // TODO comment - if (std::max(old_size, new_size) >= 32 * 1024) { - // Deallocate then allocate (faster than copying data?) + // For non-small allocations, we prefer deallocate-then-allocate + // over reallocation, because it saves the cost of copying data. + if (std::max(old_size, new_size) >= 256) { Allocator::DeallocateAligned(*ptr, old_size, alignment); RETURN_NOT_OK(Allocator::AllocateAligned(new_size, alignment, ptr)); } else { RETURN_NOT_OK(Allocator::ReallocateAligned(old_size, new_size, alignment, ptr)); } } #ifndef NDEBUG diff --git a/cpp/src/arrow/memory_pool_benchmark.cc b/cpp/src/arrow/memory_pool_benchmark.cc index c260bb7af5ffd..4a914889152b9 100644 --- a/cpp/src/arrow/memory_pool_benchmark.cc +++ b/cpp/src/arrow/memory_pool_benchmark.cc @@ -121,24 +121,33 @@ static void BenchmarkReallocateGrowing(benchmark::State& state) { const int64_t max_size = 256 << 10; // 4 kiB: typical increment when resizing a scratch space const int64_t increment = 4096; + MemoryPool* pool = *Alloc::GetAllocator(); + pool->ReleaseUnused(); int64_t nb_reallocs = 0; - + int64_t nb_inplace_reallocs = 0; for (auto _ : state) { + const auto alloc_before = pool->bytes_allocated(); uint8_t* data; int64_t size = 0; ARROW_CHECK_OK(pool->Allocate(size, 
&data)); - for (; size < max_size; size += increment) { + while (size < max_size) { + const auto old_data = data; + int64_t new_size = size + increment; if constexpr (Copy) { - ARROW_CHECK_OK(pool->Reallocate(size - increment, size, &data)); + ARROW_CHECK_OK(pool->Reallocate(size, new_size, &data)); } else { - ARROW_CHECK_OK(pool->ReallocateNoCopy(size - increment, size, &data)); + ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data)); } ++nb_reallocs; + nb_inplace_reallocs += (data == old_data); + size = new_size; } - pool->Free(data, size - increment); + pool->Free(data, size); + ARROW_CHECK_EQ(pool->bytes_allocated(), alloc_before); } state.SetItemsProcessed(nb_reallocs); + state.counters["percent_in_place"] = 100.0 * nb_inplace_reallocs / nb_reallocs; } template @@ -155,24 +164,33 @@ template static void BenchmarkReallocateShrinking(benchmark::State& state) { const int64_t max_size = 256 << 10; // 256 kiB const int64_t increment = 4096; + MemoryPool* pool = *Alloc::GetAllocator(); + pool->ReleaseUnused(); int64_t nb_reallocs = 0; - + int64_t nb_inplace_reallocs = 0; for (auto _ : state) { + const auto alloc_before = pool->bytes_allocated(); uint8_t* data; int64_t size = max_size; ARROW_CHECK_OK(pool->Allocate(size, &data)); - for (; size >= 0; size -= increment) { + while (size >= increment) { + const auto old_data = data; + int64_t new_size = size - increment; if constexpr (Copy) { - ARROW_CHECK_OK(pool->Reallocate(size + increment, size, &data)); + ARROW_CHECK_OK(pool->Reallocate(size, new_size, &data)); } else { - ARROW_CHECK_OK(pool->ReallocateNoCopy(size + increment, size, &data)); + ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data)); } ++nb_reallocs; + nb_inplace_reallocs += (data == old_data); + size = new_size; } - pool->Free(data, size + increment); + pool->Free(data, size); + ARROW_CHECK_EQ(pool->bytes_allocated(), alloc_before); } state.SetItemsProcessed(nb_reallocs); + state.counters["percent_in_place"] = 100.0 * 
nb_inplace_reallocs / nb_reallocs; } template diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index a211879b1bfc9..765c836af61a1 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -126,7 +126,12 @@ bool JemallocAllocator::ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr) { // No need to pass any alignment since this doesn't move the base pointer int64_t got_size = static_cast<int64_t>(xallocx(ptr, static_cast<size_t>(new_size), /*extra=*/0, /*flags=*/0)); - return got_size == new_size; + // xallocx returns the physical allocation extent, which isn't really helpful, so + // we use a heuristic: + // - when upsizing, we deem it successful if >= requested size + // - when downsizing, we deem it successful if <= 2 * requested size + // (rather than deallocating then reallocating a smaller block) + return (new_size > old_size) ? (got_size >= new_size) : (got_size <= new_size * 2); } void JemallocAllocator::DeallocateAligned(uint8_t* ptr, int64_t size, int64_t alignment) {