diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 2f416602237d2..cd6ecc2387d5b 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -537,6 +537,9 @@ class BaseMemoryPoolImpl : public MemoryPool {
     if (new_size == old_size) {
       return Status::OK();
     }
+    if (old_size == 0 || new_size == 0) {
+      return Reallocate(old_size, new_size, alignment, ptr);
+    }
     if (new_size < 0) {
       return Status::Invalid("negative realloc size");
     }
@@ -545,9 +548,9 @@ class BaseMemoryPoolImpl : public MemoryPool {
     }
     // First try resizing in place
     if (!Allocator::ResizeInPlace(old_size, new_size, *ptr)) {
-      // TODO comment
-      if (std::max(old_size, new_size) >= 32 * 1024) {
-        // Deallocate then allocate (faster than copying data?)
+      // For non-small allocations, we prefer deallocate-then-allocate over
+      // reallocation, because it saves the cost of copying the data.
+      if (std::max(old_size, new_size) >= 256) {
         Allocator::DeallocateAligned(*ptr, old_size, alignment);
         RETURN_NOT_OK(Allocator::AllocateAligned(new_size, alignment, ptr));
       } else {
diff --git a/cpp/src/arrow/memory_pool_benchmark.cc b/cpp/src/arrow/memory_pool_benchmark.cc
index c260bb7af5ffd..95080b4915342 100644
--- a/cpp/src/arrow/memory_pool_benchmark.cc
+++ b/cpp/src/arrow/memory_pool_benchmark.cc
@@ -115,30 +115,39 @@ static void AllocateTouchDeallocate(
   state.SetBytesProcessed(state.iterations() * nbytes);
 }
 
+// 256 kiB: typical max size for a scratch space (L2-sized)
+static constexpr int64_t kMaxReallocationSize = 256 << 10;
+// 4 kiB: typical increment when resizing a scratch space
+static constexpr int64_t kReallocationIncrement = 4096;
+
 template <typename Alloc, bool Copy>
 static void BenchmarkReallocateGrowing(benchmark::State& state) {
-  // 256 kiB: typical max size for a scratch space (L2-sized)
-  const int64_t max_size = 256 << 10;
-  // 4 kiB: typical increment when resizing a scratch space
-  const int64_t increment = 4096;
   MemoryPool* pool = *Alloc::GetAllocator();
+  pool->ReleaseUnused();
   int64_t nb_reallocs = 0;
-
+  int64_t nb_inplace_reallocs = 0;
   for (auto _ : state) {
+    const auto alloc_before = pool->bytes_allocated();
     uint8_t* data;
     int64_t size = 0;
     ARROW_CHECK_OK(pool->Allocate(size, &data));
-    for (; size < max_size; size += increment) {
+    while (size < kMaxReallocationSize) {
+      const auto old_data = data;
+      int64_t new_size = size + kReallocationIncrement;
       if constexpr (Copy) {
-        ARROW_CHECK_OK(pool->Reallocate(size - increment, size, &data));
+        ARROW_CHECK_OK(pool->Reallocate(size, new_size, &data));
       } else {
-        ARROW_CHECK_OK(pool->ReallocateNoCopy(size - increment, size, &data));
+        ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data));
       }
       ++nb_reallocs;
+      nb_inplace_reallocs += (data == old_data);
+      size = new_size;
     }
-    pool->Free(data, size - increment);
+    pool->Free(data, size);
+    ARROW_CHECK_EQ(pool->bytes_allocated(), alloc_before);
   }
   state.SetItemsProcessed(nb_reallocs);
+  state.counters["percent_in_place"] = 100.0 * nb_inplace_reallocs / nb_reallocs;
 }
 
 template <typename Alloc>
@@ -153,26 +162,33 @@ static void ReallocateGrowingNoCopy(benchmark::State& state) {
 
 template <typename Alloc, bool Copy>
 static void BenchmarkReallocateShrinking(benchmark::State& state) {
-  const int64_t max_size = 256 << 10;  // 256 kiB
-  const int64_t increment = 4096;
   MemoryPool* pool = *Alloc::GetAllocator();
-  int64_t nb_reallocs = 0;
+  pool->ReleaseUnused();
+  int64_t nb_reallocs = 0;
+  int64_t nb_inplace_reallocs = 0;
   for (auto _ : state) {
+    const auto alloc_before = pool->bytes_allocated();
     uint8_t* data;
-    int64_t size = max_size;
+    int64_t size = kMaxReallocationSize;
     ARROW_CHECK_OK(pool->Allocate(size, &data));
-    for (; size >= 0; size -= increment) {
+    while (size >= kReallocationIncrement) {
+      const auto old_data = data;
+      int64_t new_size = size - kReallocationIncrement;
       if constexpr (Copy) {
-        ARROW_CHECK_OK(pool->Reallocate(size + increment, size, &data));
+        ARROW_CHECK_OK(pool->Reallocate(size, new_size, &data));
       } else {
-        ARROW_CHECK_OK(pool->ReallocateNoCopy(size + increment, size, &data));
+        ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data));
       }
       ++nb_reallocs;
+      nb_inplace_reallocs += (data == old_data);
+      size = new_size;
     }
-    pool->Free(data, size + increment);
+    pool->Free(data, size);
+    ARROW_CHECK_EQ(pool->bytes_allocated(), alloc_before);
   }
   state.SetItemsProcessed(nb_reallocs);
+  state.counters["percent_in_place"] = 100.0 * nb_inplace_reallocs / nb_reallocs;
 }
 
 template <typename Alloc>
diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc
index a211879b1bfc9..faf41d45cab20 100644
--- a/cpp/src/arrow/memory_pool_jemalloc.cc
+++ b/cpp/src/arrow/memory_pool_jemalloc.cc
@@ -126,6 +126,10 @@ bool JemallocAllocator::ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr) {
   // No need to pass any alignment since this doesn't move the base pointer
   int64_t got_size = static_cast<int64_t>(
       xallocx(ptr, static_cast<size_t>(new_size), /*extra=*/0, /*flags=*/0));
+  // FIXME: xallocx resizes to an actual size that may differ from the requested
+  // size, so the exact equality check below is pessimistic. An alternative is to
+  // return the actual allocation size to the caller, as it must be passed
+  // accurately when deallocating the area.
   return got_size == new_size;
 }
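
Reviewer note: for context, a minimal sketch of the kind of caller the ReallocateNoCopy path serves, assuming the default memory pool (the function name GrowScratch and the loop bounds are illustrative, not part of this patch). Since ReallocateNoCopy does not promise to preserve buffer contents, the deallocate-then-allocate strategy introduced in memory_pool.cc is valid on this path:

#include <cstdint>

#include "arrow/memory_pool.h"
#include "arrow/util/logging.h"

// Illustrative only: grow a scratch buffer whose contents need not survive
// each resize. ReallocateNoCopy may move the buffer without copying the old
// bytes, so the caller must treat the contents as garbage after every call.
void GrowScratch() {
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  uint8_t* data = nullptr;
  int64_t size = 0;
  ARROW_CHECK_OK(pool->Allocate(size, &data));
  for (int64_t new_size = 4096; new_size <= 256 << 10; new_size += 4096) {
    ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data));
    size = new_size;
  }
  pool->Free(data, size);
}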
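
And a sketch of the alternative the FIXME in memory_pool_jemalloc.cc points at: letting the resize report the actual allocation size instead of demanding an exact match. The free-standing function, the actual_size out-parameter, and the got_size >= new_size success test are assumptions for illustration, not this patch's API; adopting them would mean threading the actual size through the callers so it can be passed accurately on deallocation:

#include <cstdint>

#include <jemalloc/jemalloc.h>  // for xallocx

// Hypothetical variant, not part of this patch: report the actual size back
// to the caller instead of requiring got_size == new_size.
bool ResizeInPlaceReportingActualSize(int64_t new_size, uint8_t* ptr,
                                      int64_t* actual_size) {
  // xallocx returns the real size of the allocation, which jemalloc may round
  // up to a size class; the resize succeeded if at least new_size is available.
  int64_t got_size = static_cast<int64_t>(
      xallocx(ptr, static_cast<size_t>(new_size), /*extra=*/0, /*flags=*/0));
  if (got_size >= new_size) {
    // The caller would record this size and pass it back when deallocating
    // the area (e.g. to sdallocx).
    *actual_size = got_size;
    return true;
  }
  return false;
}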