Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use thread-safe cache in thread_local_caching_allocator #6539

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions libs/core/allocator_support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ set(allocator_support_compat_headers
)
# cmake-format: on

set(allocator_support_sources)
set(allocator_support_sources thread_local_caching_allocator.cpp)

include(HPX_AddModule)
add_hpx_module(
Expand All @@ -52,6 +52,7 @@ add_hpx_module(
HEADERS ${allocator_support_headers}
COMPAT_HEADERS ${allocator_support_compat_headers}
DEPENDENCIES hpx_dependencies_allocator
MODULE_DEPENDENCIES hpx_concepts hpx_config hpx_preprocessor hpx_type_support
MODULE_DEPENDENCIES hpx_assertion hpx_concepts hpx_config hpx_preprocessor
hpx_type_support
CMAKE_SUBDIRS examples tests
)
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2023 Hartmut Kaiser
// Copyright (c) 2023-2024 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
Expand All @@ -8,11 +8,13 @@

#include <hpx/config.hpp>
#include <hpx/allocator_support/config/defines.hpp>
#include <hpx/assert.hpp>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <new>
#include <stack>
#include <type_traits>
#include <utility>

Expand All @@ -21,14 +23,42 @@
#if defined(HPX_ALLOCATOR_SUPPORT_HAVE_CACHING) && \
!((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) || \
defined(HPX_HAVE_HIP))

namespace detail {

// Hands the clean-up callback clear_cache to the cache selected by the first
// argument (the cache index).
HPX_CORE_EXPORT void init_allocator_cache(
std::size_t, std::function<void()>&& clear_cache);
// Takes one block out of the cache with the given index and returns it as a
// (pointer, size) pair.
HPX_CORE_EXPORT std::pair<void*, std::size_t> allocate_from_cache(
std::size_t) noexcept;
// Returns true if the cache with the given index currently holds no blocks.
[[nodiscard]] HPX_CORE_EXPORT bool cache_empty(std::size_t) noexcept;
// Puts the block p of size n into the cache with the given index.
HPX_CORE_EXPORT void return_to_cache(
std::size_t, void* p, std::size_t n);

// maximal number of caches; valid cache indices are [0, max_number_of_caches)
inline constexpr int max_number_of_caches = 16;

///////////////////////////////////////////////////////////////////////
// Returns the exponent of the smallest power of two that is not less than n,
// i.e. the smallest e with (1 << e) >= n. Any n <= 1 yields 0.
constexpr int next_power_of_two(std::int64_t n) noexcept
{
    // The number of significant bits in (n - 1) is the requested exponent.
    std::int64_t remaining = n - 1;
    int exponent = 0;
    while (remaining > 0)
    {
        remaining >>= 1;
        ++exponent;
    }
    return exponent;
}
} // namespace detail

///////////////////////////////////////////////////////////////////////////
template <typename T = char, typename Allocator = std::allocator<T>>
struct thread_local_caching_allocator
{
HPX_NO_UNIQUE_ADDRESS Allocator alloc;

private:
using traits = std::allocator_traits<Allocator>;

public:
using value_type = typename traits::value_type;
using pointer = typename traits::pointer;
using const_pointer = typename traits::const_pointer;
Expand All @@ -50,86 +80,38 @@
using propagate_on_container_swap =
typename traits::propagate_on_container_swap;

private:
struct allocated_cache
explicit thread_local_caching_allocator(
Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Allocator>))
: alloc(alloc)
{
explicit allocated_cache(Allocator const& a) noexcept(
noexcept(std::is_nothrow_copy_constructible_v<Allocator>))
: alloc(a)
{
}

allocated_cache(allocated_cache const&) = delete;
allocated_cache(allocated_cache&&) = delete;
allocated_cache& operator=(allocated_cache const&) = delete;
allocated_cache& operator=(allocated_cache&&) = delete;

~allocated_cache()
{
clear_cache();
}

pointer allocate(size_type n)
{
pointer p;
if (data.empty())
// Note: capturing the allocator will be ok only as long as it
// doesn't have any state as this lambda will be possibly called
// very late during destruction of the thread_local cache.
static_assert(std::is_empty_v<Allocator>,
"Please don't use allocators with state in conjunction with "
"the thread_local_caching_allocator");

constexpr std::size_t num_cache =
detail::next_power_of_two(sizeof(T));

static_assert(num_cache < detail::max_number_of_caches,
"This allocator does not support allocating objects larger "
"than 2^16 bytes");

auto f = [=]() mutable {
while (!detail::cache_empty(num_cache))
{
p = traits::allocate(alloc, n);
if (p == nullptr)
auto [p, n] = detail::allocate_from_cache(num_cache);
if (p != nullptr)
{
throw std::bad_alloc();
traits::deallocate(const_cast<Allocator&>(alloc),
static_cast<char*>(p), n);
}
}
else
{
p = data.top().first;
data.pop();
}

++allocated;
return p;
}
};

void deallocate(pointer p, size_type n) noexcept
{
data.push(std::make_pair(p, n));
if (++deallocated > 2 * (allocated + 16))
{
clear_cache();
allocated = 0;
deallocated = 0;
}
}

private:
void clear_cache() noexcept
{
while (!data.empty())
{
traits::deallocate(
alloc, data.top().first, data.top().second);
data.pop();
}
}

HPX_NO_UNIQUE_ADDRESS Allocator alloc;
std::stack<std::pair<T*, size_type>> data;
std::size_t allocated = 0;
std::size_t deallocated = 0;
};

allocated_cache& cache()
{
thread_local allocated_cache allocated_data(alloc);
return allocated_data;
}

public:
explicit thread_local_caching_allocator(
Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Allocator>))
: alloc(alloc)
{
detail::init_allocator_cache(num_cache, HPX_MOVE(f));
}

template <typename U, typename Alloc>
Expand All @@ -154,16 +136,32 @@

[[nodiscard]] pointer allocate(size_type n, void const* = nullptr)
{
if (max_size() < n)
constexpr std::size_t num_cache =
detail::next_power_of_two(sizeof(T));
std::size_t N = n * (1ull << num_cache);

if (max_size() < N)
{
throw std::bad_array_new_length();
}
return cache().allocate(n);

auto [p, _] = detail::allocate_from_cache(num_cache);

Check notice

Code scanning / CodeQL

Unused local variable Note

Variable _ is not used.
if (p == nullptr)
{
p = traits::allocate(alloc, N);
if (p == nullptr)
{
throw std::bad_alloc();
}
}
return static_cast<pointer>(p);
}

void deallocate(pointer p, size_type n) noexcept
void deallocate(pointer p, size_type n)
{
cache().deallocate(p, n);
constexpr std::size_t num_cache =
detail::next_power_of_two(sizeof(T));
detail::return_to_cache(num_cache, p, n * (1ull << num_cache));
}

[[nodiscard]] constexpr size_type max_size() noexcept
Expand Down
123 changes: 123 additions & 0 deletions libs/core/allocator_support/src/thread_local_caching_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Copyright (c) 2023-2024 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/config.hpp>
#include <hpx/allocator_support/config/defines.hpp>

#if defined(HPX_ALLOCATOR_SUPPORT_HAVE_CACHING) && \
!((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) || \
defined(HPX_HAVE_HIP))

#include <hpx/allocator_support/thread_local_caching_allocator.hpp>
#include <hpx/assert.hpp>
#include <hpx/type_support/static_reinit_interface.hpp>

#include <cstddef>
#include <functional>
#include <stack>
#include <utility>

namespace hpx::util::detail {

///////////////////////////////////////////////////////////////////////////
// A stack of (pointer, size) blocks kept for reuse, plus a user supplied
// callback that is asked to empty the stack again.
//
// The cache never frees memory itself: releasing the stored blocks is the
// job of the callback passed to init().
struct allocated_cache
{
    explicit allocated_cache() noexcept = default;

    // The cache is neither copyable nor movable.
    allocated_cache(allocated_cache const&) = delete;
    allocated_cache(allocated_cache&&) = delete;
    allocated_cache& operator=(allocated_cache const&) = delete;
    allocated_cache& operator=(allocated_cache&&) = delete;

    // Gives the callback a last chance to release whatever is still stored.
    ~allocated_cache()
    {
        run_clear_callback();
    }

    // Installs the clean-up callback and registers it with
    // util::reinit_register (with an empty construction function). Does
    // nothing if a callback is already installed.
    void init(std::function<void()>&& clear)
    {
        if (clear_cache)
        {
            return;    // already initialized
        }

        clear_cache = HPX_MOVE(clear);
        util::reinit_register(std::function<void()>(), clear_cache);
    }

    // Removes the most recently stored block and returns it together with
    // its recorded size; returns {nullptr, 0} if nothing is stored.
    std::pair<void*, std::size_t> allocate() noexcept
    {
        if (data.empty())
        {
            return std::pair<void*, std::size_t>{nullptr, 0};
        }

        std::pair<void*, std::size_t> block = data.top();
        data.pop();
        ++allocated;
        return block;
    }

    // Stores the block (p, n) for later reuse. Once the number of stored
    // blocks since the last reset exceeds 2 * (allocated + 16), the callback
    // is invoked and both counters start over.
    void deallocate(void* p, std::size_t n)
    {
        data.emplace(p, n);

        ++deallocated;
        bool const below_threshold = deallocated <= 2 * (allocated + 16);
        if (below_threshold)
        {
            return;
        }

        run_clear_callback();
        allocated = 0;
        deallocated = 0;
    }

    // True if no block is currently stored.
    [[nodiscard]] bool empty() const noexcept
    {
        return data.empty();
    }

private:
    // Invokes the clean-up callback if one has been installed.
    void run_clear_callback()
    {
        if (clear_cache)
        {
            clear_cache();
        }
    }

    std::stack<std::pair<void*, std::size_t>> data;    // stored blocks
    std::size_t allocated = 0;      // blocks handed out since last reset
    std::size_t deallocated = 0;    // blocks stored since last reset
    std::function<void()> clear_cache;
};

///////////////////////////////////////////////////////////////////////////
// Returns the calling thread's cache with index n.
//
// The caches live in a function-local thread_local array of
// max_number_of_caches elements, so every thread works on its own set of
// caches. n must be smaller than max_number_of_caches (checked by HPX_ASSERT).
allocated_cache& cache(std::size_t n)
{
HPX_ASSERT(n < max_number_of_caches);

// one default-constructed cache per index, per thread
thread_local allocated_cache allocated_data[max_number_of_caches];
return allocated_data[n];
}

// Hands clear_cache to the calling thread's cache number n.
// allocated_cache::init() stores the callback only while the cache has no
// callback yet; otherwise the argument is left unused.
void init_allocator_cache(
std::size_t n, std::function<void()>&& clear_cache)
{
cache(n).init(HPX_MOVE(clear_cache));
}

// Takes one block out of the calling thread's cache number n and returns it
// as a (pointer, size) pair. The result is {nullptr, 0} if that cache holds
// no blocks.
std::pair<void*, std::size_t> allocate_from_cache(std::size_t n) noexcept
{
return cache(n).allocate();
}

// Stores the block p, together with its size, in the calling thread's cache
// number n for later reuse. allocated_cache::deallocate() may invoke the
// cache's clear callback once enough blocks have been returned.
void return_to_cache(std::size_t n, void* p, std::size_t const size)
{
cache(n).deallocate(p, size);
}

// Returns true if the calling thread's cache number n holds no blocks.
bool cache_empty(std::size_t n) noexcept
{
return cache(n).empty();
}
} // namespace hpx::util::detail

#endif
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2007-2022 Hartmut Kaiser
// Copyright (c) 2007-2024 Hartmut Kaiser
// Copyright (c) 2013 Agustin Berge
// Copyright (c) 2017 Denis Blank
//
Expand Down Expand Up @@ -201,14 +201,16 @@ namespace hpx::lcos::detail {
return async_visit_future(HPX_FORWARD(T, current));
}

// clang-format off
template <typename T, typename N>
auto operator()(hpx::util::async_traverse_detach_tag, T&& current,
N&& next) -> decltype(async_detach_future(HPX_FORWARD(T, current),
HPX_FORWARD(N, next)))
HPX_FORWARD(N, next)))
{
return async_detach_future(
HPX_FORWARD(T, current), HPX_FORWARD(N, next));
}
// clang-format on

template <typename T>
void operator()(hpx::util::async_traverse_complete_tag, T&& pack)
Expand Down
Loading
Loading