Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/arena allocator #4239

Merged
merged 25 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
036882a
Implement the arena allocator.
Shylock-Hg Aug 12, 2021
5ecb5ad
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 12, 2021
88a070a
Add the test cases.
Shylock-Hg Aug 12, 2021
2810518
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 13, 2021
f53132a
Format.
Shylock-Hg Aug 13, 2021
eb14bb0
Fix the allocate size when expand new chunk.
Shylock-Hg Aug 13, 2021
00348ff
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 13, 2021
0bbdcd5
Remove the container for chunks. Remove some debug info in release mo…
Shylock-Hg Aug 13, 2021
1b689b4
Replace uint8_t by byte type for more meanful.
Shylock-Hg Aug 13, 2021
22c2539
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 16, 2021
57bc516
Avoid crash.
Shylock-Hg Aug 16, 2021
2a8a2f2
Add branch predication.
Shylock-Hg Aug 16, 2021
4bd10d9
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 16, 2021
74a0469
Format by clang-format 10
Shylock-Hg Aug 16, 2021
b7b6c8c
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 20, 2021
e5c62ff
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Aug 26, 2021
2523b79
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Mar 25, 2022
16d5f1a
Apply arena allocator.
Shylock-Hg Apr 1, 2022
201eb03
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Apr 1, 2022
0d552b5
Merge branch 'master' into feature/arena-allocator
Shylock-Hg Apr 15, 2022
d96c15e
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Apr 18, 2022
88ec217
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg May 9, 2022
39fa479
Merge branch 'master' into feature/arena-allocator
Sophie-Xie Jun 7, 2022
a018dc5
Rebase.
Shylock-Hg Jun 7, 2022
7629e6f
Merge branch 'master' into feature/arena-allocator
Sophie-Xie Jun 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/common/base/Arena.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#include "common/base/Arena.h"

#include <cstdint>

namespace nebula {

// Allocates `alloc` bytes from the arena and returns a pointer aligned to kAlignment.
//
// The request is served from the current chunk when that chunk still has room for the
// allocation plus the padding needed to reach the next aligned address. Otherwise a
// new chunk is started and whatever was left in the old chunk is abandoned.
//
// `alloc` must be non-zero (verified with DCHECK_NE only) and must not exceed
// kMaxChunkSize. A larger request is reported through DLOG(FATAL) and nullptr is
// returned, so callers must not construct an object at the returned address without
// knowing the size is within the limit.
void* Arena::allocateAligned(const std::size_t alloc) {
  DCHECK_NE(alloc, 0);  // don't allow zero sized allocation
  // kAlignment is a power of two, so `x & (kAlignment - 1)` is the same as `x % kAlignment`.
  static_assert(kAlignment && !(kAlignment & (kAlignment - 1)), "Align must be power of 2.");
  constexpr std::size_t kAlignMask = kAlignment - 1;

  // Check the request itself rather than request + padding: a fresh chunk needs no
  // padding, so the limit must not depend on where the current pointer happens to be.
  if (UNLIKELY(alloc > kMaxChunkSize)) {
    DLOG(FATAL) << "Arena can't allocate so large memory.";
    return nullptr;
  }

  // Bytes to skip to reach the next aligned address. The trailing mask makes this 0
  // (instead of kAlignment) when currentPtr_ is already aligned, so no space is wasted.
  const std::size_t pad =
      (kAlignment - (reinterpret_cast<uintptr_t>(currentPtr_) & kAlignMask)) & kAlignMask;
  const std::size_t consumption = alloc + pad;

  if (LIKELY(consumption <= availableSize_)) {
    // Fast path: carve the block out of the current chunk.
    void* ptr = currentPtr_ + pad;
    currentPtr_ += consumption;
#ifndef NDEBUG
    allocatedSize_ += consumption;
#endif
    availableSize_ -= consumption;
    return ptr;
  }

  // Slow path: start a new chunk that is large enough for this request.
  // newChunk() sets availableSize_ to the requested chunk size, which is >= alloc.
  newChunk(std::max(alloc, kMinChunkSize));
  // The new operator will allocate the aligned memory
  DCHECK_EQ(reinterpret_cast<uintptr_t>(currentPtr_) & kAlignMask, 0);
  void* ptr = currentPtr_;
  currentPtr_ += alloc;
#ifndef NDEBUG
  allocatedSize_ += alloc;
#endif
  availableSize_ -= alloc;
  return ptr;
}

} // namespace nebula
92 changes: 92 additions & 0 deletions src/common/base/Arena.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#pragma once

#include <folly/Likely.h>

#include <boost/core/noncopyable.hpp>
#include <cstddef>
#include <limits>
#include <type_traits>

#include "common/base/Logging.h"
#include "common/cpp/helpers.h"

namespace nebula {

// MT-unsafe arena allocator.
// It is optimized for constructing/destructing many small objects: memory is handed out
// by bumping a pointer inside a chunk and is only returned to the system when the arena
// itself is destroyed. The arena never runs destructors of objects placed in it; that is
// the caller's responsibility.
class Arena : public boost::noncopyable, cpp::NonMovable {
 public:
  // Releases every chunk by walking the singly linked list from the newest chunk back.
  ~Arena() {
    while (LIKELY(currentChunk_ != nullptr)) {
      auto *prev = currentChunk_->prev;
      // The storage was obtained with `new std::byte[...]` in newChunk(), so it must be
      // released through a std::byte pointer. `delete[]` on a Chunk* would not match the
      // allocation's type, which is undefined behaviour. Chunk itself is trivially
      // destructible, so no destructor call is needed before freeing.
      delete[] reinterpret_cast<std::byte *>(currentChunk_);
      currentChunk_ = prev;
    }
#ifndef NDEBUG
    allocatedSize_ = 0;
#endif
    availableSize_ = 0;
    currentPtr_ = nullptr;
  }

  // Returns `alloc` bytes aligned to kAlignment (the alignment of std::max_align_t).
  // CPUs access memory in aligned units, so constructing objects at aligned addresses
  // reduces the number of memory accesses and speeds up reads/writes.
  void *allocateAligned(const std::size_t alloc);

#ifndef NDEBUG
  // Debug-only: bytes handed out so far, including alignment padding.
  std::size_t allocatedSize() const {
    return allocatedSize_;
  }
#endif

  // Bytes still unused in the current chunk.
  std::size_t availableSize() const {
    return availableSize_;
  }

 private:
  // Smallest payload size of a freshly created chunk.
  static constexpr std::size_t kMinChunkSize = 4096;
  // Upper bound for a single allocation / chunk payload.
  static constexpr std::size_t kMaxChunkSize = std::numeric_limits<uint16_t>::max();
  // Every pointer returned by allocateAligned() is a multiple of this.
  static constexpr std::size_t kAlignment = std::alignment_of<std::max_align_t>::value;

  // Header stored at the beginning of every chunk. The union pads the header to
  // kAlignment bytes so that the payload following it starts at an aligned offset.
  struct Chunk {
    explicit Chunk(Chunk *p) : prev{p} {}

    union {
      Chunk *prev{nullptr};
      std::byte aligned[kAlignment];
    };
  };

  // Allocates a new chunk with `size` payload bytes and makes it the current one.
  // The previous chunk stays linked through Chunk::prev so the destructor can free it.
  // The current pointer will keep alignment.
  void newChunk(std::size_t size) {
    DCHECK_NE(size, 0);
    std::byte *ptr = new std::byte[size + sizeof(Chunk)];
    currentChunk_ = new (ptr) Chunk(currentChunk_);
    availableSize_ = size;
    currentPtr_ = (ptr + sizeof(Chunk));
  }

  // Newest chunk; head of the list of all chunks owned by this arena.
  Chunk *currentChunk_{nullptr};
  // These are debug info
  // Remove to speed up in Release build
#ifndef NDEBUG
  // Total size allocated (padding included).
  std::size_t allocatedSize_{0};
#endif
  // Size still available in the current chunk.
  std::size_t availableSize_{0};
  // The total chunks size
  // = allocatedSize_ + availableSize_ + abandoned memory (chunk tails too small for a request)
  // Current pointer to the first free byte of the current chunk.
  std::byte *currentPtr_{nullptr};
};

} // namespace nebula
1 change: 1 addition & 0 deletions src/common/base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nebula_add_library(
Status.cpp
SanitizerOptions.cpp
SignalHandler.cpp
Arena.cpp
${gdb_debug_script}
)

Expand Down
31 changes: 18 additions & 13 deletions src/common/base/ObjectPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <list>
#include <type_traits>

#include "common/base/Arena.h"
#include "common/base/Logging.h"
#include "common/cpp/helpers.h"

Expand All @@ -26,26 +27,19 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
public:
ObjectPool() {}

~ObjectPool() = default;
~ObjectPool() {
clear();
}

void clear() {
SLGuard g(lock_);
objects_.clear();
}

template <typename T>
T *add(T *obj) {
if constexpr (std::is_base_of<Expression, T>::value) {
VLOG(3) << "New expression added into pool: " << obj->toString();
}
SLGuard g(lock_);
objects_.emplace_back(obj);
return obj;
}

template <typename T, typename... Args>
T *makeAndAdd(Args &&... args) {
return add(new T(std::forward<Args>(args)...));
void *ptr = arena_.allocateAligned(sizeof(T));
return add(new (ptr) T(std::forward<Args>(args)...));
}

bool empty() const {
Expand All @@ -58,7 +52,7 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
public:
template <typename T>
explicit OwnershipHolder(T *obj)
: obj_(obj), deleteFn_([](void *p) { delete reinterpret_cast<T *>(p); }) {}
: obj_(obj), deleteFn_([](void *p) { reinterpret_cast<T *>(p)->~T(); }) {}

~OwnershipHolder() {
deleteFn_(obj_);
Expand All @@ -69,7 +63,18 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
std::function<void(void *)> deleteFn_;
};

template <typename T>
nevermore3 marked this conversation as resolved.
Show resolved Hide resolved
T *add(T *obj) {
if constexpr (std::is_base_of<Expression, T>::value) {
VLOG(3) << "New expression added into pool: " << obj->toString();
}
SLGuard g(lock_);
objects_.emplace_back(obj);
return obj;
}

std::list<OwnershipHolder> objects_;
Arena arena_;

folly::SpinLock lock_;
};
Expand Down
90 changes: 90 additions & 0 deletions src/common/base/test/ArenaBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#include <folly/Benchmark.h>
#include <folly/init/Init.h>
#include <folly/memory/Arena.h>

#include <string>
#include <type_traits>

#include "common/base/Arena.h"
#include "common/expression/LabelExpression.h"

namespace nebula {

// Expression type used as the allocation payload in the benchmarks below.
// It forwards a dummy, non-null ObjectPool pointer (address 1) to the base class so
// that no real pool is needed; presumably the base only stores the pointer and never
// dereferences it here -- TODO confirm against LabelExpression.
class TestExpr : public LabelExpression {
public:
explicit TestExpr(const std::string &name = "")
: LabelExpression(reinterpret_cast<ObjectPool *>(1), name) {}
};

// Baseline: construct and destroy a TestExpr with the global new/delete,
// 1000 times per benchmark iteration.
BENCHMARK(DefaultAllocator, iters) {
  const std::size_t rounds = iters * 1000;
  for (std::size_t i = 0; i < rounds; ++i) {
    auto *expr = new TestExpr("Label");
    // Keep the compiler from eliding the allocation/construction being measured.
    folly::doNotOptimizeAway(expr);
    delete expr;
  }
}

// Same workload as DefaultAllocator, but the storage comes from nebula::Arena.
// Only the destructor runs per object; the memory is released when the arena goes out
// of scope at the end of the benchmark.
BENCHMARK_RELATIVE(ArenaAllocator, iters) {
  const std::size_t rounds = iters * 1000;
  Arena arena;
  for (std::size_t i = 0; i < rounds; ++i) {
    auto *ptr = arena.allocateAligned(sizeof(TestExpr));
    auto *expr = new (ptr) TestExpr("Label");
    // Keep the compiler from eliding the allocation/construction being measured.
    folly::doNotOptimizeAway(expr);
    expr->~TestExpr();
  }
}

// Same workload again, with folly::SysArena as the reference arena implementation.
BENCHMARK_RELATIVE(FollyArenaAllocator, iters) {
  const std::size_t rounds = iters * 1000;
  folly::SysArena arena;
  for (std::size_t i = 0; i < rounds; ++i) {
    auto *ptr = arena.allocate(sizeof(TestExpr));
    auto *expr = new (ptr) TestExpr("Label");
    // Keep the compiler from eliding the allocation/construction being measured.
    folly::doNotOptimizeAway(expr);
    expr->~TestExpr();
  }
}

BENCHMARK_DRAW_LINE();

} // namespace nebula

// Benchmark entry point: initializes folly with the command-line arguments and then
// runs every benchmark registered in this translation unit.
int main(int argc, char **argv) {
folly::init(&argc, &argv, true);

folly::runBenchmarks();
return 0;
}

// CPU info
// Brand Raw: Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz
// Hz Advertised Friendly: 3.0000 GHz
// Hz Actual Friendly: 3.2942 GHz
// Hz Advertised: (3000000000, 0)
// Hz Actual: (3294220000, 0)
// Arch: X86_64
// Bits: 64
// Count: 40
// Arch String Raw: x86_64
// L1 Data Cache Size: 32768
// L1 Instruction Cache Size: 32768
// L2 Cache Size: 262144
// L2 Cache Line Size: 256
// L2 Cache Associativity: 6
// L3 Cache Size: 26214400
//
// Build in Release mode
//
// ============================================================================
// /home/shylock.huang/nebula/src/common/base/test/ArenaBenchmark.cpprelative time/iter iters/s
// ============================================================================
// DefaultAllocator 36.59us 27.33K
// ArenaAllocator 145.89% 25.08us 39.87K
// FollyArenaAllocator 138.96% 26.33us 37.98K
Comment on lines +86 to +88
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job! We should also update the parser benchmarks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark of origin:

============================================================================
/home/shylock.huang/nebula-tmp2/src/parser/test/ParserBenchmark.cpprelative  time/iter  iters/s
============================================================================
SimpleQuery(1_thread)                                        1.20us  836.34K
SimpleQuery(2_thread)                             96.66%     1.24us  808.44K
SimpleQuery(4_thread)                             94.08%     1.27us  786.79K
SimpleQuery(8_thread)                             94.90%     1.26us  793.64K
SimpleQuery(16_thread)                            95.07%     1.26us  795.14K
SimpleQuery(32_thread)                            66.19%     1.81us  553.59K
SimpleQuery(48_thread)                            47.52%     2.52us  397.44K
----------------------------------------------------------------------------
ComplexQuery(1_thread)                                       8.61us  116.12K
ComplexQuery(2_thread)                            97.47%     8.84us  113.18K
ComplexQuery(4_thread)                            95.44%     9.02us  110.83K
ComplexQuery(8_thread)                            97.86%     8.80us  113.64K
ComplexQuery(16_thread)                           98.14%     8.78us  113.96K
ComplexQuery(32_thread)                           95.71%     9.00us  111.14K
ComplexQuery(48_thread)                           60.94%    14.13us   70.76K
----------------------------------------------------------------------------
MatchConflictQuery(1_thread)                                 3.65us  274.17K
MatchConflictQuery(2_thread)                      96.74%     3.77us  265.23K
MatchConflictQuery(4_thread)                      94.19%     3.87us  258.25K
MatchConflictQuery(8_thread)                      93.17%     3.91us  255.45K
MatchConflictQuery(16_thread)                     94.70%     3.85us  259.64K
MatchConflictQuery(32_thread)                     91.22%     4.00us  250.10K
MatchConflictQuery(48_thread)                     57.08%     6.39us  156.50K
============================================================================

Benchmark of current:

============================================================================
/home/shylock.huang/nebula-tmp/src/parser/test/ParserBenchmark.cpprelative  time/iter  iters/s
============================================================================
SimpleQuery(1_thread)                                        1.07us  931.55K
SimpleQuery(2_thread)                             98.63%     1.09us  918.82K
SimpleQuery(4_thread)                             97.50%     1.10us  908.26K
SimpleQuery(8_thread)                             96.84%     1.11us  902.09K
SimpleQuery(16_thread)                            93.79%     1.14us  873.73K
SimpleQuery(32_thread)                            65.51%     1.64us  610.22K
SimpleQuery(48_thread)                            41.94%     2.56us  390.73K
----------------------------------------------------------------------------
ComplexQuery(1_thread)                                       8.34us  119.88K
ComplexQuery(2_thread)                            98.58%     8.46us  118.18K
ComplexQuery(4_thread)                            96.30%     8.66us  115.45K
ComplexQuery(8_thread)                            92.82%     8.99us  111.27K
ComplexQuery(16_thread)                           93.72%     8.90us  112.36K
ComplexQuery(32_thread)                           94.08%     8.87us  112.78K
ComplexQuery(48_thread)                           59.36%    14.05us   71.16K
----------------------------------------------------------------------------
MatchConflictQuery(1_thread)                                 3.83us  261.02K
MatchConflictQuery(2_thread)                      97.54%     3.93us  254.61K
MatchConflictQuery(4_thread)                      99.23%     3.86us  259.00K
MatchConflictQuery(8_thread)                      96.97%     3.95us  253.10K
MatchConflictQuery(16_thread)                     96.94%     3.95us  253.03K
MatchConflictQuery(32_thread)                     91.72%     4.18us  239.40K
MatchConflictQuery(48_thread)                     57.47%     6.67us  150.01K
============================================================================

// ----------------------------------------------------------------------------
// ============================================================================
46 changes: 46 additions & 0 deletions src/common/base/test/ArenaTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#include <gtest/gtest.h>

#include <type_traits>

#include "common/base/Arena.h"

namespace nebula {

// Every allocation, whatever its size, must succeed and come back aligned to
// std::max_align_t. Sizes 1, 9, 17, ... < 4096 exercise both the in-chunk path and the
// new-chunk path.
TEST(ArenaTest, Basic) {
  Arena a;
  constexpr uintptr_t kAlign = std::alignment_of<std::max_align_t>::value;

  for (std::size_t size = 1; size < 4096; size += 8) {
    void *ptr = a.allocateAligned(size);
    // A null result would make the alignment check below pass vacuously.
    ASSERT_NE(ptr, nullptr);
    EXPECT_EQ(reinterpret_cast<uintptr_t>(ptr) % kAlign, 0U);
  }
}

// Objects can be placement-constructed in arena memory and read back intact.
// Each pointer is checked before use so a failed allocation is reported as a test
// failure instead of a crash inside placement new.
TEST(ArenaTest, Construct) {
  Arena a;
  {
    // Non-trivial type: needs an explicit destructor call, the arena never runs it.
    void *ptr = a.allocateAligned(sizeof(std::string));
    ASSERT_NE(ptr, nullptr);
    auto *obj = new (ptr) std::string("Hello World!");
    EXPECT_EQ(*obj, "Hello World!");
    obj->~basic_string();
  }
  {
    void *ptr = a.allocateAligned(sizeof(int));
    ASSERT_NE(ptr, nullptr);
    auto *obj = new (ptr) int(3);  // NOLINT
    EXPECT_EQ(*obj, 3);
  }
  {
    // Many small allocations in a row; `int` index keeps the comparison same-signed.
    for (int i = 0; i < 1024; ++i) {
      void *ptr = a.allocateAligned(sizeof(int));
      ASSERT_NE(ptr, nullptr);
      auto *obj = new (ptr) int(i);  // NOLINT
      EXPECT_EQ(*obj, i);
    }
  }
}

} // namespace nebula
30 changes: 30 additions & 0 deletions src/common/base/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,35 @@ target_compile_options(range_vs_transform_bm PRIVATE -O3)
# Test target for ObjectPoolTest.cpp. It is given the base_obj objects (Arena.cpp is
# listed among that library's sources in this change) and links gtest/gtest_main.
nebula_add_test(
NAME object_pool_test
SOURCES ObjectPoolTest.cpp
OBJECTS $<TARGET_OBJECTS:base_obj>
LIBRARIES gtest gtest_main
)

# Benchmark executable built from ArenaBenchmark.cpp. That source includes
# LabelExpression.h, so the expression objects are listed here; the remaining object
# libraries are presumably their transitive dependencies -- TODO confirm.
nebula_add_executable(
NAME arena_bm
SOURCES ArenaBenchmark.cpp
OBJECTS
$<TARGET_OBJECTS:base_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:expression_obj>
$<TARGET_OBJECTS:function_manager_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:ast_match_path_obj>
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
LIBRARIES
follybenchmark
${THRIFT_LIBRARIES}
)

# Test target for ArenaTest.cpp; it is given only the base_obj objects and links
# gtest/gtest_main.
nebula_add_test(
NAME arena_test
SOURCES ArenaTest.cpp
OBJECTS
$<TARGET_OBJECTS:base_obj>
LIBRARIES
gtest
gtest_main
)
Loading