-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Performance live heap profiling - deallocation code path (#298)
* Performance live heap profiling - deallocation code path. Add a bitset to track the addresses that are kept for heap profiling. Remove the lock in the deallocation code path. Add a stat on unmatched allocations, which could be a source of increased CPU consumption.
- Loading branch information
Showing
19 changed files
with
530 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Unless explicitly stated otherwise all files in this repository are licensed | ||
// under the Apache License Version 2.0. This product includes software | ||
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present | ||
// Datadog, Inc. | ||
#pragma once | ||
|
||
#include <atomic> | ||
#include <memory> | ||
#include <stdint.h> | ||
#include <string.h> | ||
|
||
namespace ddprof { | ||
/// Fixed-size set of addresses stored as individual bits.
///
/// The number of bits is the number of addresses we can store (one address
/// per bit): with 4 bits (1111) you can store 4 addresses.
/// We hash the address to a number (to have an equal probability of using
/// all bits), then we use the mask to position this address in our bitset:
///   Addr -> Hash -> Mask (to get usable bits) -> Position in the bitset
/// Two different addresses can land on the same bit; in that case add()
/// reports a collision by returning false.
/// Note: the hashing step might be bad for cache locality.
class AddressBitset {
public:
  // Default size in bits: 8 Mi bits, i.e. 1 MiB of storage
  constexpr static unsigned _k_default_bitset_size = 8 * 1024 * 1024;

  // bitset_size is expressed in bits. The default of 0 allocates no storage,
  // in which case add() / remove() must not be called on the object.
  explicit AddressBitset(unsigned bitset_size = 0) { init(bitset_size); }
  AddressBitset(AddressBitset &&other) noexcept;
  AddressBitset &operator=(AddressBitset &&other) noexcept;

  // Move-only type: copies are forbidden
  AddressBitset(const AddressBitset &other) = delete;
  AddressBitset &operator=(const AddressBitset &other) = delete;

  // returns true if the element was inserted
  bool add(uintptr_t addr);
  // returns true if the element was removed
  bool remove(uintptr_t addr);
  // unsets every bit and subtracts the unset bits from the element count
  void clear();
  // number of addresses currently accounted for in the bitset
  int count() const { return _nb_addresses; }

private:
  static constexpr unsigned _k_max_bits_ignored = 4;
  // Number of low address bits dropped before hashing
  unsigned _lower_bits_ignored = _k_max_bits_ignored;
  // element type
  using Word_t = uint64_t;
  constexpr static unsigned _nb_bits_per_word = sizeof(Word_t) * 8;
  // The probability of collision is proportional to the number of elements
  // already within the bitset
  unsigned _bitset_size = {};  // size in bits
  unsigned _k_nb_words = {};   // number of Word_t elements in the storage
  unsigned _nb_bits_mask = {}; // mask applied to the hash to get a position
  // We can not use an actual bitset (for atomicity reasons)
  std::unique_ptr<std::atomic<uint64_t>[]> _address_bitset;
  std::atomic<int> _nb_addresses{0};

  void init(unsigned bitset_size);

  void move_from(AddressBitset &other) noexcept;

  // This is a kind of hash function
  // We remove the lower bits (as the alignment constraints makes them useless)
  // We fold the address (upper 32 bits xor lower 32 bits)
  // Then we only keep the bits that matter for the order in the bitmap
  uint32_t hash_significant_bits(uintptr_t h1) const {
    const uint64_t intermediate =
        static_cast<uint64_t>(h1) >> _lower_bits_ignored;
    const auto high = static_cast<uint32_t>(intermediate >> 32);
    const auto low = static_cast<uint32_t>(intermediate);
    return (high ^ low) & _nb_bits_mask;
  }
};
} // namespace ddprof |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// Unless explicitly stated otherwise all files in this repository are licensed | ||
// under the Apache License Version 2.0. This product includes software | ||
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present | ||
// Datadog, Inc. | ||
#include "address_bitset.hpp" | ||
|
||
#include <algorithm> | ||
#include <bit> | ||
#include <functional> | ||
#include <unlikely.hpp> | ||
|
||
namespace ddprof { | ||
|
||
namespace { | ||
// Returns the smallest power of two that is >= num.
// - 0 is returned unchanged (it means "no bitset").
// - Values above the largest power of two representable in an unsigned
//   saturate to that largest power of two, instead of shifting past the width
//   of the type (which is undefined behaviour).
unsigned round_up_to_power_of_two(unsigned num) {
  constexpr unsigned k_max_power_of_two = 1U << (sizeof(unsigned) * 8 - 1);
  if (num == 0) {
    return num;
  }
  if (num >= k_max_power_of_two) {
    return k_max_power_of_two;
  }
  // num < k_max_power_of_two, so doubling can never overflow here
  unsigned power = 1;
  while (power < num) {
    power <<= 1;
  }
  return power;
}
} // namespace | ||
|
||
// Move constructor: takes over the storage and counters of 'other' through
// move_from(), which also resets 'other'.
AddressBitset::AddressBitset(AddressBitset &&other) noexcept {
  AddressBitset &source = other;
  move_from(source);
}
|
||
// Move assignment: self-assignment is a no-op, otherwise the content of
// 'other' is transferred through move_from().
AddressBitset &AddressBitset::operator=(AddressBitset &&other) noexcept {
  if (this == &other) {
    return *this;
  }
  move_from(other);
  return *this;
}
|
||
// Transfers the state of 'other' into this object.
// After the call 'other' owns no storage and its size, word count, mask and
// element count are all zero.
void AddressBitset::move_from(AddressBitset &other) noexcept {
  // Take ownership of the storage
  _address_bitset = std::move(other._address_bitset);

  // Mirror the values describing that storage
  _lower_bits_ignored = other._lower_bits_ignored;
  _bitset_size = other._bitset_size;
  _k_nb_words = other._k_nb_words;
  _nb_bits_mask = other._nb_bits_mask;
  _nb_addresses.store(other._nb_addresses.load());

  // Leave 'other' empty
  other._bitset_size = 0;
  other._k_nb_words = 0;
  other._nb_bits_mask = 0;
  other._nb_addresses.store(0);
}
|
||
void AddressBitset::init(unsigned bitset_size) { | ||
// Due to memory alignment, on 64 bits we can assume that the first 4 | ||
// bits can be ignored | ||
_lower_bits_ignored = _k_max_bits_ignored; | ||
if (_address_bitset) { | ||
_address_bitset.reset(); | ||
} | ||
_bitset_size = round_up_to_power_of_two(bitset_size); | ||
_k_nb_words = (_bitset_size) / (_nb_bits_per_word); | ||
if (_bitset_size) { | ||
_nb_bits_mask = _bitset_size - 1; | ||
_address_bitset = std::make_unique<std::atomic<uint64_t>[]>(_k_nb_words); | ||
} | ||
} | ||
|
||
bool AddressBitset::add(uintptr_t addr) { | ||
const uint32_t significant_bits = hash_significant_bits(addr); | ||
// As per nsavoire's comment, it is better to use separate operators | ||
// than to use the div instruction which generates an extra function call | ||
// Also, the usage of a power of two value allows for bit operations | ||
const unsigned index_array = significant_bits / _nb_bits_per_word; | ||
const unsigned bit_offset = significant_bits % _nb_bits_per_word; | ||
const Word_t bit_in_element = (1UL << bit_offset); | ||
// there is a possible race between checking the value | ||
// and setting it | ||
if (!(_address_bitset[index_array].fetch_or(bit_in_element) & | ||
bit_in_element)) { | ||
// check that the element was not already set | ||
++_nb_addresses; | ||
return true; | ||
} | ||
// Collision, element was already set | ||
return false; | ||
} | ||
|
||
bool AddressBitset::remove(uintptr_t addr) { | ||
const int significant_bits = hash_significant_bits(addr); | ||
const unsigned index_array = significant_bits / _nb_bits_per_word; | ||
const unsigned bit_offset = significant_bits % _nb_bits_per_word; | ||
const Word_t bit_in_element = (1UL << bit_offset); | ||
if (_address_bitset[index_array].fetch_and(~bit_in_element) & | ||
bit_in_element) { | ||
_nb_addresses.fetch_sub(1, std::memory_order_relaxed); | ||
// in the unlikely event of a clear right at the wrong time, we could | ||
// have a negative number of elements (though count desyncs are acceptable) | ||
return true; | ||
} | ||
return false; | ||
} | ||
|
||
void AddressBitset::clear() { | ||
for (unsigned i = 0; i < _k_nb_words; ++i) { | ||
const Word_t original_value = _address_bitset[i].exchange(0); | ||
// Count number of set bits in original_value | ||
const int num_set_bits = std::popcount(original_value); | ||
if (num_set_bits > 0) { | ||
_nb_addresses.fetch_sub(num_set_bits, std::memory_order_relaxed); | ||
} | ||
} | ||
} | ||
|
||
} // namespace ddprof |
Oops, something went wrong.