diff --git a/include/lib/address_bitset.hpp b/include/lib/address_bitset.hpp index a83e81136..f9ffae4d8 100644 --- a/include/lib/address_bitset.hpp +++ b/include/lib/address_bitset.hpp @@ -20,22 +20,22 @@ class AddressBitset { // Note: the hashing step might be bad for cache locality. public: // Publish 1 Meg as default - constexpr static unsigned _k_default_max_addresses = 8 * 1024 * 1024; + constexpr static unsigned _k_default_bitset_size = 8 * 1024 * 1024; AddressBitset(unsigned max_addresses = 0) { init(max_addresses); } void init(unsigned max_addresses); // returns true if the element was inserted - bool set(uintptr_t addr); + bool add(uintptr_t addr); // returns true if the element was removed - bool unset(uintptr_t addr); + bool remove(uintptr_t addr); void clear(); - int nb_addresses() const { return _nb_addresses; } + int count() const { return _nb_addresses; } private: static constexpr unsigned _k_max_bits_ignored = 4; unsigned _lower_bits_ignored; // element type - using Elt_t = uint64_t; - constexpr static unsigned _nb_bits_per_elt = sizeof(Elt_t) * 8; + using Word_t = uint64_t; + constexpr static unsigned _nb_bits_per_elt = sizeof(Word_t) * 8; // 1 Meg divided in uint64's size // The probability of collision is proportional to the number of elements // already within the bitset @@ -46,11 +46,13 @@ class AddressBitset { std::unique_ptr[]> _address_bitset; std::atomic _nb_addresses = 0; + static unsigned int count_set_bits(Word_t w); + // This is a kind of hash function // We remove the lower bits (as the alignment constraints makes them useless) // We fold the address // Then we only keep the bits that matter for the order in the bitmap - uint32_t remove_lower_bits(uintptr_t h1) { + uint32_t hash_significant_bits(uintptr_t h1) { uint64_t intermediate = h1 >> _lower_bits_ignored; uint32_t high = (uint32_t)(intermediate >> 32); uint32_t low = (uint32_t)intermediate; diff --git a/include/lib/lib_logger.hpp b/include/lib/lib_logger.hpp index 5e2d8c51f..c27fab22e 100644 --- a/include/lib/lib_logger.hpp +++ b/include/lib/lib_logger.hpp @@ -10,13 +10,24 @@ #include namespace ddprof { -template -void log_once(char const *const format, Args... args) { + #ifdef NDEBUG - static std::once_flag flag; - std::call_once(flag, [&, format]() { fprintf(stderr, format, args...); }); +template +void log_once_helper(std::once_flag &flag, Func &&func) { + std::call_once(flag, std::forward(func)); #else - fprintf(stderr, format, args...); +template void log_once_helper(std::once_flag &, Func &&func) { + func(); #endif } + +// create a once flag for the line and file where this is called: +#define LOG_ONCE(format, ...) \ + do { \ + static std::once_flag UNIQUE_ONCE_FLAG_##__LINE__##__FILE__; \ + ddprof::log_once_helper(UNIQUE_ONCE_FLAG_##__LINE__##__FILE__, [&]() { \ + fprintf(stderr, (format), ##__VA_ARGS__); \ + }); \ + } while (0) + } // namespace ddprof diff --git a/src/lib/address_bitset.cc b/src/lib/address_bitset.cc index e8b8a7401..3f601968a 100644 --- a/src/lib/address_bitset.cc +++ b/src/lib/address_bitset.cc @@ -10,6 +10,25 @@ namespace ddprof { +namespace { +unsigned round_to_power_of_two(unsigned num) { + if (num == 0) { + return num; + } + // If max_addresses is already a power of two + if ((num & (num - 1)) == 0) { + return num; + } + // not a power of two + unsigned count = 0; + while (num) { + num >>= 1; + count++; + } + return 1 << count; +} +} // namespace + void AddressBitset::init(unsigned max_addresses) { // Due to memory alignment, on 64 bits we can assume that the first 4 // bits can be ignored @@ -17,7 +36,7 @@ void AddressBitset::init(unsigned max_addresses) { if (_address_bitset) { _address_bitset.reset(); } - _nb_bits = max_addresses; + _nb_bits = round_to_power_of_two(max_addresses); _k_nb_elements = (_nb_bits) / (_nb_bits_per_elt); if (_nb_bits) { _nb_bits_mask = _nb_bits - 1; @@ -25,14 +44,14 @@ void AddressBitset::init(unsigned max_addresses) { } } -bool AddressBitset::set(uintptr_t addr) { - uint32_t significant_bits = remove_lower_bits(addr); +bool AddressBitset::add(uintptr_t addr) { + uint32_t significant_bits = hash_significant_bits(addr); // As per nsavoire's comment, it is better to use separate operators // than to use the div instruction which generates an extra function call // Also, the usage of a power of two value allows for bit operations - unsigned index_array = significant_bits / 64; - unsigned bit_offset = significant_bits % 64; - uint64_t bit_in_element = (1UL << bit_offset); + unsigned index_array = significant_bits / sizeof(Word_t); + unsigned bit_offset = significant_bits % sizeof(Word_t); + Word_t bit_in_element = (1UL << bit_offset); // there is a possible race between checking the value // and setting it if (!(_address_bitset[index_array].fetch_or(bit_in_element) & @@ -45,26 +64,39 @@ bool AddressBitset::set(uintptr_t addr) { return false; } -bool AddressBitset::unset(uintptr_t addr) { - uint64_t hash_addr = remove_lower_bits(addr); - int significant_bits = hash_addr & _nb_bits_mask; - unsigned index_array = significant_bits / 64; - unsigned bit_offset = significant_bits % 64; - uint64_t bit_in_element = (1UL << bit_offset); +bool AddressBitset::remove(uintptr_t addr) { + int significant_bits = hash_significant_bits(addr); + unsigned index_array = significant_bits / sizeof(Word_t); + unsigned bit_offset = significant_bits % sizeof(Word_t); + Word_t bit_in_element = (1UL << bit_offset); if ((_address_bitset[index_array].fetch_xor(bit_in_element) & - bit_in_element) && - likely(_nb_addresses.load(std::memory_order_relaxed) >= 0)) { - --_nb_addresses; // fetch_add - 1 + bit_in_element)) { + _nb_addresses.fetch_sub(1, std::memory_order_relaxed); + // in the unlikely event of a clear right at the wrong time, we could + // have a negative number of elements (though count desyncs are acceptable) return true; } return false; } +unsigned int AddressBitset::count_set_bits(Word_t w) { + unsigned int count = 0; + while (w) { + count += w & 1; + w >>= 1; + } + return count; +} + void AddressBitset::clear() { for (unsigned i = 0; i < _k_nb_elements; ++i) { - _address_bitset[i].store(0, std::memory_order_relaxed); + Word_t original_value = _address_bitset[i].exchange(0); + // Count number of set bits in original_value + int num_set_bits = count_set_bits(original_value); + if (num_set_bits > 0) { + _nb_addresses.fetch_sub(num_set_bits, std::memory_order_relaxed); + } } - _nb_addresses.store(0); } } // namespace ddprof diff --git a/src/lib/allocation_tracker.cc b/src/lib/allocation_tracker.cc index ee0058e0c..e3c117bf6 100644 --- a/src/lib/allocation_tracker.cc +++ b/src/lib/allocation_tracker.cc @@ -67,7 +67,7 @@ TrackerThreadLocalState *AllocationTracker::init_tl_state() { if (res_set) { // should return 0 - log_once("Error: Unable to store tl_state. error %d \n", res_set); + LOG_ONCE("Error: Unable to store tl_state. error %d \n", res_set); delete tl_state; tl_state = nullptr; } @@ -169,7 +169,7 @@ void AllocationTracker::allocation_tracking_free() { } TrackerThreadLocalState *tl_state = get_tl_state(); if (unlikely(!tl_state)) { - log_once("Error: Unable to find tl_state during %s\n", __FUNCTION__); + LOG_ONCE("Error: Unable to find tl_state during %s\n", __FUNCTION__); instance->free(); return; } @@ -234,8 +234,8 @@ void AllocationTracker::track_allocation(uintptr_t addr, size_t size, uint64_t total_size = nsamples * sampling_interval; if (_state.track_deallocations) { - if (_allocated_address_set.set(addr)) { - if (unlikely(_allocated_address_set.nb_addresses() > + if (_allocated_address_set.add(addr)) { + if (unlikely(_allocated_address_set.count() > ddprof::liveallocation::kMaxTracked)) { // Check if we reached max number of elements // Clear elements if we reach too many @@ -243,9 +243,9 @@ void AllocationTracker::track_allocation(uintptr_t addr, size_t size, if (IsDDResOK(push_clear_live_allocation(tl_state))) { _allocated_address_set.clear(); // still set this as we are pushing the allocation to ddprof - _allocated_address_set.set(addr); + _allocated_address_set.add(addr); } else { - log_once( + LOG_ONCE( "Error: %s", "Stop allocation profiling. Unable to clear live allocation \n"); free(); @@ -259,7 +259,7 @@ void AllocationTracker::track_allocation(uintptr_t addr, size_t size, bool success = IsDDResOK(push_alloc_sample(addr, total_size, tl_state)); free_on_consecutive_failures(success); if (unlikely(!success) && _state.track_deallocations) { - _allocated_address_set.unset(addr); + _allocated_address_set.remove(addr); } } @@ -273,7 +273,7 @@ void AllocationTracker::track_deallocation(uintptr_t addr, return; } - if (!_state.track_deallocations || !_allocated_address_set.unset(addr)) { + if (!_state.track_deallocations || !_allocated_address_set.remove(addr)) { return; } @@ -502,7 +502,7 @@ void AllocationTracker::notify_thread_start() { if (unlikely(!tl_state)) { tl_state = init_tl_state(); if (!tl_state) { - log_once("Error: Unable to start allocation profiling on thread %d", + LOG_ONCE("Error: Unable to start allocation profiling on thread %d", ddprof::gettid()); return; } @@ -522,7 +522,7 @@ void AllocationTracker::notify_fork() { if (unlikely(!tl_state)) { // The state should already exist if we forked. // This would mean that we were not able to create the state before forking - log_once("Error: Unable to retrieve tl state after fork thread %d", + LOG_ONCE("Error: Unable to retrieve tl state after fork thread %d", ddprof::gettid()); return; } else { diff --git a/src/lib/dd_profiling.cc b/src/lib/dd_profiling.cc index b0be341f0..fd42c8892 100644 --- a/src/lib/dd_profiling.cc +++ b/src/lib/dd_profiling.cc @@ -272,8 +272,7 @@ int ddprof_start_profiling_internal() { // fails ? g_state.allocation_profiling_started = true; } else { - ddprof::log_once("Error: %s", - "Failure to start allocation profiling\n"); + LOG_ONCE("Error: %s", "Failure to start allocation profiling\n"); } } } catch (const ddprof::DDException &e) { return -1; } @@ -281,7 +280,7 @@ int ddprof_start_profiling_internal() { if (g_state.allocation_profiling_started) { int res = pthread_atfork(nullptr, nullptr, notify_fork); if (res) { - ddprof::log_once("Error:%s", "Unable to setup notify fork"); + LOG_ONCE("Error:%s", "Unable to setup notify fork"); assert(0); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5dd0d3986..656bf4cc6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -355,6 +355,8 @@ add_unit_test(pthread_tls-ut pthread_tls-ut.cc) add_unit_test(address_bitset-ut address_bitset-ut.cc ../src/lib/address_bitset.cc) +add_unit_test(lib_logger-ut ./lib_logger-ut.cc) + add_benchmark(savecontext-bench savecontext-bench.cc ../src/lib/pthread_fixes.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc) diff --git a/test/address_bitset-ut.cc b/test/address_bitset-ut.cc index 9320b8820..59c872fb1 100644 --- a/test/address_bitset-ut.cc +++ b/test/address_bitset-ut.cc @@ -14,14 +14,14 @@ namespace ddprof { TEST(address_bitset, simple) { - AddressBitset address_bitset(AddressBitset::_k_default_max_addresses); - EXPECT_TRUE(address_bitset.set(0xbadbeef)); - EXPECT_FALSE(address_bitset.set(0xbadbeef)); - EXPECT_TRUE(address_bitset.unset(0xbadbeef)); + AddressBitset address_bitset(AddressBitset::_k_default_bitset_size); + EXPECT_TRUE(address_bitset.add(0xbadbeef)); + EXPECT_FALSE(address_bitset.add(0xbadbeef)); + EXPECT_TRUE(address_bitset.remove(0xbadbeef)); } TEST(address_bitset, many_addresses) { - AddressBitset address_bitset(AddressBitset::_k_default_max_addresses); + AddressBitset address_bitset(AddressBitset::_k_default_bitset_size); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution dis( @@ -31,15 +31,15 @@ TEST(address_bitset, many_addresses) { unsigned nb_elements = 100000; for (unsigned i = 0; i < nb_elements; ++i) { uintptr_t addr = dis(gen); - if (address_bitset.set(addr)) { + if (address_bitset.add(addr)) { addresses.push_back(addr); } } EXPECT_TRUE(nb_elements - (nb_elements / 10) < addresses.size()); for (auto addr : addresses) { - EXPECT_TRUE(address_bitset.unset(addr)); + EXPECT_TRUE(address_bitset.remove(addr)); } - EXPECT_EQ(0, address_bitset.nb_addresses()); + EXPECT_EQ(0, address_bitset.count()); } // This test to tune the hash approach diff --git a/test/lib_logger-ut.cc b/test/lib_logger-ut.cc new file mode 100644 index 000000000..cfcf5f598 --- /dev/null +++ b/test/lib_logger-ut.cc @@ -0,0 +1,22 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#include + +#include "lib/lib_logger.hpp" + +namespace ddprof { + +void foo_log(const std::string &str) { LOG_ONCE("%s\n", str.c_str()); } + +TEST(LibLogger, simple) { + LOG_ONCE("something\n"); + LOG_ONCE("else\n"); + foo_log("one"); // this shows + foo_log("two"); // this does not show + const char *some_string = "some_string"; + LOG_ONCE("Some string = %s\n", some_string); +} +} // namespace ddprof diff --git a/test/reentry_guard-ut.cc b/test/reentry_guard-ut.cc index 4b733581e..1c0f6f0aa 100644 --- a/test/reentry_guard-ut.cc +++ b/test/reentry_guard-ut.cc @@ -26,8 +26,8 @@ TEST(ReentryGuardTest, null_init) { guard.register_guard(&reentry_guard); EXPECT_TRUE(guard); { - ddprof::ReentryGuard guard(&reentry_guard); - EXPECT_FALSE(guard); + ddprof::ReentryGuard guard2(&reentry_guard); + EXPECT_FALSE(guard2); } } }