Skip to content

Commit

Permalink
[fix](memory) fix mem tracker grace exit 2 (apache#22003)
Browse files Browse the repository at this point in the history
fix: apache#21136
mem tracker group uses class static variables instead of global variables

https://stackoverflow.com/questions/2204608/does-c-call-destructors-for-global-and-class-static-variables
TODO: A mem tracker manager is required, don't use global variables, it will sad

==3623982==ERROR: AddressSanitizer: heap-use-after-free on address 0x60f0000056b8 at pc 0x56478bbe3ae0 bp 0x7f20953d2270 sp 0x7f20953d2268
READ of size 8 at 0x60f0000056b8 thread T41 (memory_tracker_)
*** Query id: 0-0 ***
*** Aborted at 1689749969 (unix time) try "date -d @1689749969" if you are using GNU date ***
*** Current BE git commitID: b3e9cad ***
*** SIGSEGV address not mapped to object (@0x0) received by PID 3623982 (TID 3624277 OR 0x7f19e06dd640) from PID 0; stack trace: ***
    #0 0x56478bbe3adf in std::__shared_ptr::operator bool() const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:1295:16
    #1 0x56478bbe306e in doris::MemTracker::refresh_profile_counter() /doris/be/src/runtime/memory/mem_tracker.h:149:13
    #2 0x56478bbec669 in doris::MemTrackerLimiter::refresh_all_tracker_profile() /doris/be/src/runtime/memory/mem_tracker_limiter.cpp:119:22
    apache#3 0x564788f53fa0 in doris::Daemon::memory_tracker_profile_refresh_thread() /doris/be/src/common/daemon.cpp:295:9
    apache#4 0x564788f5d04b in doris::Daemon::start()::$_4::operator()() const /doris/be/src/common/daemon.cpp:473:30
    apache#5 0x564788f5cff6 in void std::__invoke_impl(std::__invoke_other, doris::Daemon::start()::$_4&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14
    apache#6 0x564788f5cf78 in std::enable_if, void>::type std::__invoke_r(doris::Daemon::start()::$_4&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2
    apache#7 0x564788f5cdae in std::_Function_handler::_M_invoke(std::_Any_data const&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9
    apache#8 0x56478903f576 in std::function::operator()() const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9
    apache#9 0x56478c4a35af in doris::Thread::supervise_thread(void*) /doris/be/src/util/thread.cpp:465:5
    apache#10 0x7f217c8a244f in start_thread nptl/pthread_create.c:473:8
    apache#11 0x7f217cb27d52 in __clone misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95

0x60f0000056b8 is located 56 bytes inside of 168-byte region [0x60f000005680,0x60f000005728)
freed by thread T0 here:
    #0 0x564788e7280d in operator delete(void*) (/mnt/hdd01/dorisTestEnv/NEREIDS_ASAN/be/lib/doris_be+0x1758280d) (BuildId: 219493cc924323ee)
    #1 0x56478acec1d5 in std::default_delete::operator()(doris::MemTrackerLimiter*) const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85:2
    #2 0x56478ace9faf in std::unique_ptr >::~unique_ptr() /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:361:4
    apache#3 0x56478ace1471 in doris::ShardedLRUCache::~ShardedLRUCache() /doris/be/src/olap/lru_cache.cpp:581:1
    apache#4 0x56478ace14c8 in doris::ShardedLRUCache::~ShardedLRUCache() /doris/be/src/olap/lru_cache.cpp:572:37
    apache#5 0x56478acd0984 in std::default_delete::operator()(doris::Cache*) const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85:2
    apache#6 0x56478acceddf in std::unique_ptr >::~unique_ptr() /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:361:4
    apache#7 0x56478ad96dc6 in doris::StoragePageCache::~StoragePageCache() /doris/be/src/olap/page_cache.h:78:7
    apache#8 0x7f217ca54146 in __run_exit_handlers stdlib/exit.c:108:8

previously allocated by thread T0 here:
    #0 0x564788e71fad in operator new(unsigned long) (/mnt/hdd01/dorisTestEnv/NEREIDS_ASAN/be/lib/doris_be+0x17581fad) (BuildId: 219493cc924323ee)
    #1 0x56478ace9c90 in std::_MakeUniq::__single_object std::make_unique, std::allocator > const&>(doris::MemTrackerLimiter::Type&&, std::__cxx11::basic_string, std::allocator > const&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:962:30
    #2 0x56478acde930 in doris::ShardedLRUCache::ShardedLRUCache(std::__cxx11::basic_string, std::allocator > const&, unsigned long, doris::LRUCacheType, unsigned int, unsigned int) /doris/be/src/olap/lru_cache.cpp:526:20
    apache#3 0x56478ace22e1 in doris::new_lru_cache(std::__cxx11::basic_string, std::allocator > const&, unsigned long, doris::LRUCacheType, unsigned int) /doris/be/src/olap/lru_cache.cpp:670:16
    apache#4 0x56478ad91da2 in doris::StoragePageCache::StoragePageCache(unsigned long, int, long, unsigned int) /doris/be/src/olap/page_cache.cpp:47:17
    apache#5 0x56478ad9156e in doris::StoragePageCache::create_global_cache(unsigned long, int, long, unsigned int) /doris/be/src/olap/page_cache.cpp:31:29
    apache#6 0x56478b98b3d3 in doris::ExecEnv::_init_mem_env() /doris/be/src/runtime/exec_env_init.cpp:251:5
    apache#7 0x56478b98946c in doris::ExecEnv::_init(std::vector > const&) /doris/be/src/runtime/exec_env_init.cpp:182:5
    apache#8 0x56478b987139 in doris::ExecEnv::init(doris::ExecEnv*, std::vector > const&) /doris/be/src/runtime/exec_env_init.cpp:98:17
    apache#9 0x564788e79b50 in main /doris/be/src/service/doris_main.cpp:429:5
    apache#10 0x7f217ca38564 in __libc_start_main csu/../csu/libc-start.c:332:16

Thread T41 (memory_tracker_) created by T0 here:
    #0 0x564788e1fcaa in pthread_create (/mnt/hdd01/dorisTestEnv/NEREIDS_ASAN/be/lib/doris_be+0x1752fcaa) (BuildId: 219493cc924323ee)
    #1 0x56478c4a2366 in doris::Thread::start_thread(std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > const&, std::function const&, unsigned long, scoped_refptr*) /doris/be/src/util/thread.cpp:419:15
    #2 0x564788f59b91 in doris::Status doris::Thread::create(std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > const&, doris::Daemon::start()::$_4 const&, scoped_refptr*) /doris/be/src/util/thread.h:50:16
    apache#3 0x564788f58165 in doris::Daemon::start() /doris/be/src/common/daemon.cpp:471:10
    apache#4 0x564788e79a96 in main /doris/be/src/service/doris_main.cpp:420:12
    apache#5 0x7f217ca38564 in __libc_start_main csu/../csu/libc-start.c:332:16
  • Loading branch information
xinyiZzz authored and xiaokang committed Jul 20, 2023
1 parent d6bc088 commit f9166b7
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 18 deletions.
10 changes: 2 additions & 8 deletions be/src/runtime/memory/mem_tracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,16 @@

#include <mutex>

#include "common/daemon.h"
#include "runtime/memory/mem_tracker_limiter.h"
#include "runtime/thread_context.h"

namespace doris {

struct TrackerGroup {
std::list<MemTracker*> trackers;
std::mutex group_lock;
};

// Save all MemTrackers in use to maintain the weak relationship between MemTracker and MemTrackerLimiter.
// When MemTrackerLimiter prints statistics, all MemTracker statistics with weak relationship will be printed together.
// Each group corresponds to several MemTrackerLimiters and has a lock.
// Multiple groups are used to reduce the impact of locks.
static std::vector<TrackerGroup> mem_tracker_pool(1000);
std::vector<MemTracker::TrackerGroup> MemTracker::mem_tracker_pool(1000);

MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile, MemTrackerLimiter* parent,
const std::string& profile_counter_name)
Expand Down Expand Up @@ -82,7 +76,7 @@ void MemTracker::bind_parent(MemTrackerLimiter* parent) {
}

MemTracker::~MemTracker() {
if (_parent_group_num != -1 && !k_doris_exit) {
if (_parent_group_num != -1) {
std::lock_guard<std::mutex> l(mem_tracker_pool[_parent_group_num].group_lock);
if (_tracker_group_it != mem_tracker_pool[_parent_group_num].trackers.end()) {
mem_tracker_pool[_parent_group_num].trackers.erase(_tracker_group_it);
Expand Down
7 changes: 7 additions & 0 deletions be/src/runtime/memory/mem_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class MemTracker {
int64_t peak_consumption = 0;
};

struct TrackerGroup {
std::list<MemTracker*> trackers;
std::mutex group_lock;
};

// A counter that keeps track of the current and peak value seen.
// Relaxed ordering, not accurate in real time.
class MemCounter {
Expand Down Expand Up @@ -182,6 +187,8 @@ class MemTracker {
// Use _parent_label to correlate with parent limiter tracker.
std::string _parent_label = "-";

static std::vector<TrackerGroup> mem_tracker_pool;

// Iterator into mem_tracker_pool for this object. Stored to have O(1) remove.
std::list<MemTracker*>::iterator _tracker_group_it;
};
Expand Down
6 changes: 3 additions & 3 deletions be/src/runtime/memory/mem_tracker_limiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <queue>
#include <utility>

#include "common/daemon.h"
#include "runtime/exec_env.h"
#include "runtime/fragment_mgr.h"
#include "runtime/load_channel_mgr.h"
Expand All @@ -42,7 +41,8 @@ namespace doris {
// Save all MemTrackerLimiters in use.
// Each group corresponds to several MemTrackerLimiters and has a lock.
// Multiple groups are used to reduce the impact of locks.
static std::vector<TrackerLimiterGroup> mem_tracker_limiter_pool(MEM_TRACKER_GROUP_NUM);
std::vector<MemTrackerLimiter::TrackerLimiterGroup> MemTrackerLimiter::mem_tracker_limiter_pool(
MEM_TRACKER_GROUP_NUM);

std::atomic<bool> MemTrackerLimiter::_enable_print_log_process_usage {true};

Expand Down Expand Up @@ -91,7 +91,7 @@ MemTrackerLimiter::~MemTrackerLimiter() {
// in real time. Merge its consumption into orphan when parent is process, to avoid repetition.
ExecEnv::GetInstance()->orphan_mem_tracker()->consume(_consumption->current_value());
_consumption->set(0);
if (!k_doris_exit) {
{
std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[_group_num].group_lock);
if (_tracker_limiter_group_it != mem_tracker_limiter_pool[_group_num].trackers.end()) {
mem_tracker_limiter_pool[_group_num].trackers.erase(_tracker_limiter_group_it);
Expand Down
14 changes: 7 additions & 7 deletions be/src/runtime/memory/mem_tracker_limiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,6 @@ class TaskGroup;
using TaskGroupPtr = std::shared_ptr<TaskGroup>;
} // namespace taskgroup

class MemTrackerLimiter;

struct TrackerLimiterGroup {
std::list<MemTrackerLimiter*> trackers;
std::mutex group_lock;
};

// Track and limit the memory usage of process and query.
// Contains an limit, arranged into a tree structure.
//
Expand All @@ -76,6 +69,11 @@ class MemTrackerLimiter final : public MemTracker {
6 // Experimental memory statistics, usually inaccurate, used for debugging, and expect to add other types in the future.
};

struct TrackerLimiterGroup {
std::list<MemTrackerLimiter*> trackers;
std::mutex group_lock;
};

inline static std::unordered_map<Type, std::shared_ptr<RuntimeProfile::HighWaterMarkCounter>>
TypeMemSum = {{Type::GLOBAL,
std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
Expand Down Expand Up @@ -272,6 +270,8 @@ class MemTrackerLimiter final : public MemTracker {
bool _enable_print_log_usage = false;
static std::atomic<bool> _enable_print_log_process_usage;

static std::vector<TrackerLimiterGroup> mem_tracker_limiter_pool;

// Iterator into mem_tracker_limiter_pool for this object. Stored to have O(1) remove.
std::list<MemTrackerLimiter*>::iterator _tracker_limiter_group_it;
};
Expand Down

0 comments on commit f9166b7

Please sign in to comment.