Skip to content

Commit

Permalink
Add more debug print to DBTestWithParam.ThreadStatusSingleCompaction (#12661)
Browse files Browse the repository at this point in the history

Summary:
This test is flaky and a recent failure prints the following:
```
[ RUN      ] DBTestWithParam/DBTestWithParam.ThreadStatusSingleCompaction/0
thread id: 1842811, thread status:
thread id: 1842803, thread status:
db/db_test.cc:4697: Failure
Expected equality of these values:
  op_count
    Which is: 0
  expected_count
    Which is: 1
[  FAILED  ] DBTestWithParam/DBTestWithParam.ThreadStatusSingleCompaction/0, where GetParam() = (1, false) (307 ms)
```
An empty thread status implies that the operation_type of every thread is OP_UNKNOWN. From https://github.com/facebook/rocksdb/blob/3ed46e0668f840bea490e29beeac7777c50ae8fb/monitoring/thread_status_updater.cc#L197, this can be due to thread_data->operation_type being OP_UNKNOWN or to thread_data->cf_key not being in `cf_info_map_`, potentially because of how cf_key_ is accessed with relaxed memory order. This PR adds debug output that prints the cf_name to check this.

This PR also prints num_running_compaction and the LSM state to check whether a compaction is indeed running, removes some unneeded options, and makes the test ensure that exactly 4 L0 files are created.

Pull Request resolved: #12661

Test Plan:
- Cannot repro the failure locally: `gtest-parallel --repeat=10000 --workers=200 ./db_test --gtest_filter="*ThreadStatusSingleCompaction*"`
- New failure message will look like:
```
[ RUN      ] DBTestWithParam/DBTestWithParam.ThreadStatusSingleCompaction/0
op_count: 1, expected_count 2
thread id: 6104100864, thread status: , cf_name
thread id: 6103527424, thread status: Compaction, cf_name default
running compaction: 1 lsm state: 4
db/db_test.cc:4885: Failure
Value of: match
  Actual: false
Expected: true
[  FAILED  ] DBTestWithParam/DBTestWithParam.ThreadStatusSingleCompaction/0, where GetParam() = (1, false) (115 ms)
```

Reviewed By: hx235

Differential Revision: D57422755

Pulled By: cbi42

fbshipit-source-id: 635663f26052b20e485dfa06a7c0f1f318ac1099
  • Loading branch information
cbi42 authored and facebook-github-bot committed May 17, 2024
1 parent 131c8cc commit ffd7930
Showing 1 changed file with 22 additions and 17 deletions.
39 changes: 22 additions & 17 deletions db/db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4677,24 +4677,27 @@ TEST_F(DBTest, DynamicMemtableOptions) {

#ifdef ROCKSDB_USING_THREAD_STATUS
namespace {
void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
bool VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
int expected_count) {
int op_count = 0;
std::vector<ThreadStatus> thread_list;
ASSERT_OK(env->GetThreadList(&thread_list));
EXPECT_OK(env->GetThreadList(&thread_list));
for (const auto& thread : thread_list) {
if (thread.operation_type == op_type) {
op_count++;
}
}
if (op_count != expected_count) {
fprintf(stderr, "op_count: %d, expected_count %d\n", op_count,
expected_count);
for (const auto& thread : thread_list) {
fprintf(stderr, "thread id: %" PRIu64 ", thread status: %s\n",
fprintf(stderr, "thread id: %" PRIu64 ", thread status: %s, cf_name %s\n",
thread.thread_id,
thread.GetOperationName(thread.operation_type).c_str());
thread.GetOperationName(thread.operation_type).c_str(),
thread.cf_name.c_str());
}
}
ASSERT_EQ(op_count, expected_count);
return op_count == expected_count;
}
} // anonymous namespace

Expand Down Expand Up @@ -4794,11 +4797,11 @@ TEST_F(DBTest, ThreadStatusFlush) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

CreateAndReopenWithCF({"pikachu"}, options);
VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
ASSERT_TRUE(VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0));

ASSERT_OK(Put(1, "foo", "v1"));
ASSERT_EQ("v1", Get(1, "foo"));
VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
ASSERT_TRUE(VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0));

uint64_t num_running_flushes = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningFlushes,
Expand All @@ -4811,7 +4814,7 @@ TEST_F(DBTest, ThreadStatusFlush) {
// The first sync point is to make sure there's one flush job
// running when we perform VerifyOperationCount().
TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
ASSERT_TRUE(VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1));
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningFlushes,
&num_running_flushes));
ASSERT_EQ(num_running_flushes, 1);
Expand All @@ -4822,17 +4825,11 @@ TEST_F(DBTest, ThreadStatusFlush) {
}

TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
const int kTestKeySize = 16;
const int kTestValueSize = 984;
const int kEntrySize = kTestKeySize + kTestValueSize;
const int kEntriesPerBuffer = 100;
Options options;
options.create_if_missing = true;
options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
options.compaction_style = kCompactionStyleLevel;
options.target_file_size_base = options.write_buffer_size;
options.max_bytes_for_level_base = options.target_file_size_base * 2;
options.max_bytes_for_level_multiplier = 2;
options.compression = kNoCompression;
options = CurrentOptions(options);
options.env = env_;
Expand Down Expand Up @@ -4867,18 +4864,26 @@ TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
&num_running_compactions));
ASSERT_EQ(num_running_compactions, 0);
TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
ASSERT_GE(NumTableFilesAtLevel(0),
ASSERT_EQ(NumTableFilesAtLevel(0),
options.level0_file_num_compaction_trigger);

// This makes sure at least one compaction is running.
TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");

if (options.enable_thread_tracking) {
// expecting one single L0 to L1 compaction
VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
// This test is flaky and fails here.
bool match = VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
if (!match) {
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
&num_running_compactions));
fprintf(stderr, "running compaction: %" PRIu64 " lsm state: %s\n",
num_running_compactions, FilesPerLevel().c_str());
}
ASSERT_TRUE(match);
} else {
// If thread tracking is not enabled, compaction count should be 0.
VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
ASSERT_TRUE(VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0));
}
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
&num_running_compactions));
Expand Down

0 comments on commit ffd7930

Please sign in to comment.