From c4ee9603625a63dd8381b3d5656fa1aec44349d7 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 3 Feb 2022 13:26:56 -0800 Subject: [PATCH 1/3] clear bg error later --- db/db_impl/db_impl.cc | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 196b428a307..a7b44c61256 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -400,14 +400,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { JobContext job_context(0); FindObsoleteFiles(&job_context, true); - if (s.ok()) { - s = error_handler_.ClearBGError(); - } else { - // NOTE: this is needed to pass ASSERT_STATUS_CHECKED - // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test. - // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952 - error_handler_.GetRecoveryError().PermitUncheckedError(); - } mutex_.Unlock(); job_context.manifest_file_number = 1; @@ -422,16 +414,31 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // during previous error handling. if (file_deletion_disabled) { // Always return ok - s = EnableFileDeletions(/*force=*/true); - if (!s.ok()) { + Status enable_file_deletion_s = EnableFileDeletions(/*force=*/true); + if (!enable_file_deletion_s.ok()) { ROCKS_LOG_INFO( immutable_db_options_.info_log, "DB resume requested but could not enable file deletions [%s]", - s.ToString().c_str()); + enable_file_deletion_s.ToString().c_str()); } } + } + + mutex_.Lock(); + if (s.ok()) { + s = error_handler_.ClearBGError(); + } else { + // NOTE: this is needed to pass ASSERT_STATUS_CHECKED + // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test. + // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952 + error_handler_.GetRecoveryError().PermitUncheckedError(); + } + mutex_.Unlock(); + + if (s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB"); } + mutex_.Lock(); // Check for shutdown again before scheduling further compactions, // since we released and re-acquired the lock above From ea4cc0352d7278553c6724f94aaeca37156aee46 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 24 Feb 2022 14:25:30 -0800 Subject: [PATCH 2/3] Address feedback: hold lock longer, more comment, add to HISTORY --- HISTORY.md | 3 +++ db/db_impl/db_impl.cc | 15 ++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index eb138935b3d..c823bd7d454 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,9 @@ ### New Features * Allow WriteBatchWithIndex to index a WriteBatch that includes keys with user-defined timestamps. The index itself does not have timestamp. +### Bug Fixes +* Fixed a data race on `VersionSet` in `DBImpl::ResumeImpl()` + ## 7.0.0 (02/20/2022) ### Bug Fixes * Fixed a major bug in which batched MultiGet could return old values for keys deleted by DeleteRange when memtable Bloom filter is enabled (memtable_prefix_bloom_size_ratio > 0). (The fix includes a substantial MultiGet performance improvement in the unusual case of both memtable_whole_key_filtering and prefix_extractor.) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a7b44c61256..32f79fcc86c 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -414,18 +414,22 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // during previous error handling. if (file_deletion_disabled) { // Always return ok - Status enable_file_deletion_s = EnableFileDeletions(/*force=*/true); - if (!enable_file_deletion_s.ok()) { + s = EnableFileDeletions(/*force=*/true); + assert(s.ok()); + if (!s.ok()) { ROCKS_LOG_INFO( immutable_db_options_.info_log, "DB resume requested but could not enable file deletions [%s]", - enable_file_deletion_s.ToString().c_str()); + s.ToString().c_str()); } } } mutex_.Lock(); if (s.ok()) { + // This will notify and unblock threads waiting for error recovery to + // finish. Those previouly waiting threads can now proceed, which may + // include closing the db. s = error_handler_.ClearBGError(); } else { // NOTE: this is needed to pass ASSERT_STATUS_CHECKED @@ -433,13 +437,14 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952 error_handler_.GetRecoveryError().PermitUncheckedError(); } - mutex_.Unlock(); if (s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB"); + } else { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "Failed to resume DB [%s]", + s.ToString().c_str()); } - mutex_.Lock(); // Check for shutdown again before scheduling further compactions, // since we released and re-acquired the lock above if (shutdown_initiated_) { From b1b9977e16d331ba063ad5edf846c13d3d2b62d5 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 25 Feb 2022 10:14:48 -0800 Subject: [PATCH 3/3] Address feedback: history, error logging --- HISTORY.md | 2 +- db/db_impl/db_impl.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index c823bd7d454..f1e27e64719 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,7 +4,7 @@ * Allow WriteBatchWithIndex to index a WriteBatch that includes keys with user-defined timestamps. The index itself does not have timestamp. ### Bug Fixes -* Fixed a data race on `VersionSet` in `DBImpl::ResumeImpl()` +* * Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496) ## 7.0.0 (02/20/2022) ### Bug Fixes diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 32f79fcc86c..0f00f33d043 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -415,12 +415,12 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { if (file_deletion_disabled) { // Always return ok s = EnableFileDeletions(/*force=*/true); - assert(s.ok()); if (!s.ok()) { ROCKS_LOG_INFO( immutable_db_options_.info_log, "DB resume requested but could not enable file deletions [%s]", s.ToString().c_str()); + assert(false); } } }