Skip to content

Commit

Permalink
Add ticker stats for read corruption retries (#12923)
Browse files Browse the repository at this point in the history
Summary:
Add a couple of ticker stats for corruption retry count and successful retries. This PR also eliminates an extra read attempt when there's a checksum mismatch in a block read from the prefetch buffer.

Pull Request resolved: #12923

Test Plan: Update existing tests

Reviewed By: jowlyzhang

Differential Revision: D61024687

Pulled By: anand1976

fbshipit-source-id: 3a08403580ab244000e0d480b7ee0f5a03d76b06
  • Loading branch information
anand1976 committed Aug 13, 2024
1 parent dec94ec commit c062d4e
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 2 deletions.
6 changes: 6 additions & 0 deletions db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,12 @@ Status DBImpl::Recover(
/*no_error_if_files_missing=*/false, is_retry,
&desc_status);
desc_status.PermitUncheckedError();
if (is_retry) {
RecordTick(stats_, FILE_READ_CORRUPTION_RETRY_COUNT);
if (desc_status.ok()) {
RecordTick(stats_, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
}
if (can_retry) {
// If we're opening for the first time and the failure is likely due to
// a corrupt MANIFEST file (could result in either the log::Reader
Expand Down
27 changes: 27 additions & 0 deletions db/db_io_failure_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ class DBIOCorruptionTest
DBIOCorruptionTest() : DBIOFailureTest() {
BlockBasedTableOptions bbto;
options_ = CurrentOptions();
options_.statistics = CreateDBStatistics();

base_env_ = env_;
EXPECT_NE(base_env_, nullptr);
Expand All @@ -727,6 +728,8 @@ class DBIOCorruptionTest

Status ReopenDB() { return TryReopen(options_); }

Statistics* stats() { return options_.statistics.get(); }

protected:
std::unique_ptr<Env> env_guard_;
std::shared_ptr<CorruptionFS> fs_;
Expand All @@ -749,8 +752,12 @@ TEST_P(DBIOCorruptionTest, GetReadCorruptionRetry) {
if (std::get<2>(GetParam())) {
ASSERT_OK(s);
ASSERT_EQ(val, "val1");
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else {
ASSERT_TRUE(s.IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
}
}

Expand All @@ -773,8 +780,12 @@ TEST_P(DBIOCorruptionTest, IterReadCorruptionRetry) {
}
if (std::get<2>(GetParam())) {
ASSERT_OK(iter->status());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else {
ASSERT_TRUE(iter->status().IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
}
delete iter;
}
Expand All @@ -799,9 +810,13 @@ TEST_P(DBIOCorruptionTest, MultiGetReadCorruptionRetry) {
if (std::get<2>(GetParam())) {
ASSERT_EQ(values[0].ToString(), "val1");
ASSERT_EQ(values[1].ToString(), "val2");
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else {
ASSERT_TRUE(statuses[0].IsCorruption());
ASSERT_TRUE(statuses[1].IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
}
}

Expand All @@ -818,6 +833,9 @@ TEST_P(DBIOCorruptionTest, CompactionReadCorruptionRetry) {
Status s = dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
if (std::get<2>(GetParam())) {
ASSERT_OK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);

std::string val;
ReadOptions ro;
Expand All @@ -826,6 +844,7 @@ TEST_P(DBIOCorruptionTest, CompactionReadCorruptionRetry) {
ASSERT_EQ(val, "val1");
} else {
ASSERT_TRUE(s.IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
}
}

Expand All @@ -838,6 +857,9 @@ TEST_P(DBIOCorruptionTest, FlushReadCorruptionRetry) {
Status s = Flush();
if (std::get<2>(GetParam())) {
ASSERT_OK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);

std::string val;
ReadOptions ro;
Expand All @@ -846,6 +868,7 @@ TEST_P(DBIOCorruptionTest, FlushReadCorruptionRetry) {
ASSERT_EQ(val, "val1");
} else {
ASSERT_NOK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
}
}

Expand All @@ -862,8 +885,12 @@ TEST_P(DBIOCorruptionTest, ManifestCorruptionRetry) {

if (std::get<2>(GetParam())) {
ASSERT_OK(ReopenDB());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else {
ASSERT_EQ(ReopenDB(), Status::Corruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
}
SyncPoint::GetInstance()->DisableProcessing();
}
Expand Down
5 changes: 5 additions & 0 deletions include/rocksdb/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,11 @@ enum Tickers : uint32_t {
// Footer corruption detected when opening an SST file for reading
SST_FOOTER_CORRUPTION_COUNT,

// Counters for file read retries with the verify_and_reconstruct_read
// file system option after detecting a checksum mismatch
FILE_READ_CORRUPTION_RETRY_COUNT,
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,

TICKER_ENUM_MAX
};

Expand Down
9 changes: 9 additions & 0 deletions java/rocksjni/portal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5269,6 +5269,10 @@ class TickerTypeJni {
return -0x53;
case ROCKSDB_NAMESPACE::Tickers::SST_FOOTER_CORRUPTION_COUNT:
return -0x55;
case ROCKSDB_NAMESPACE::Tickers::FILE_READ_CORRUPTION_RETRY_COUNT:
return -0x56;
case ROCKSDB_NAMESPACE::Tickers::FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT:
return -0x57;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// -0x54 is the max value at this time. Since these values are exposed
// directly to Java clients, we'll keep the value the same till the next
Expand Down Expand Up @@ -5726,6 +5730,11 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS;
case -0x55:
return ROCKSDB_NAMESPACE::Tickers::SST_FOOTER_CORRUPTION_COUNT;
case -0x56:
return ROCKSDB_NAMESPACE::Tickers::FILE_READ_CORRUPTION_RETRY_COUNT;
case -0x57:
return ROCKSDB_NAMESPACE::Tickers::
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT;
case -0x54:
// -0x54 is the max value at this time. Since these values are exposed
// directly to Java clients, we'll keep the value the same till the next
Expand Down
4 changes: 4 additions & 0 deletions java/src/main/java/org/rocksdb/TickerType.java
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,10 @@ public enum TickerType {

SST_FOOTER_CORRUPTION_COUNT((byte) -0x55),

FILE_READ_CORRUPTION_RETRY_COUNT((byte) -0x56),

FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT((byte) -0x57),

TICKER_ENUM_MAX((byte) -0x54);

private final byte value;
Expand Down
4 changes: 4 additions & 0 deletions monitoring/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"},
{PREFETCH_HITS, "rocksdb.prefetch.hits"},
{SST_FOOTER_CORRUPTION_COUNT, "rocksdb.footer.corruption.count"},
{FILE_READ_CORRUPTION_RETRY_COUNT,
"rocksdb.file.read.corruption.retry.count"},
{FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
"rocksdb.file.read.corruption.retry.success.count"},
};

const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
Expand Down
4 changes: 4 additions & 0 deletions table/block_based/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,10 @@ Status BlockBasedTable::Open(
s = ReadFooterFromFile(retry_opts, file.get(), *ioptions.fs,
prefetch_buffer.get(), file_size, &footer,
kBlockBasedTableMagicNumber);
RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_COUNT);
if (s.ok()) {
RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
}
}
if (!s.ok()) {
Expand Down
9 changes: 8 additions & 1 deletion table/block_based/block_based_table_reader_sync_and_async.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,16 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
s = VerifyBlockChecksum(footer, data, handle.size(),
rep_->file->file_name(), handle.offset());
RecordTick(ioptions.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
if (!s.ok()) {
RecordTick(ioptions.stats, BLOCK_CHECKSUM_MISMATCH_COUNT);
}
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
if (!s.ok() &&
CheckFSFeatureSupport(ioptions.fs.get(),
FSSupportedOps::kVerifyAndReconstructRead)) {
assert(s.IsCorruption());
assert(!ioptions.allow_mmap_reads);
RecordTick(ioptions.stats, BLOCK_CHECKSUM_MISMATCH_COUNT);
RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_COUNT);

// Repeat the read for this particular block using the regular
// synchronous Read API. We can use the same chunk of memory
Expand All @@ -246,6 +249,10 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
assert(result.size() == BlockSizeWithTrailer(handle));
s = VerifyBlockChecksum(footer, data, handle.size(),
rep_->file->file_name(), handle.offset());
if (s.ok()) {
RecordTick(ioptions.stats,
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
} else {
s = io_s;
}
Expand Down
14 changes: 13 additions & 1 deletion table/block_fetcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
if (io_status_.ok()) {
got_from_prefetch_buffer_ = true;
used_buf_ = const_cast<char*>(slice_.data());
} else if (!(io_status_.IsCorruption() && retry_corrupt_read_)) {
} else if (io_status_.IsCorruption()) {
// Returning true apparently indicates we either got some data from
// the prefetch buffer, or we tried and encountered an error.
return true;
}
}
Expand Down Expand Up @@ -334,9 +336,15 @@ void BlockFetcher::ReadBlock(bool retry) {
ProcessTrailerIfPresent();
}

if (retry) {
RecordTick(ioptions_.stats, FILE_READ_CORRUPTION_RETRY_COUNT);
}
if (io_status_.ok()) {
InsertCompressedBlockToPersistentCacheIfNeeded();
fs_buf_ = std::move(read_req.fs_scratch);
if (retry) {
RecordTick(ioptions_.stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
} else {
ReleaseFileSystemProvidedBuffer(&read_req);
direct_io_buf_.reset();
Expand All @@ -355,7 +363,11 @@ IOStatus BlockFetcher::ReadBlockContents() {
return IOStatus::OK();
}
if (TryGetFromPrefetchBuffer()) {
if (io_status_.IsCorruption() && retry_corrupt_read_) {
ReadBlock(/*retry=*/true);
}
if (!io_status_.ok()) {
assert(!fs_buf_);
return io_status_;
}
} else if (!TryGetSerializedBlockFromPersistentCache()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add ticker stats to count file read retries due to checksum mismatch

0 comments on commit c062d4e

Please sign in to comment.