Skip to content

Commit

Permalink
[#3707] A workaround for encryption counter overflow
Browse files Browse the repository at this point in the history
Summary:
This is a workaround for the encrypted file corruption issue described
at #3707. When reading a
block from an encrypted SSTable and getting a checksum mismatch, we now
try to increment byte 11 of the initialization vector (carrying over
into earlier bytes when encountering 0xff) and decrypt and verify
checksum once again. Similarly, when reading an SSTable footer, we use
magic number comparison instead of checksum verification.  This
workaround is turned on by default and is controlled using the new
`--encryption_counter_overflow_read_path_workaround` flag.

This diff is not fixing the underlying ignored overflow issue yet. While
it would be easy to do so, the old read path would not be able to read
files written with the new write path. Instead, we will deploy this
workaround, disable encryption, perform major compactions on all data,
and then deploy the real fix and re-enable encryption.

Also we restrict the range of the unsigned 32-bit randomly-generated
initial counter value to [0, 0x7fffffff] by default. This reduces the
effective key size by 1 bit but eliminates the overflow issue for all
files up to 32 GiB in size. This range could be customized using the new
flags: `--encryption_counter_min` and `--encryption_counter_max` (both
of these bounds are inclusive).

Also avoid storing a reference to a shared pointer in
TableReaderOptions.  This fixes an ASAN issue in the new test.

Test Plan:
Jenkins
New test, encrypted_sstable-test, that fails without the fix and passes
with the fix.

Reviewers: rahuldesirazu, kannan, sergei, bogdan

Reviewed By: sergei

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D8020
  • Loading branch information
mbautin authored and rahuldesirazu committed Mar 25, 2020
1 parent 699ca2c commit 68d78d0
Show file tree
Hide file tree
Showing 23 changed files with 709 additions and 126 deletions.
1 change: 0 additions & 1 deletion ent/src/yb/tserver/CMakeLists-include.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,3 @@ set(TSERVER_EXTENSIONS_TESTS
remote_bootstrap_rocksdb_session-test_ent
remote_bootstrap_rocksdb_client-test_ent
PARENT_SCOPE)

210 changes: 143 additions & 67 deletions src/yb/rocksdb/table/format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,15 @@
#include "yb/rocksdb/util/perf_context_imp.h"
#include "yb/rocksdb/util/xxhash.h"

#include "yb/util/encryption_util.h"
#include "yb/util/encrypted_file.h"
#include "yb/util/format.h"
#include "yb/util/mem_tracker.h"
#include "yb/util/string_util.h"

using yb::Format;
using yb::Result;

namespace rocksdb {

extern const uint64_t kLegacyBlockBasedTableMagicNumber;
Expand All @@ -55,8 +60,6 @@ const uint64_t kPlainTableMagicNumber = 0;
#endif
const uint32_t DefaultStackBufferSize = 5000;

using yb::Format;

void BlockHandle::AppendEncodedTo(std::string* dst) const {
// Sanity check that all fields have been set
DCHECK_NE(offset_, kUint64FieldNotSet);
Expand Down Expand Up @@ -161,9 +164,9 @@ Footer::Footer(uint64_t _table_magic_number, uint32_t _version)
}

Status Footer::DecodeFrom(Slice* input) {
assert(!HasInitializedTableMagicNumber());
assert(input != nullptr);
assert(input->size() >= kMinEncodedLength);
DSCHECK(!HasInitializedTableMagicNumber(), IllegalState, "Decoding into the same footer twice");
DSCHECK(input != nullptr, IllegalState, "input can't be null");
DSCHECK_GE(input->size(), kMinEncodedLength, Corruption, "Footer size is too small");

const char *magic_ptr = input->cend() - kMagicNumberLengthByte;
const uint32_t magic_lo = DecodeFixed32(magic_ptr);
Expand Down Expand Up @@ -234,90 +237,163 @@ std::string Footer::ToString() const {
return result;
}

Status ReadFooterFromFile(RandomAccessFileReader* file, uint64_t file_size,
Footer* footer, uint64_t enforce_table_magic_number) {
Status CheckSSTableFileSize(RandomAccessFileReader* file, uint64_t file_size) {
if (file_size < Footer::kMinEncodedLength) {
return STATUS(Corruption, "file is too short to be an sstable");
return STATUS_FORMAT(Corruption,
"File is too short to be an SSTable: $0",
file->file()->filename());
}
return Status::OK();
}

Status ReadFooterFromFile(
RandomAccessFileReader* file, uint64_t file_size, Footer* footer,
uint64_t enforce_table_magic_number) {
RETURN_NOT_OK(CheckSSTableFileSize(file, file_size));

char footer_space[Footer::kMaxEncodedLength];
Slice footer_input;
size_t read_offset =
(file_size > Footer::kMaxEncodedLength)
? static_cast<size_t>(file_size - Footer::kMaxEncodedLength)
: 0;
Status s = file->Read(read_offset, Footer::kMaxEncodedLength, &footer_input,
footer_space);
if (!s.ok()) return s;

// Check that we actually read the whole footer from the file. It may be
// that size isn't correct.
if (footer_input.size() < Footer::kMinEncodedLength) {
return STATUS(Corruption, "file is too short to be an sstable");
}
const size_t read_size = std::min<size_t>(Footer::kMaxEncodedLength, file_size);
struct FooterValidator : public yb::ReadValidator {
FooterValidator(RandomAccessFileReader* file_,
Footer* footer_,
uint64_t enforce_table_magic_number_)
: file(file_),
footer(footer_),
enforce_table_magic_number(enforce_table_magic_number_) {}

CHECKED_STATUS Validate(const Slice& read_result) const override {
// Check that we actually read the whole footer from the file. It may be that size isn't
// correct.
RETURN_NOT_OK(CheckSSTableFileSize(file, read_result.size()));
Slice mutable_read_result(read_result);
*footer = Footer();
RETURN_NOT_OK(footer->DecodeFrom(&mutable_read_result));
if (enforce_table_magic_number != 0 &&
enforce_table_magic_number != footer->table_magic_number()) {
return STATUS_FORMAT(Corruption, "Bad table magic number: $0, expected: $1",
footer->table_magic_number(),
enforce_table_magic_number);
}
return Status::OK();
}
RandomAccessFileReader* file;
Footer* const footer;
const uint64_t enforce_table_magic_number;
} validator(file, footer, enforce_table_magic_number);

s = footer->DecodeFrom(&footer_input);
if (!s.ok()) {
return s;
}
if (enforce_table_magic_number != 0 &&
enforce_table_magic_number != footer->table_magic_number()) {
return STATUS(Corruption, "Bad table magic number");
}
return Status::OK();
return file->ReadAndValidate(read_offset, read_size, &footer_input, footer_space, validator);
}

// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
namespace {

// Read a block and check its CRC
// contents is the result of reading.
// According to the implementation of file->Read, contents may not point to buf
Status ReadBlock(RandomAccessFileReader* file, const Footer& footer,
const ReadOptions& options, const BlockHandle& handle,
Slice* contents, /* result of reading */ char* buf) {
size_t n = static_cast<size_t>(handle.size());
Status s;
struct ChecksumData {
uint32_t expected;
uint32_t actual;
};

Result<ChecksumData> ComputeChecksum(
RandomAccessFileReader* file,
const Footer& footer,
const BlockHandle& handle,
const Slice& src_data,
uint32_t raw_expected_checksum) {
switch (footer.checksum()) {
case kCRC32c:
return ChecksumData {
.expected = crc32c::Unmask(raw_expected_checksum),
.actual = crc32c::Value(src_data.data(), src_data.size())
};
case kxxHash:
if (src_data.size() > std::numeric_limits<int>::max()) {
return STATUS_FORMAT(
Corruption, "Block too large for xxHash ($0 bytes, but must be $1 or smaller)",
src_data.size(), std::numeric_limits<int>::max());
}
return ChecksumData {
.expected = raw_expected_checksum,
.actual = XXH32(src_data.data(), static_cast<int>(src_data.size()), 0)
};
case kNoChecksum:
return ChecksumData {
.expected = raw_expected_checksum,
.actual = raw_expected_checksum
};
}
return STATUS_FORMAT(
Corruption, "Unknown checksum type in file: $0, block handle: $1",
file->file()->filename(), handle.ToDebugString());
}

Status VerifyBlockChecksum(
RandomAccessFileReader* file,
const Footer& footer,
const BlockHandle& handle,
const char* data,
const size_t block_size) {
PERF_TIMER_GUARD(block_checksum_time);
const uint32_t raw_expected_checksum = DecodeFixed32(data + block_size + 1);
auto checksum = VERIFY_RESULT(
ComputeChecksum(file, footer, handle, Slice(data, block_size + 1), raw_expected_checksum));
if (checksum.actual != checksum.expected) {
return STATUS_FORMAT(
Corruption, "Block checksum mismatch in file: $0, block handle: $1",
file->file()->filename(), handle.ToDebugString());
}
return Status::OK();
}

// Read a block and check its CRC. When this function returns, *contents will contain the result of
// reading.
Status ReadBlock(
RandomAccessFileReader* file, const Footer& footer, const ReadOptions& options,
const BlockHandle& handle, Slice* contents, /* result of reading */ char* buf) {
*contents = Slice(buf, buf);
const size_t expected_read_size = static_cast<size_t>(handle.size()) + kBlockTrailerSize;
Status s;
{
PERF_TIMER_GUARD(block_read_time);
s = file->Read(handle.offset(), n + kBlockTrailerSize, contents, buf);
struct BlockChecksumValidator : public yb::ReadValidator {
BlockChecksumValidator(
RandomAccessFileReader* file_, const Footer& footer_, const ReadOptions& options_,
const BlockHandle& handle_, size_t expected_read_size_)
: file(file_),
footer(footer_),
options(options_),
handle(handle_),
expected_read_size(expected_read_size_) {}

CHECKED_STATUS Validate(const Slice& read_result) const override {
if (read_result.size() != expected_read_size) {
return STATUS_FORMAT(
Corruption, "Truncated block read in file: $0, block handle: $1, expected size: $2",
file->file()->filename(), handle.ToDebugString(), expected_read_size);
}

if (options.verify_checksums) {
return VerifyBlockChecksum(file, footer, handle, read_result.cdata(), handle.size());
}
return Status::OK();
};

RandomAccessFileReader* file;
const Footer& footer;
const ReadOptions& options;
const BlockHandle& handle;
const size_t expected_read_size;
} validator(file, footer, options, handle, expected_read_size);

s = file->ReadAndValidate(handle.offset(), expected_read_size, contents, buf, validator);
}

PERF_COUNTER_ADD(block_read_count, 1);
PERF_COUNTER_ADD(block_read_byte, n + kBlockTrailerSize);
PERF_COUNTER_ADD(block_read_byte, expected_read_size);

if (!s.ok()) {
return s;
}
if (contents->size() != n + kBlockTrailerSize) {
return STATUS(Corruption, "truncated block read");
}

// Check the crc of the type and the block contents
const char* data = contents->cdata(); // Pointer to where Read put the data
if (options.verify_checksums) {
PERF_TIMER_GUARD(block_checksum_time);
uint32_t value = DecodeFixed32(data + n + 1);
uint32_t actual = 0;
switch (footer.checksum()) {
case kCRC32c:
value = crc32c::Unmask(value);
actual = crc32c::Value(data, n + 1);
break;
case kxxHash:
actual = XXH32(data, static_cast<int>(n) + 1, 0);
break;
default:
s = STATUS(Corruption, "unknown checksum type");
}
if (s.ok() && actual != value) {
s = STATUS(Corruption, "block checksum mismatch");
}
if (!s.ok()) {
return s;
}
}
return s;
}

Expand Down
4 changes: 2 additions & 2 deletions src/yb/rocksdb/table/format.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class Footer {
// initialized via @ReadFooterFromFile().
// Use this when you plan to load Footer with DecodeFrom(). Never use this
// when you plan to AppendEncodedTo.
Footer() : Footer(kInvalidTableMagicNumber, 0) {}
Footer() : Footer(kInvalidTableMagicNumber, /* version= */ 0) {}

// Use this constructor when you plan to write out the footer using
// AppendEncodedTo(). Never use this constructor with DecodeFrom().
Expand Down Expand Up @@ -189,7 +189,7 @@ class Footer {
ChecksumType checksum_;
BlockHandle metaindex_handle_;
BlockHandle data_index_handle_;
uint64_t table_magic_number_ = 0;
uint64_t table_magic_number_ = kInvalidTableMagicNumber;
};

// Read the footer from file
Expand Down
2 changes: 1 addition & 1 deletion src/yb/rocksdb/table/mock_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ uint32_t MockTableFactory::GetAndWriteNextID(WritableFileWriter* file) const {
uint32_t MockTableFactory::GetIDFromFile(RandomAccessFileReader* file) const {
char buf[4];
Slice result;
file->Read(0, 4, &result, buf);
CHECK_OK(file->Read(0, 4, &result, buf));
assert(result.size() == 4);
return DecodeFixed32(buf);
}
Expand Down
6 changes: 3 additions & 3 deletions src/yb/rocksdb/table/table_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ struct TableReaderOptions {

const ImmutableCFOptions& ioptions;
const EnvOptions& env_options;
const InternalKeyComparatorPtr& internal_comparator;
const InternalKeyComparatorPtr internal_comparator;
// This is only used for BlockBasedTable (reader)
bool skip_filters;
};

struct TableBuilderOptions {
TableBuilderOptions(
const ImmutableCFOptions& _ioptions,
const std::shared_ptr<const InternalKeyComparator>& _internal_comparator,
const InternalKeyComparatorPtr& _internal_comparator,
const IntTblPropCollectorFactories& _int_tbl_prop_collector_factories,
CompressionType _compression_type,
const CompressionOptions& _compression_opts,
Expand All @@ -77,7 +77,7 @@ struct TableBuilderOptions {
skip_filters(_skip_filters) {}

const ImmutableCFOptions& ioptions;
std::shared_ptr<const InternalKeyComparator> internal_comparator;
InternalKeyComparatorPtr internal_comparator;
const IntTblPropCollectorFactories* int_tbl_prop_collector_factories;
CompressionType compression_type;
const CompressionOptions& compression_opts;
Expand Down
17 changes: 17 additions & 0 deletions src/yb/rocksdb/util/file_reader_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,23 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
return s;
}

Status RandomAccessFileReader::ReadAndValidate(
uint64_t offset, size_t n, Slice* result, char* scratch, const yb::ReadValidator& validator) {
uint64_t elapsed = 0;
Status s;
{
StopWatch sw(env_, stats_, hist_type_,
(stats_ != nullptr) ? &elapsed : nullptr);
IOSTATS_TIMER_GUARD(read_nanos);
s = file_->ReadAndValidate(offset, n, result, scratch, validator);
IOSTATS_ADD_IF_POSITIVE(bytes_read, result->size());
}
if (stats_ != nullptr && file_read_hist_ != nullptr) {
file_read_hist_->Add(elapsed);
}
return s;
}

Status WritableFileWriter::Append(const Slice& data) {
const char* src = data.cdata();
size_t left = data.size();
Expand Down
4 changes: 3 additions & 1 deletion src/yb/rocksdb/util/file_reader_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ class RandomAccessFileReader {
RandomAccessFileReader(const RandomAccessFileReader&) = delete;
RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete;

Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
CHECKED_STATUS Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
CHECKED_STATUS ReadAndValidate(
uint64_t offset, size_t n, Slice* result, char* scratch, const yb::ReadValidator& validator);

RandomAccessFile* file() { return file_.get(); }
};
Expand Down
1 change: 0 additions & 1 deletion src/yb/rocksutil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,3 @@ ADD_YB_TEST(yb_rocksdb_logger-test)

ADD_YB_TEST(rocksdb_encrypted_env-test)
target_link_libraries(rocksdb_encrypted_env-test encryption_test_util)

2 changes: 1 addition & 1 deletion src/yb/rocksutil/rocksdb_encrypted_file_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class RocksDBEncryptedFileFactory : public rocksdb::RocksDBFileFactoryWrapper {
Status NewWritableFile(const std::string& fname, std::unique_ptr<rocksdb::WritableFile>* result,
const rocksdb::EnvOptions& options) override {
std::unique_ptr<rocksdb::WritableFile> underlying;
RETURN_NOT_OK(RocksDBFileFactoryWrapper::NewWritableFile(fname, &underlying, options));
RETURN_NOT_OK(RocksDBFileFactoryWrapper::NewWritableFile(fname, &underlying, options));
return RocksDBEncryptedWritableFile::Create(
result, header_manager_.get(), std::move(underlying));
}
Expand Down
Loading

0 comments on commit 68d78d0

Please sign in to comment.