Skip to content

Commit

Permalink
[#24310] DocDB: Fix wrong retention time could be picked when snapsho…
Browse files Browse the repository at this point in the history
…t schedule is enabled

Summary:
When snapshot schedule is active, we retain records from being compacted until there is snapshot that contains this record.
Due to race condition we could get into situation where tablet replica consider snapshot as taken while it does not have it.

TServer receives last schedule snapshot time via heartbeat mechanism.
There are 2 places that could lead to described bug.
1) Master sends last snapshot time for schedule that is not yet complete.
2) Snapshot is taken on leader and one follower, while another follower did not complete snapshot yet.

Fixed by switching to maintain last snapshot time locally and store this information in filesystem.

When snapshot is created for some schedule, we create file in snapshots folder named: last_snapshot.[SCHEDULE_ID].[SNAPSHOT_TIME].
And remove previous file for the same schedule.
During tablet bootstrap we list all files in snapshot folder, recovering latest snapshot times and removing outdated files.
Jira: DB-13199

Test Plan: ./yb_build.sh release -n 400 --gtest_filter YbAdminSnapshotScheduleTest.PgsqlAddColumnCompactWithPackedRow -- -p 8

Reviewers: timur

Reviewed By: timur

Subscribers: ybase

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D40893
  • Loading branch information
spolitov committed Dec 26, 2024
1 parent 63f9ba0 commit 6404af4
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 84 deletions.
19 changes: 10 additions & 9 deletions src/yb/docdb/consensus_frontier-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ TEST_F(ConsensusFrontierTest, TestUpdates) {
EXPECT_TRUE(frontier.Equals(frontier));
EXPECT_EQ(
"{ op_id: 0.0 hybrid_time: <invalid> "
"history_cutoff: { cotables cutoff: <invalid>, primary cutoff: <invalid> } "
"history_cutoff: { cotables_cutoff_ht: <invalid> primary_cutoff_ht: <invalid> } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }",
frontier.ToString());
Expand All @@ -62,7 +62,8 @@ TEST_F(ConsensusFrontierTest, TestUpdates) {
ConsensusFrontier frontier{{1, 1}, 1000_usec_ht, { 500_usec_ht, 500_usec_ht }};
EXPECT_EQ(
"{ op_id: 1.1 hybrid_time: { physical: 1000 } "
"history_cutoff: { cotables cutoff: { physical: 500 }, primary cutoff: { physical: 500 } } "
"history_cutoff: { cotables_cutoff_ht: { physical: 500 } "
"primary_cutoff_ht: { physical: 500 } } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }",
frontier.ToString());
Expand Down Expand Up @@ -122,7 +123,7 @@ TEST_F(ConsensusFrontierTest, TestUpdates) {
EXPECT_EQ(
PbToString(pb),
"{ op_id: 0.0 hybrid_time: <min> "
"history_cutoff: { cotables cutoff: <invalid>, primary cutoff: <invalid> } "
"history_cutoff: { cotables_cutoff_ht: <invalid> primary_cutoff_ht: <invalid> } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }");

Expand All @@ -131,33 +132,33 @@ TEST_F(ConsensusFrontierTest, TestUpdates) {
EXPECT_EQ(
PbToString(pb),
"{ op_id: 2.3 hybrid_time: <min> "
"history_cutoff: { cotables cutoff: <invalid>, primary cutoff: <invalid> } "
"history_cutoff: { cotables_cutoff_ht: <invalid> primary_cutoff_ht: <invalid> } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }");

pb.set_hybrid_time(100000);
EXPECT_EQ(
PbToString(pb),
"{ op_id: 2.3 hybrid_time: { physical: 24 logical: 1696 } "
"history_cutoff: { cotables cutoff: <invalid>, primary cutoff: <invalid> } "
"history_cutoff: { cotables_cutoff_ht: <invalid> primary_cutoff_ht: <invalid> } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }");

pb.set_primary_cutoff_ht(200000);
EXPECT_EQ(
PbToString(pb),
"{ op_id: 2.3 hybrid_time: { physical: 24 logical: 1696 } "
"history_cutoff: { cotables cutoff: <invalid>, "
"primary cutoff: { physical: 48 logical: 3392 } } "
"history_cutoff: { cotables_cutoff_ht: <invalid> "
"primary_cutoff_ht: { physical: 48 logical: 3392 } } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }");

pb.set_cotables_cutoff_ht(200000);
EXPECT_EQ(
PbToString(pb),
"{ op_id: 2.3 hybrid_time: { physical: 24 logical: 1696 } "
"history_cutoff: { cotables cutoff: { physical: 48 logical: 3392 }, "
"primary cutoff: { physical: 48 logical: 3392 } } "
"history_cutoff: { cotables_cutoff_ht: { physical: 48 logical: 3392 } "
"primary_cutoff_ht: { physical: 48 logical: 3392 } } "
"max_value_level_ttl_expiration_time: <invalid> primary_schema_version: <nullopt> "
"cotable_schema_versions: [] global_filter: <invalid> cotables_filter: [] }");
}
Expand Down
14 changes: 12 additions & 2 deletions src/yb/docdb/docdb_compaction_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,23 @@ std::optional<dockv::PackedRowVersion> PackedRowVersion(TableType table_type, bo
struct HistoryCutoff {
// Set only on the master and applies to cotables.
HybridTime cotables_cutoff_ht;

// Used everywhere else i.e. for the sys catalog table on the master,
// colocated tables on tservers and non-colocated tables on tservers.
HybridTime primary_cutoff_ht;

void MakeAtLeast(const HistoryCutoff& rhs) {
cotables_cutoff_ht.MakeAtLeast(rhs.cotables_cutoff_ht);
primary_cutoff_ht.MakeAtLeast(rhs.primary_cutoff_ht);
}

void MakeAtMost(const HistoryCutoff& rhs) {
cotables_cutoff_ht.MakeAtMost(rhs.cotables_cutoff_ht);
primary_cutoff_ht.MakeAtMost(rhs.primary_cutoff_ht);
}

std::string ToString() const {
return Format("{ cotables cutoff: $0, primary cutoff: $1 }",
cotables_cutoff_ht, primary_cutoff_ht);
return YB_STRUCT_TO_STRING(cotables_cutoff_ht, primary_cutoff_ht);
}
};

Expand Down
10 changes: 4 additions & 6 deletions src/yb/tablet/operations/history_cutoff_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,10 @@ consensus::LWHistoryCutoffPB* RequestTraits<consensus::LWHistoryCutoffPB>::Mutab
}

Status HistoryCutoffOperation::Apply(int64_t leader_term) {
auto primary_cutoff = request()->has_primary_cutoff_ht() ?
HybridTime(request()->primary_cutoff_ht()) : HybridTime();
auto cotables_cutoff = request()->has_cotables_cutoff_ht() ?
HybridTime(request()->cotables_cutoff_ht()) : HybridTime();
docdb::HistoryCutoff history_cutoff(
{ cotables_cutoff, primary_cutoff });
docdb::HistoryCutoff history_cutoff = {
.cotables_cutoff_ht = HybridTime::FromPB(request()->cotables_cutoff_ht()),
.primary_cutoff_ht = HybridTime::FromPB(request()->primary_cutoff_ht())
};

VLOG_WITH_PREFIX(2) << "History cutoff replicated " << op_id() << ": " << history_cutoff;

Expand Down
22 changes: 17 additions & 5 deletions src/yb/tablet/tablet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,7 @@ using namespace std::literals; // NOLINT

using rocksdb::SequenceNumber;

namespace yb {
namespace tablet {
namespace yb::tablet {

using strings::Substitute;

Expand Down Expand Up @@ -647,8 +646,9 @@ Tablet::Tablet(const TabletInitData& data)
log_prefix_suffix_(data.log_prefix_suffix),
is_sys_catalog_(data.is_sys_catalog),
txns_enabled_(data.txns_enabled),
allowed_history_cutoff_provider_(data.allowed_history_cutoff_provider),
retention_policy_(std::make_shared<TabletRetentionPolicy>(
clock_, data.allowed_history_cutoff_provider, metadata_.get())),
clock_, [this](const auto&) { return AllowedHistoryCutoff(); }, metadata_.get())),
full_compaction_pool_(data.full_compaction_pool),
admin_triggered_compaction_pool_(data.admin_triggered_compaction_pool),
ts_post_split_compaction_added_(std::move(data.post_split_compaction_added)),
Expand Down Expand Up @@ -976,6 +976,7 @@ std::string Tablet::LogPrefix(docdb::StorageDbType db_type) const {
Status Tablet::OpenKeyValueTablet() {
auto common_options = VERIFY_RESULT(CommonRocksDBOptions());

RETURN_NOT_OK(snapshots_->Open());
RETURN_NOT_OK(OpenRegularDB(common_options));
RETURN_NOT_OK(OpenIntentsDB(common_options));

Expand Down Expand Up @@ -5564,6 +5565,18 @@ Status Tablet::GetTabletKeyRanges(
FATAL_INVALID_ENUM_VALUE(Direction, direction);
}

docdb::HistoryCutoff Tablet::AllowedHistoryCutoff() {
docdb::HistoryCutoff result = {
.cotables_cutoff_ht = HybridTime::kInvalid,
.primary_cutoff_ht = HybridTime::kMax,
};
if (allowed_history_cutoff_provider_) {
result.MakeAtMost(allowed_history_cutoff_provider_(metadata_.get()));
}
result.primary_cutoff_ht.MakeAtMost(snapshots_->AllowedHistoryCutoff());
return result;
}

// ------------------------------------------------------------------------------------------------

Result<ScopedReadOperation> ScopedReadOperation::Create(
Expand Down Expand Up @@ -5635,5 +5648,4 @@ Result<google::protobuf::RepeatedPtrField<tablet::FilePB>> ListFiles(const std::
return result;
}

} // namespace tablet
} // namespace yb
} // namespace yb::tablet
3 changes: 3 additions & 0 deletions src/yb/tablet/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -1244,6 +1244,8 @@ class Tablet : public AbstractTablet,
REQUIRES(vector_indexes_mutex_);
docdb::VectorIndexesPtr VectorIndexesList() const EXCLUDES(vector_indexes_mutex_);

docdb::HistoryCutoff AllowedHistoryCutoff();

mutable std::mutex flush_filter_mutex_;
std::function<rocksdb::MemTableFilter()> mem_table_flush_filter_factory_
GUARDED_BY(flush_filter_mutex_);
Expand Down Expand Up @@ -1274,6 +1276,7 @@ class Tablet : public AbstractTablet,
std::function<void()> num_sst_files_changed_listener_
GUARDED_BY(num_sst_files_changed_listener_mutex_);

AllowedHistoryCutoffProvider allowed_history_cutoff_provider_;
std::shared_ptr<TabletRetentionPolicy> retention_policy_;

std::mutex full_compaction_token_mutex_;
Expand Down
33 changes: 7 additions & 26 deletions src/yb/tablet/tablet_retention_policy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,29 +93,10 @@ TabletRetentionPolicy::TabletRetentionPolicy(
}
}

void TabletRetentionPolicy::MakeAtLeast(HistoryCutoff value) {
if (value.cotables_cutoff_ht) {
committed_history_cutoff_information_.cotables_cutoff_ht = std::max(
committed_history_cutoff_information_.cotables_cutoff_ht,
value.cotables_cutoff_ht);
}
if (value.primary_cutoff_ht) {
committed_history_cutoff_information_.primary_cutoff_ht = std::max(
committed_history_cutoff_information_.primary_cutoff_ht,
value.primary_cutoff_ht);
}
}

HistoryCutoff TabletRetentionPolicy::UpdateCommittedHistoryCutoff(
HistoryCutoff value) {
HistoryCutoff TabletRetentionPolicy::UpdateCommittedHistoryCutoff(HistoryCutoff value) {
std::lock_guard lock(mutex_);
if (!value.cotables_cutoff_ht && !value.primary_cutoff_ht) {
return committed_history_cutoff_information_;
}

VLOG_WITH_PREFIX(4) << __func__ << "(" << value << ")";

MakeAtLeast(value);
committed_history_cutoff_information_.MakeAtLeast(value);
return committed_history_cutoff_information_;
}

Expand All @@ -129,7 +110,7 @@ HistoryRetentionDirective TabletRetentionPolicy::GetRetentionDirective() {
} else {
history_cutoff = EffectiveHistoryCutoff();
VLOG(4) << "Effective history cutoff " << history_cutoff;
MakeAtLeast(history_cutoff);
committed_history_cutoff_information_.MakeAtLeast(history_cutoff);
}
}

Expand Down Expand Up @@ -185,14 +166,14 @@ HybridTime TabletRetentionPolicy::GetEarliestAllowedReadHt() {
committed_history_cutoff_information_.primary_cutoff_ht);
}

HistoryCutoff TabletRetentionPolicy::HistoryCutoffToPropagate(
HybridTime last_write_ht) {
HistoryCutoff TabletRetentionPolicy::HistoryCutoffToPropagate(HybridTime last_write_ht) {
std::lock_guard lock(mutex_);

auto now = CoarseMonoClock::now();

VLOG_WITH_PREFIX(4) << __func__ << "(" << last_write_ht << "), left to wait: "
<< MonoDelta(next_history_cutoff_propagation_ - now);
VLOG_WITH_PREFIX_AND_FUNC(4)
<< ", last_write_ht: " << last_write_ht << ", left to wait: "
<< MonoDelta(next_history_cutoff_propagation_ - now);
HybridTime earliest_ht = GetEarliestAllowedReadHt();
if (disable_counter_ != 0 || !FLAGS_enable_history_cutoff_propagation ||
now < next_history_cutoff_propagation_ || last_write_ht <= earliest_ht) {
Expand Down
1 change: 0 additions & 1 deletion src/yb/tablet/tablet_retention_policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ class TabletRetentionPolicy : public docdb::HistoryRetentionPolicy {
docdb::HistoryCutoff SanitizeHistoryCutoff(
docdb::HistoryCutoff proposed_history_cutoff) REQUIRES(mutex_);

void MakeAtLeast(docdb::HistoryCutoff value) REQUIRES(mutex_);
HybridTime GetEarliestAllowedReadHt() REQUIRES(mutex_);

const std::string& LogPrefix() const {
Expand Down
Loading

0 comments on commit 6404af4

Please sign in to comment.