Skip to content

Commit

Permalink
Refactor atomic flush result installation to MANIFEST (facebook#4791)
Browse files Browse the repository at this point in the history
Summary:
as titled.
Since different bg flush threads can flush different sets of column families
(due to column family creation and drop), we decide not to let one thread
perform atomic flush result installation for other threads. Bg flush threads
will install their atomic flush results sequentially to MANIFEST, using
a conditional variable, i.e. atomic_flush_install_cv_ to coordinate.
Pull Request resolved: facebook#4791

Differential Revision: D13498930

Pulled By: riversand963

fbshipit-source-id: dd7482fc41f4bd22dad1e1ef7d4764ef424688d7
  • Loading branch information
riversand963 authored and facebook-github-bot committed Jan 4, 2019
1 parent 77a8d4d commit a07175a
Show file tree
Hide file tree
Showing 9 changed files with 311 additions and 542 deletions.
2 changes: 1 addition & 1 deletion db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
preserve_deletes_(options.preserve_deletes),
closed_(false),
error_handler_(this, immutable_db_options_, &mutex_),
atomic_flush_commit_in_progress_(false) {
atomic_flush_install_cv_(&mutex_) {
// !batch_per_trx_ implies seq_per_batch_ because it is only unset for
// WriteUnprepared, which should use seq_per_batch_.
assert(batch_per_txn_ || seq_per_batch_);
Expand Down
19 changes: 10 additions & 9 deletions db/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1630,15 +1630,16 @@ class DBImpl : public DB {

ErrorHandler error_handler_;

// True if the DB is committing atomic flush.
// TODO (yanqin) the current impl assumes that the entire DB belongs to
// a single atomic flush group. In the future we need to add a new class
// (struct) similar to the following to make it more general.
// struct AtomicFlushGroup {
// bool commit_in_progress_;
// std::vector<MemTableList*> imm_lists;
// };
bool atomic_flush_commit_in_progress_;
// Conditional variable to coordinate installation of atomic flush results.
// With atomic flush, each bg thread installs the result of flushing multiple
// column families, and different threads can flush different column
// families. It's difficult to rely on one thread to perform batch
// installation for all threads. This is different from the non-atomic flush
// case.
// atomic_flush_install_cv_ makes sure that threads install atomic flush
// results sequentially. Flush results of memtables with lower IDs get
// installed to MANIFEST first.
InstrumentedCondVar atomic_flush_install_cv_;
};

extern Options SanitizeOptions(const std::string& db,
Expand Down
74 changes: 53 additions & 21 deletions db/db_impl_compaction_flush.cc
Original file line number Diff line number Diff line change
Expand Up @@ -404,34 +404,65 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
}
}
}
}

if (s.ok()) {
autovector<const autovector<MemTable*>*> mems_list;
for (int i = 0; i != num_cfs; ++i) {
if (cfds[i]->IsDropped()) {
continue;
}
if (s.ok()) {
auto wait_to_install_func = [&]() {
bool ready = true;
for (size_t i = 0; i != cfds.size(); ++i) {
const auto& mems = jobs[i].GetMemTables();
mems_list.emplace_back(&mems);
}
autovector<ColumnFamilyData*> all_cfds;
autovector<MemTableList*> imm_lists;
autovector<const MutableCFOptions*> mutable_cf_options_list;
for (auto cfd : *versions_->GetColumnFamilySet()) {
if (cfd->IsDropped()) {
if (cfds[i]->IsDropped()) {
// If the column family is dropped, then do not wait.
continue;
} else if (!mems.empty() &&
cfds[i]->imm()->GetEarliestMemTableID() < mems[0]->GetID()) {
// If a flush job needs to install the flush result for mems and
// mems[0] is not the earliest memtable, it means another thread must
// be installing flush results for the same column family, then the
// current thread needs to wait.
ready = false;
break;
} else if (mems.empty() && cfds[i]->imm()->GetEarliestMemTableID() <=
bg_flush_args[i].max_memtable_id_) {
// If a flush job does not need to install flush results, then it has
// to wait until all memtables up to max_memtable_id_ (inclusive) are
// installed.
ready = false;
break;
}
all_cfds.emplace_back(cfd);
imm_lists.emplace_back(cfd->imm());
mutable_cf_options_list.emplace_back(cfd->GetLatestMutableCFOptions());
}
return ready;
};

bool resuming_from_bg_err = error_handler_.IsDBStopped();
while ((!error_handler_.IsDBStopped() ||
error_handler_.GetRecoveryError().ok()) &&
!wait_to_install_func()) {
atomic_flush_install_cv_.Wait();
}

s = resuming_from_bg_err ? error_handler_.GetRecoveryError()
: error_handler_.GetBGError();
}

s = MemTableList::TryInstallMemtableFlushResults(
imm_lists, all_cfds, mutable_cf_options_list, mems_list,
&atomic_flush_commit_in_progress_, &logs_with_prep_tracker_,
versions_.get(), &mutex_, file_meta, &job_context->memtables_to_free,
directories_.GetDbDir(), log_buffer);
if (s.ok()) {
autovector<ColumnFamilyData*> tmp_cfds;
autovector<const autovector<MemTable*>*> mems_list;
autovector<const MutableCFOptions*> mutable_cf_options_list;
for (int i = 0; i != num_cfs; ++i) {
const auto& mems = jobs[i].GetMemTables();
if (!cfds[i]->IsDropped() && !mems.empty()) {
tmp_cfds.emplace_back(cfds[i]);
mems_list.emplace_back(&mems);
mutable_cf_options_list.emplace_back(
cfds[i]->GetLatestMutableCFOptions());
}
}

s = InstallMemtableAtomicFlushResults(
nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list,
versions_.get(), &mutex_, file_meta, &job_context->memtables_to_free,
directories_.GetDbDir(), log_buffer);
}

if (s.ok() || s.IsShutdownInProgress()) {
Expand Down Expand Up @@ -2104,6 +2135,7 @@ void DBImpl::BackgroundCallFlush() {
bg_flush_scheduled_--;
// See if there's more work to be done
MaybeScheduleFlushOrCompaction();
atomic_flush_install_cv_.SignalAll();
bg_cv_.SignalAll();
// IMPORTANT: there should be no code after calling SignalAll. This call may
// signal the DB destructor that it's OK to proceed with destruction. In
Expand Down
9 changes: 2 additions & 7 deletions db/flush_job_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
mem->SetID(i);
mem->Ref();
mem->TEST_AtomicFlushSequenceNumber() = 123;

for (size_t j = 0; j != num_keys_per_memtable; ++j) {
std::string key(ToString(j + i * num_keys_per_memtable));
Expand Down Expand Up @@ -325,17 +324,13 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
const auto& mems = flush_jobs[i].GetMemTables();
mems_list.push_back(&mems);
}
autovector<MemTableList*> imm_lists;
autovector<const MutableCFOptions*> mutable_cf_options_list;
for (auto cfd : all_cfds) {
imm_lists.push_back(cfd->imm());
mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions());
}

bool atomic_flush_commit_in_progress = false;
Status s = MemTableList::TryInstallMemtableFlushResults(
imm_lists, all_cfds, mutable_cf_options_list, mems_list,
&atomic_flush_commit_in_progress, nullptr /* logs_prep_tracker */,
Status s = InstallMemtableAtomicFlushResults(
nullptr /* imm_lists */, all_cfds, mutable_cf_options_list, mems_list,
versions_.get(), &mutex_, file_metas, &job_context.memtables_to_free,
nullptr /* db_directory */, nullptr /* log_buffer */);
ASSERT_OK(s);
Expand Down
12 changes: 7 additions & 5 deletions db/memtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,15 @@ class MemTable {

uint64_t GetID() const { return id_; }

SequenceNumber& TEST_AtomicFlushSequenceNumber() {
return atomic_flush_seqno_;
}
void SetFlushCompleted(bool completed) { flush_completed_ = completed; }

uint64_t GetFileNumber() const { return file_number_; }

void TEST_SetFlushCompleted(bool completed) { flush_completed_ = completed; }
void SetFileNumber(uint64_t file_num) { file_number_ = file_num; }

void TEST_SetFileNumber(uint64_t file_num) { file_number_ = file_num; }
void SetFlushInProgress(bool in_progress) {
flush_in_progress_ = in_progress;
}

private:
enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };
Expand Down
Loading

0 comments on commit a07175a

Please sign in to comment.