Skip to content

Commit

Permalink
check last sample time when gc
Browse files Browse the repository at this point in the history
  • Loading branch information
jason committed Oct 23, 2019
1 parent 73650ef commit 0667bb8
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 4 deletions.
4 changes: 4 additions & 0 deletions pika/HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# pika for codis
## 3.0.4-3.2 (2019-10-23)
### bug fix
* 修复服务重启后gc速度变慢的bug:重启后每个blob文件的可回收率分数会被清为0,只有被访问到的sst文件中的blob数据才会重新计算分数;这导致很多分数虽然高、但采样时又不满足回收条件的blob文件一直被选中,而排在后面、无效数据很多但分数为0、真正需要回收的blob文件无法被删除,gc速度因此变慢

## 3.0.4-3.1 (2019-10-10)
### New Features
* 周期检测操作系统free内存剩余大小,如果小于用户设定值,则清理。只有master时执行该操作
Expand Down
2 changes: 1 addition & 1 deletion pika/include/pika_version.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
#define PIKA_MINOR 0
#define PIKA_PATCH 4
#define PIKA_XMLY_MAJOR 3
#define PIKA_XMLY_MINOR 1
#define PIKA_XMLY_MINOR 2

#endif // INCLUDE_PIKA_VERSION_H_
8 changes: 8 additions & 0 deletions pika/third/rocksdb/utilities/titandb/blob_format.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <atomic>

#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
Expand Down Expand Up @@ -140,6 +142,9 @@ class BlobFileMeta {
void AddDiscardableSize(uint64_t _discardable_size);
double GetDiscardableRatio() const;

// Returns the time recorded for the last GC sampling of this file
// (0 when no sampling time is currently recorded).
int64_t last_sample_time() const { return last_sample_time_.load(); }
// Records `timestamp` as the time of the last GC sampling of this file.
// Passing 0 clears the record.
void set_last_sample_time(int64_t timestamp) {
  last_sample_time_.store(timestamp);
}

private:
// Persistent field
uint64_t file_number_{0};
Expand All @@ -150,6 +155,9 @@ class BlobFileMeta {

uint64_t discardable_size_{0};
// bool marked_for_gc_{false};

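// Time of the last GC sampling of this file, as set through
// set_last_sample_time(); 0 means no sampling time is recorded.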
std::atomic<int64_t> last_sample_time_{0};
};

// Blob file footer format:
Expand Down
15 changes: 13 additions & 2 deletions pika/third/rocksdb/utilities/titandb/blob_gc_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ Status BlobGCJob::Run() {
tmp.append(" ");
}
tmp.append(std::to_string(f->file_number()));
tmp.append("(");
std::ostringstream score;
score.precision(2);
score << f->GetDiscardableRatio();
tmp.append(score.str());
tmp.append(")");
}

std::string tmp2;
Expand All @@ -122,7 +128,7 @@ Status BlobGCJob::Run() {

Status BlobGCJob::SampleCandidateFiles() {
std::vector<BlobFileMeta*> result;
for (const auto& file : blob_gc_->inputs()) {
for (auto& file : blob_gc_->inputs()) {
if (DoSample(file)) {
result.push_back(file);
}
Expand All @@ -135,7 +141,7 @@ Status BlobGCJob::SampleCandidateFiles() {
return Status::OK();
}

bool BlobGCJob::DoSample(const BlobFileMeta* file) {
bool BlobGCJob::DoSample(BlobFileMeta* file) {
if (file->GetDiscardableRatio() >=
blob_gc_->titan_cf_options().blob_file_discardable_ratio) {
return true;
Expand All @@ -149,6 +155,11 @@ bool BlobGCJob::DoSample(const BlobFileMeta* file) {
return true;
}

// update file sample time
int64_t unix_time;
Env::Default()->GetCurrentTime(&unix_time);
file->set_last_sample_time(unix_time);

Status s;
uint64_t sample_size_window = static_cast<uint64_t>(
file->file_size() * blob_gc_->titan_cf_options().sample_file_size_ratio);
Expand Down
2 changes: 1 addition & 1 deletion pika/third/rocksdb/utilities/titandb/blob_gc_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class BlobGCJob {
std::atomic_bool* shuting_down_{nullptr};

Status SampleCandidateFiles();
bool DoSample(const BlobFileMeta* file);
bool DoSample(BlobFileMeta* file);
Status DoRunGC();
Status BuildIterator(std::unique_ptr<BlobFileMergeIterator>* result);
bool DiscardEntry(const Slice& key, const BlobIndex& blob_index);
Expand Down
35 changes: 35 additions & 0 deletions pika/third/rocksdb/utilities/titandb/blob_gc_picker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
namespace rocksdb {
namespace titandb {

// Number of seconds in one day (24 h * 60 min * 60 s).
static constexpr int64_t ONE_DAY_SECONDS = 24 * 60 * 60;

// Constructs a picker, initialising db_options_ and cf_options_ from the
// Titan DB options and column-family options passed in by value.
BasicBlobGCPicker::BasicBlobGCPicker(TitanDBOptions db_options,
TitanCFOptions cf_options)
: db_options_(db_options), cf_options_(cf_options) {}
Expand All @@ -17,6 +19,10 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickBlobGC(
uint64_t batch_size = 0;
// ROCKS_LOG_INFO(db_options_.info_log, "blob file num:%lu gc score:%lu",
// blob_storage->NumBlobFiles(), blob_storage->gc_score().size());

int64_t unix_time;
Env::Default()->GetCurrentTime(&unix_time);

for (auto& gc_score : blob_storage->gc_score()) {
auto blob_file = blob_storage->FindFile(gc_score.file_number).lock();
assert(blob_file);
Expand All @@ -36,12 +42,30 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickBlobGC(
blob_file->file_number());
continue;
}

// If the file is still below the discardable-ratio threshold and was already
// sampled less than one day ago without being GC'ed, skip it this round.
if (blob_file->GetDiscardableRatio() < cf_options_.blob_file_discardable_ratio) {
if (0 != blob_file->last_sample_time()
&& unix_time - blob_file->last_sample_time() < ONE_DAY_SECONDS) {
ROCKS_LOG_DEBUG(db_options_.info_log, "Titan GC skip the file[%lu]",
gc_score.file_number);
continue;
}
}

blob_files.push_back(blob_file.get());

batch_size += blob_file->file_size();
if (batch_size >= cf_options_.max_gc_batch_size) break;
}

// If no file was picked in this round (we reached the end of all blob files),
// reset every file's last_sample_time.
if (blob_files.empty()) {
ROCKS_LOG_INFO(db_options_.info_log, "Titan GC check to the end of all blob files, blob file num:%lu gc score:%lu",
blob_storage->NumBlobFiles(), blob_storage->gc_score().size());
ResetAllBlobFileSampleTime(blob_storage);
}

if (blob_files.empty() || batch_size < cf_options_.min_gc_batch_size)
return nullptr;

Expand All @@ -56,5 +80,16 @@ bool BasicBlobGCPicker::CheckBlobFile(BlobFileMeta* blob_file) const {
return true;
}

// Clears the last-sample timestamp (sets it to 0) of every blob file that is
// listed in `blob_storage`'s GC score table and passes CheckBlobFile().
//
// blob_storage: storage whose gc_score() entries are walked; each entry's
//               file_number is resolved through FindFile().
void BasicBlobGCPicker::ResetAllBlobFileSampleTime(BlobStorage* blob_storage) {
  for (const auto& gc_score : blob_storage->gc_score()) {
    auto blob_file = blob_storage->FindFile(gc_score.file_number).lock();
    // Every score entry is expected to refer to a live file; trap a violation
    // in debug builds.
    assert(blob_file);
    // assert() is compiled out under NDEBUG, so also guard explicitly: an
    // expired weak_ptr yields a null shared_ptr, which must not be
    // dereferenced below.
    if (!blob_file || !CheckBlobFile(blob_file.get())) {
      continue;
    }
    blob_file->set_last_sample_time(0);
  }
}

} // namespace titandb
} // namespace rocksdb
2 changes: 2 additions & 0 deletions pika/third/rocksdb/utilities/titandb/blob_gc_picker.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class BasicBlobGCPicker final : public BlobGCPicker {
// Check if blob_file needs to gc, return true means we need pick this
// file for gc
bool CheckBlobFile(BlobFileMeta* blob_file) const;

void ResetAllBlobFileSampleTime(BlobStorage* blob_storage);
};

} // namespace titandb
Expand Down

0 comments on commit 0667bb8

Please sign in to comment.