diff --git a/pika/HISTORY.md b/pika/HISTORY.md index 5f3e8cf..4be764a 100644 --- a/pika/HISTORY.md +++ b/pika/HISTORY.md @@ -1,4 +1,8 @@ # pika for codis +## 3.0.4-3.2 (2019-10-23) +### bug fix +* 修复服务重启后,每个blob的可回收率的分数被清为0,只有被访问到的sst文件中的blob数据会重新计算,导致很多blob文件分数虽然高,但采样时又不满足回收条件的文件一直被选中,后面的无效数据很多但分数为0的真正需要回收的blob文件无法被删除,从而导致gc速度就变慢的bug + ## 3.0.4-3.1 (2019-10-10) ### New Features * 周期检测操作系统free内存剩余大小,如果小于用户设定值,则清理。只有master时执行该操作 diff --git a/pika/include/pika_version.h b/pika/include/pika_version.h index e76df5e..3eb899a 100644 --- a/pika/include/pika_version.h +++ b/pika/include/pika_version.h @@ -10,6 +10,6 @@ #define PIKA_MINOR 0 #define PIKA_PATCH 4 #define PIKA_XMLY_MAJOR 3 -#define PIKA_XMLY_MINOR 1 +#define PIKA_XMLY_MINOR 2 #endif // INCLUDE_PIKA_VERSION_H_ diff --git a/pika/third/rocksdb/utilities/titandb/blob_format.h b/pika/third/rocksdb/utilities/titandb/blob_format.h index e67d5d2..9a3cba8 100644 --- a/pika/third/rocksdb/utilities/titandb/blob_format.h +++ b/pika/third/rocksdb/utilities/titandb/blob_format.h @@ -1,5 +1,7 @@ #pragma once +#include <atomic> + #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" @@ -140,6 +142,9 @@ class BlobFileMeta { void AddDiscardableSize(uint64_t _discardable_size); double GetDiscardableRatio() const; + int64_t last_sample_time() const { return last_sample_time_; } + void set_last_sample_time(int64_t timestamp) { last_sample_time_ = timestamp; } + private: // Persistent field uint64_t file_number_{0}; @@ -150,6 +155,9 @@ class BlobFileMeta { uint64_t discardable_size_{0}; // bool marked_for_gc_{false}; + + // last gc sample time + std::atomic<int64_t> last_sample_time_{0}; }; // Blob file footer format: diff --git a/pika/third/rocksdb/utilities/titandb/blob_gc_job.cc b/pika/third/rocksdb/utilities/titandb/blob_gc_job.cc index 86736a3..fe3c401 100644 --- a/pika/third/rocksdb/utilities/titandb/blob_gc_job.cc +++ b/pika/third/rocksdb/utilities/titandb/blob_gc_job.cc @@ -98,6 +98,12 @@ Status BlobGCJob::Run() 
{ tmp.append(" "); } tmp.append(std::to_string(f->file_number())); + tmp.append("("); + std::ostringstream score; + score.precision(2); + score << f->GetDiscardableRatio(); + tmp.append(score.str()); + tmp.append(")"); } std::string tmp2; @@ -122,7 +128,7 @@ Status BlobGCJob::Run() { Status BlobGCJob::SampleCandidateFiles() { std::vector<BlobFileMeta*> result; - for (const auto& file : blob_gc_->inputs()) { + for (auto& file : blob_gc_->inputs()) { if (DoSample(file)) { result.push_back(file); } @@ -135,7 +141,7 @@ Status BlobGCJob::SampleCandidateFiles() { return Status::OK(); } -bool BlobGCJob::DoSample(const BlobFileMeta* file) { +bool BlobGCJob::DoSample(BlobFileMeta* file) { if (file->GetDiscardableRatio() >= blob_gc_->titan_cf_options().blob_file_discardable_ratio) { return true; @@ -149,6 +155,11 @@ bool BlobGCJob::DoSample(const BlobFileMeta* file) { return true; } + // update file sample time + int64_t unix_time; + Env::Default()->GetCurrentTime(&unix_time); + file->set_last_sample_time(unix_time); + Status s; uint64_t sample_size_window = static_cast<uint64_t>( file->file_size() * blob_gc_->titan_cf_options().sample_file_size_ratio); diff --git a/pika/third/rocksdb/utilities/titandb/blob_gc_job.h b/pika/third/rocksdb/utilities/titandb/blob_gc_job.h index 45cdc0f..3256443 100644 --- a/pika/third/rocksdb/utilities/titandb/blob_gc_job.h +++ b/pika/third/rocksdb/utilities/titandb/blob_gc_job.h @@ -58,7 +58,7 @@ class BlobGCJob { std::atomic_bool* shuting_down_{nullptr}; Status SampleCandidateFiles(); - bool DoSample(const BlobFileMeta* file); + bool DoSample(BlobFileMeta* file); Status DoRunGC(); Status BuildIterator(std::unique_ptr* result); bool DiscardEntry(const Slice& key, const BlobIndex& blob_index); diff --git a/pika/third/rocksdb/utilities/titandb/blob_gc_picker.cc b/pika/third/rocksdb/utilities/titandb/blob_gc_picker.cc index 972291b..c1d9fbe 100644 --- a/pika/third/rocksdb/utilities/titandb/blob_gc_picker.cc +++ b/pika/third/rocksdb/utilities/titandb/blob_gc_picker.cc @@ 
-3,6 +3,8 @@ namespace rocksdb { namespace titandb { +static const int64_t ONE_DAY_SECONDS = 86400; + BasicBlobGCPicker::BasicBlobGCPicker(TitanDBOptions db_options, TitanCFOptions cf_options) : db_options_(db_options), cf_options_(cf_options) {} @@ -17,6 +19,10 @@ std::unique_ptr BasicBlobGCPicker::PickBlobGC( uint64_t batch_size = 0; // ROCKS_LOG_INFO(db_options_.info_log, "blob file num:%lu gc score:%lu", // blob_storage->NumBlobFiles(), blob_storage->gc_score().size()); + + int64_t unix_time; + Env::Default()->GetCurrentTime(&unix_time); + for (auto& gc_score : blob_storage->gc_score()) { auto blob_file = blob_storage->FindFile(gc_score.file_number).lock(); assert(blob_file); @@ -36,12 +42,30 @@ std::unique_ptr BasicBlobGCPicker::PickBlobGC( blob_file->file_number()); continue; } + + // if the file has sampled last time, but not gc, we will skip the file + if (blob_file->GetDiscardableRatio() < cf_options_.blob_file_discardable_ratio) { + if (0 != blob_file->last_sample_time() + && unix_time - blob_file->last_sample_time() < ONE_DAY_SECONDS) { + ROCKS_LOG_DEBUG(db_options_.info_log, "Titan GC skip the file[%lu]", + gc_score.file_number); + continue; + } + } + blob_files.push_back(blob_file.get()); batch_size += blob_file->file_size(); if (batch_size >= cf_options_.max_gc_batch_size) break; } + // reset file last_sample_time if we have checked out to the end of all blob files + if (blob_files.empty()) { + ROCKS_LOG_INFO(db_options_.info_log, "Titan GC check to the end of all blob files, blob file num:%lu gc score:%lu", + blob_storage->NumBlobFiles(), blob_storage->gc_score().size()); + ResetAllBlobFileSampleTime(blob_storage); + } + if (blob_files.empty() || batch_size < cf_options_.min_gc_batch_size) return nullptr; @@ -56,5 +80,16 @@ bool BasicBlobGCPicker::CheckBlobFile(BlobFileMeta* blob_file) const { return true; } +void BasicBlobGCPicker::ResetAllBlobFileSampleTime(BlobStorage* blob_storage) { + for (auto& gc_score : blob_storage->gc_score()) { + auto 
blob_file = blob_storage->FindFile(gc_score.file_number).lock(); + assert(blob_file); + if (!CheckBlobFile(blob_file.get())) { + continue; + } + blob_file->set_last_sample_time(0); + } +} + } // namespace titandb } // namespace rocksdb diff --git a/pika/third/rocksdb/utilities/titandb/blob_gc_picker.h b/pika/third/rocksdb/utilities/titandb/blob_gc_picker.h index 1331fdf..356d717 100644 --- a/pika/third/rocksdb/utilities/titandb/blob_gc_picker.h +++ b/pika/third/rocksdb/utilities/titandb/blob_gc_picker.h @@ -40,6 +40,8 @@ class BasicBlobGCPicker final : public BlobGCPicker { // Check if blob_file needs to gc, return true means we need pick this // file for gc bool CheckBlobFile(BlobFileMeta* blob_file) const; + + void ResetAllBlobFileSampleTime(BlobStorage* blob_storage); }; } // namespace titandb