From e00e2d90bbf3785adc5a2bd41a3bf55cbf84822a Mon Sep 17 00:00:00 2001 From: MatthewVon Date: Sat, 3 May 2014 14:27:06 -0400 Subject: [PATCH 1/3] prototype code to vary sizes of mmap files based upon function. --- db/builder.cc | 11 ++++++++++- db/db_impl.cc | 30 ++++++++++++++++++++++++------ db/repair.cc | 2 +- db/version_set.cc | 4 ++-- include/leveldb/env.h | 23 +++++++++++++---------- table/format.cc | 2 +- util/cache2.cc | 13 +++++++++++-- util/env.cc | 2 +- util/env_posix.cc | 20 ++++++++++++-------- 9 files changed, 75 insertions(+), 32 deletions(-) diff --git a/db/builder.cc b/db/builder.cc index aa7e5d72..0b9be145 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -39,7 +39,16 @@ Status BuildTable(const std::string& dbname, std::string fname = TableFileName(options, meta->number, meta->level); if (iter->Valid()) { WritableFile* file; - s = env->NewWritableFile(fname, &file); + size_t map_size; + + // large buffers, try for a little bit bigger than half hoping + // for two writes ... 
not three + if (10*1024*1024 < options.write_buffer_size) + map_size=(options.write_buffer_size/6)*4; + else + map_size=(options.write_buffer_size*12)/10; // integer multiply 1.2 + + s = env->NewWritableFile(fname, &file, map_size); if (!s.ok()) { return s; } diff --git a/db/db_impl.cc b/db/db_impl.cc index d30e60a0..50a40c12 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -248,7 +248,7 @@ Status DBImpl::NewDB() { const std::string manifest = DescriptorFileName(dbname_, 1); WritableFile* file; - Status s = env_->NewWritableFile(manifest, &file); + Status s = env_->NewWritableFile(manifest, &file, 4*1024L); if (!s.ok()) { return s; } @@ -1108,7 +1108,7 @@ Status DBImpl::OpenCompactionOutputFile( // Make the output file std::string fname = TableFileName(options_, file_number, compact->compaction->level()+1); - Status s = env_->NewWritableFile(fname, &compact->outfile); + Status s = env_->NewWritableFile(fname, &compact->outfile, gMapSize); if (s.ok()) { Options options; options=options_; @@ -1214,9 +1214,9 @@ DBImpl::MaybeRaiseBlockSize( file_data_size=versions_->MaxFileSizeForLevel(CompactionStuff.level()); keys_per_file=file_data_size / avg_value_size; - if (75000 < keys_per_file) + if (300000 < keys_per_file) { - keys_per_file = 75000; + keys_per_file = 300000; file_data_size = avg_value_size * keys_per_file; } // if @@ -1862,9 +1862,18 @@ Status DBImpl::MakeRoomForWrite(bool force) { // Attempt to switch to a new memtable and trigger compaction of old assert(versions_->PrevLogNumber() == 0); uint64_t new_log_number = versions_->NewFileNumber(); + size_t map_size; + + // large buffers, try for a little bit bigger than half hoping + // for two writes ... 
not three + if (10*1024*1024 < options_.write_buffer_size) + map_size=(options_.write_buffer_size/6)*4; + else + map_size=(options_.write_buffer_size*12)/10; // integer multiply 1.2 WritableFile* lfile = NULL; gPerfCounters->Inc(ePerfWriteNewMem); - s = env_->NewWriteOnlyFile(LogFileName(dbname_, new_log_number), &lfile); + s = env_->NewWriteOnlyFile(LogFileName(dbname_, new_log_number), &lfile, + map_size); if (!s.ok()) { // Avoid chewing through file number space in a tight loop. versions_->ReuseFileNumber(new_log_number); @@ -2041,8 +2050,17 @@ Status DB::Open(const Options& options, const std::string& dbname, if (s.ok()) { uint64_t new_log_number = impl->versions_->NewFileNumber(); WritableFile* lfile; + size_t map_size; + + // large buffers, try for a little bit bigger than half hoping + // for two writes ... not three + if (10*1024*1024 < options.write_buffer_size) + map_size=(options.write_buffer_size/6)*4; + else + map_size=(options.write_buffer_size*12)/10; // integer multiply 1.2 + s = options.env->NewWriteOnlyFile(LogFileName(dbname, new_log_number), - &lfile); + &lfile, map_size); if (s.ok()) { edit.SetLogNumber(new_log_number); impl->logfile_ = lfile; diff --git a/db/repair.cc b/db/repair.cc index 735e16f6..f6fca3ed 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -408,7 +408,7 @@ class Repairer { Status WriteDescriptor() { std::string tmp = TempFileName(dbname_, 1); WritableFile* file; - Status status = env_->NewWritableFile(tmp, &file); + Status status = env_->NewWritableFile(tmp, &file, 4096); if (!status.ok()) { return status; } diff --git a/db/version_set.cc b/db/version_set.cc index e66d7bd7..1715ae7d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -884,7 +884,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { assert(descriptor_file_ == NULL); new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_); edit->SetNextFile(next_file_number_); - s = env_->NewWritableFile(new_manifest_file, 
&descriptor_file_); + s = env_->NewWritableFile(new_manifest_file, &descriptor_file_, 4*1024L); if (s.ok()) { descriptor_log_ = new log::Writer(descriptor_file_); s = WriteSnapshot(descriptor_log_); @@ -1853,7 +1853,7 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key, size_t key_count) { // to meet file open speed goals else { - ret_flag=(75000NewRandomAccessFile(f, r); } - Status NewWritableFile(const std::string& f, WritableFile** r) { - return target_->NewWritableFile(f, r); + Status NewWritableFile(const std::string& f, WritableFile** r, size_t s=0) { + return target_->NewWritableFile(f, r, s); } - Status NewAppendableFile(const std::string& f, WritableFile** r) { - return target_->NewAppendableFile(f, r); + Status NewAppendableFile(const std::string& f, WritableFile** r, size_t s=0) { + return target_->NewAppendableFile(f, r, s); } - Status NewWriteOnlyFile(const std::string& f, WritableFile** r) { - return target_->NewWriteOnlyFile(f, r); + Status NewWriteOnlyFile(const std::string& f, WritableFile** r, size_t s=0) { + return target_->NewWriteOnlyFile(f, r, s); } bool FileExists(const std::string& f) { return target_->FileExists(f); } Status GetChildren(const std::string& dir, std::vector* r) { diff --git a/table/format.cc b/table/format.cc index a0a59607..d34c164e 100644 --- a/table/format.cc +++ b/table/format.cc @@ -190,7 +190,7 @@ Status ReadBlock(RandomAccessFile* file, // create / append file to hold removed blocks new_name+="/BLOCKS.bad"; - s2=options.GetEnv()->NewAppendableFile(new_name, &bad_file); + s2=options.GetEnv()->NewAppendableFile(new_name, &bad_file, 4*1024); if (s2.ok()) { // need a try/catch diff --git a/util/cache2.cc b/util/cache2.cc index ae762984..40471abb 100644 --- a/util/cache2.cc +++ b/util/cache2.cc @@ -432,14 +432,23 @@ DoubleCache::DoubleCache( : m_FileCache(NULL), m_BlockCache(NULL), m_IsInternalDB(options.is_internal_db), m_PlentySpace(true), m_Overhead(0), m_TotalAllocation(0), - m_FileTimeout(4*24*60*60), // 
default is 4 days + m_FileTimeout(10*24*60*60), // default is 10 days m_BlockCacheThreshold(options.block_cache_threshold), m_SizeCachedFiles(0) { // fixed allocation for recovery log and info LOG: 20M each // (with 64 or open databases, this is a serious number) // and fixed allocation for two write buffers - m_Overhead=options.write_buffer_size*2 + gMapSize*2; + size_t map_size; + + // large buffers, try for a little bit bigger than half hoping + // for two writes ... not three + if (10*1024*1024 < options.write_buffer_size) + map_size=(options.write_buffer_size/6)*4; + else + map_size=(options.write_buffer_size*12)/10; // integer multiply 1.2 + + m_Overhead=options.write_buffer_size*2 + map_size + 4096; m_TotalAllocation=gFlexCache.GetDBCacheCapacity(m_IsInternalDB); if (m_Overhead < m_TotalAllocation) diff --git a/util/env.cc b/util/env.cc index c2600e96..b2601e71 100644 --- a/util/env.cc +++ b/util/env.cc @@ -37,7 +37,7 @@ static Status DoWriteStringToFile(Env* env, const Slice& data, const std::string& fname, bool should_sync) { WritableFile* file; - Status s = env->NewWritableFile(fname, &file); + Status s = env->NewWritableFile(fname, &file, 4*1024L); if (!s.ok()) { return s; } diff --git a/util/env_posix.cc b/util/env_posix.cc index 6c0ada14..1b7765c5 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -282,11 +282,12 @@ class PosixMmapFile : public WritableFile { public: PosixMmapFile(const std::string& fname, int fd, size_t page_size, size_t file_offset=0L, - bool is_async=false) + bool is_async=false, + size_t map_size=gMapSize) : filename_(fname), fd_(fd), page_size_(page_size), - map_size_(Roundup(gMapSize, page_size)), + map_size_(Roundup(map_size, page_size)), base_(NULL), limit_(NULL), dst_(NULL), @@ -546,20 +547,22 @@ class PosixEnv : public Env { } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { + WritableFile** result, + size_t map_size) { Status s; const int fd = open(fname.c_str(), O_CREAT | O_RDWR | 
O_TRUNC, 0644); if (fd < 0) { *result = NULL; s = IOError(fname, errno); } else { - *result = new PosixMmapFile(fname, fd, page_size_, 0, false); + *result = new PosixMmapFile(fname, fd, page_size_, 0, false, map_size); } return s; } virtual Status NewAppendableFile(const std::string& fname, - WritableFile** result) { + WritableFile** result, + size_t map_size) { Status s; const int fd = open(fname.c_str(), O_CREAT | O_RDWR, 0644); if (fd < 0) { @@ -571,7 +574,7 @@ class PosixEnv : public Env { s = GetFileSize(fname, &size); if (s.ok()) { - *result = new PosixMmapFile(fname, fd, page_size_, size); + *result = new PosixMmapFile(fname, fd, page_size_, size, false, map_size); } // if else { @@ -583,14 +586,15 @@ class PosixEnv : public Env { } virtual Status NewWriteOnlyFile(const std::string& fname, - WritableFile** result) { + WritableFile** result, + size_t map_size) { Status s; const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); if (fd < 0) { *result = NULL; s = IOError(fname, errno); } else { - *result = new PosixMmapFile(fname, fd, page_size_, 0, true); + *result = new PosixMmapFile(fname, fd, page_size_, 0, true, map_size); } return s; } From 41b7bb87648feb81d7b2f1884b7b9fdc2cced897 Mon Sep 17 00:00:00 2001 From: MatthewVon Date: Wed, 7 May 2014 14:53:24 -0400 Subject: [PATCH 2/3] clean up redundant code to single RecoveryMmapSize() call. Add suggested option from github user licenser. Fix all unit tests. 
--- db/corruption_test.cc | 4 ++- db/db_bench.cc | 2 +- db/db_impl.cc | 32 +++++------------- db/db_test.cc | 4 +-- helpers/memenv/memenv.cc | 2 +- helpers/memenv/memenv_test.cc | 10 +++--- include/leveldb/env.h | 7 +++- include/leveldb/options.h | 3 ++ util/cache2.cc | 11 ++----- util/env_posix.cc | 19 +++++++++++ util/flexcache_test.cc | 62 +++++++++++++++++------------------ util/options.cc | 2 ++ util/testutil.h | 4 +-- 13 files changed, 86 insertions(+), 76 deletions(-) diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 5c9e1d9d..095f5560 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -222,7 +222,9 @@ TEST(CorruptionTest, NewFileErrorDuringWrite) { const int num = 3 + (Options().write_buffer_size / kValueSize); std::string value_storage; Status s; - for (int i = 0; s.ok() && i < num; i++) { + for (int i = 0; + s.ok() && i < num && 0==env_.num_writable_file_errors_; + i++) { WriteBatch batch; batch.Put("a", Value(100, &value_storage)); s = db_->Write(WriteOptions(), &batch); diff --git a/db/db_bench.cc b/db/db_bench.cc index 0db97a41..497868a8 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -908,7 +908,7 @@ class Benchmark { char fname[100]; snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_); WritableFile* file; - Status s = Env::Default()->NewWritableFile(fname, &file); + Status s = Env::Default()->NewWritableFile(fname, &file, 2<<20); if (!s.ok()) { fprintf(stderr, "%s\n", s.ToString().c_str()); return; diff --git a/db/db_impl.cc b/db/db_impl.cc index 50a40c12..951b3988 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -131,11 +131,14 @@ Options SanitizeOptions(const std::string& dbname, ClipToRange(&result.block_size, 1<<10, 4<<20); if (src.limited_developer_mem) - { gMapSize=2*1024*1024L; - if (2*1024*1024L < result.write_buffer_size) // let unit tests be smaller - result.write_buffer_size=2*1024*1024L; - } // if + + // alternate means to change gMapSize ... 
more generic + if (0!=src.mmap_size) + gMapSize=src.mmap_size; + + if (gMapSize < result.write_buffer_size) // let unit tests be smaller + result.write_buffer_size=gMapSize; // Validate tiered storage options tiered_dbname=MakeTieredDbname(dbname, result); @@ -155,7 +158,6 @@ Options SanitizeOptions(const std::string& dbname, result.block_cache = block_cache; } - return result; } @@ -1862,18 +1864,11 @@ Status DBImpl::MakeRoomForWrite(bool force) { // Attempt to switch to a new memtable and trigger compaction of old assert(versions_->PrevLogNumber() == 0); uint64_t new_log_number = versions_->NewFileNumber(); - size_t map_size; - // large buffers, try for a little bit bigger than half hoping - // for two writes ... not three - if (10*1024*1024 < options_.write_buffer_size) - map_size=(options_.write_buffer_size/6)*4; - else - map_size=(options_.write_buffer_size*12)/10; // integer multiply 1.2 WritableFile* lfile = NULL; gPerfCounters->Inc(ePerfWriteNewMem); s = env_->NewWriteOnlyFile(LogFileName(dbname_, new_log_number), &lfile, - map_size); + options_.env->RecoveryMmapSize(&options_)); if (!s.ok()) { // Avoid chewing through file number space in a tight loop. versions_->ReuseFileNumber(new_log_number); @@ -2050,17 +2045,8 @@ Status DB::Open(const Options& options, const std::string& dbname, if (s.ok()) { uint64_t new_log_number = impl->versions_->NewFileNumber(); WritableFile* lfile; - size_t map_size; - - // large buffers, try for a little bit bigger than half hoping - // for two writes ... 
not three - if (10*1024*1024 < options.write_buffer_size) - map_size=(options.write_buffer_size/6)*4; - else - map_size=(options.write_buffer_size*12)/10; // integer multiply 1.2 - s = options.env->NewWriteOnlyFile(LogFileName(dbname, new_log_number), - &lfile, map_size); + &lfile, options.env->RecoveryMmapSize(&options)); if (s.ok()) { edit.SetLogNumber(new_log_number); impl->logfile_ = lfile; diff --git a/db/db_test.cc b/db/db_test.cc index 34595e5f..f945b57b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -71,7 +71,7 @@ class SpecialEnv : public EnvWrapper { count_random_reads_ = false; } - Status NewWritableFile(const std::string& f, WritableFile** r) { + Status NewWritableFile(const std::string& f, WritableFile** r, size_t map_size) { class SSTableFile : public WritableFile { private: SpecialEnv* env_; @@ -105,7 +105,7 @@ class SpecialEnv : public EnvWrapper { return Status::IOError("simulated write error"); } - Status s = target()->NewWritableFile(f, r); + Status s = target()->NewWritableFile(f, r, 2<<20); if (s.ok()) { if (strstr(f.c_str(), ".sst") != NULL) { *r = new SSTableFile(this, *r); diff --git a/helpers/memenv/memenv.cc b/helpers/memenv/memenv.cc index 2082083b..efad9524 100644 --- a/helpers/memenv/memenv.cc +++ b/helpers/memenv/memenv.cc @@ -257,7 +257,7 @@ class InMemoryEnv : public EnvWrapper { } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { + WritableFile** result, size_t) { MutexLock lock(&mutex_); if (file_map_.find(fname) != file_map_.end()) { DeleteFileInternal(fname); diff --git a/helpers/memenv/memenv_test.cc b/helpers/memenv/memenv_test.cc index 5f336264..38ee6ac3 100644 --- a/helpers/memenv/memenv_test.cc +++ b/helpers/memenv/memenv_test.cc @@ -41,7 +41,7 @@ TEST(MemEnvTest, Basics) { ASSERT_EQ(0, children.size()); // Create a file. 
- ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file)); + ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file, 2<<20)); delete writable_file; // Check that the file exists. @@ -53,7 +53,7 @@ TEST(MemEnvTest, Basics) { ASSERT_EQ("f", children[0]); // Write to the file. - ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file)); + ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file, 2<<20)); ASSERT_OK(writable_file->Append("abc")); delete writable_file; @@ -98,7 +98,7 @@ TEST(MemEnvTest, ReadWrite) { ASSERT_OK(env_->CreateDir(dbname + "")); - ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file)); + ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file, 2<<20)); ASSERT_OK(writable_file->Append("hello ")); ASSERT_OK(writable_file->Append("world")); delete writable_file; @@ -145,7 +145,7 @@ TEST(MemEnvTest, Misc) { ASSERT_TRUE(!test_dir.empty()); WritableFile* writable_file; - ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file)); + ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file, 2<<20)); // These are no-ops, but we test they return success. 
ASSERT_OK(writable_file->Sync()); @@ -167,7 +167,7 @@ TEST(MemEnvTest, LargeWrite) { } WritableFile* writable_file; - ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file)); + ASSERT_OK(env_->NewWritableFile(dbname + "/f", &writable_file, 2<<20)); ASSERT_OK(writable_file->Append("foo")); ASSERT_OK(writable_file->Append(write_data)); delete writable_file; diff --git a/include/leveldb/env.h b/include/leveldb/env.h index 70b35b64..952467a7 100644 --- a/include/leveldb/env.h +++ b/include/leveldb/env.h @@ -22,13 +22,14 @@ namespace leveldb { +class AppendableFile; class FileLock; +class Options; class Logger; class RandomAccessFile; class SequentialFile; class Slice; class WritableFile; -class AppendableFile; class Env { public: @@ -182,6 +183,10 @@ class Env { // Riak specific: Get object that is tracking various software counters virtual PerformanceCounters * GetPerformanceCounters() {return(gPerfCounters);}; + // Riak specific: Request size of recovery memory map, potentially using + // Options data for the decision. Default 2Mbyte is Google's original size. + virtual size_t RecoveryMmapSize(const struct Options *) const {return(2*1024*1024L);}; + private: // No copying allowed Env(const Env&); diff --git a/include/leveldb/options.h b/include/leveldb/options.h index 0423e226..18abd2d8 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -183,6 +183,9 @@ struct Options { // Default: false bool limited_developer_mem; + // The size of each mmapped file, choose 0 for the default (20M) + uint64_t mmap_size; + // Riak option to adjust aggressive delete behavior. 
// - zero disables aggressive delete // - positive value indicates how many deletes must exist diff --git a/util/cache2.cc b/util/cache2.cc index 40471abb..3a22ebd6 100644 --- a/util/cache2.cc +++ b/util/cache2.cc @@ -439,16 +439,9 @@ DoubleCache::DoubleCache( // fixed allocation for recovery log and info LOG: 20M each // (with 64 or open databases, this is a serious number) // and fixed allocation for two write buffers - size_t map_size; - // large buffers, try for a little bit bigger than half hoping - // for two writes ... not three - if (10*1024*1024 < options.write_buffer_size) - map_size=(options.write_buffer_size/6)*4; - else - map_size=(options.write_buffer_size*12)/10; // integer multiply 1.2 - - m_Overhead=options.write_buffer_size*2 + map_size + 4096; + m_Overhead=options.write_buffer_size*2 + + options.env->RecoveryMmapSize(&options) + 4096; m_TotalAllocation=gFlexCache.GetDBCacheCapacity(m_IsInternalDB); if (m_Overhead < m_TotalAllocation) diff --git a/util/env_posix.cc b/util/env_posix.cc index 1b7765c5..08d7f605 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -779,6 +779,25 @@ class PosixEnv : public Env { {return(gImmThreads->m_WorkQueueAtomic + gWriteThreads->m_WorkQueueAtomic + gLevel0Threads->m_WorkQueueAtomic + gCompactionThreads->m_WorkQueueAtomic);}; + virtual size_t RecoveryMmapSize(const struct Options * options) const + { + size_t map_size; + + if (NULL!=options) + { + // large buffers, try for a little bit bigger than half hoping + // for two writes ... 
not three + if (10*1024*1024 < options->write_buffer_size) + map_size=(options->write_buffer_size/6)*4; + else + map_size=(options->write_buffer_size*12)/10; // integer multiply 1.2 + } // if + else + map_size=2*1024*1024L; + + return(map_size); + }; + private: void PthreadCall(const char* label, int result) { diff --git a/util/flexcache_test.cc b/util/flexcache_test.cc index b31ff30a..d4b49bd0 100644 --- a/util/flexcache_test.cc +++ b/util/flexcache_test.cc @@ -44,8 +44,8 @@ TEST(FlexCacheTest, UserSizing) { options.create_if_missing=true; options.filter_policy=NewBloomFilterPolicy2(16); - options.total_leveldb_mem=300*1024*1024L; - options.write_buffer_size=4*1024*1024L; + options.total_leveldb_mem=1000*1024*1024L; + options.write_buffer_size=45*1024*1024L; // verify accounting with one database dbname = test::TmpDir() + "/flexcache0"; @@ -54,10 +54,10 @@ TEST(FlexCacheTest, UserSizing) { ASSERT_EQ(1, DBList()->GetDBCount(false)); db[0]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(252*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(922742784L, atoi(value.c_str())); db[0]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(250*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(920645632L, atoi(value.c_str())); // verify accounting with three databases dbname = test::TmpDir() + "/flexcache1"; @@ -69,32 +69,32 @@ TEST(FlexCacheTest, UserSizing) { ASSERT_EQ(3, DBList()->GetDBCount(false)); db[0]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(52*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(223692117L, atoi(value.c_str())); db[0]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(50*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(221594965L, atoi(value.c_str())); db[1]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(52*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(223692117L, atoi(value.c_str())); db[1]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(50*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(221594965L, 
atoi(value.c_str())); db[2]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(52*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(223692117L, atoi(value.c_str())); db[2]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(50*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(221594965L, atoi(value.c_str())); // verify accounting after two databases go away delete db[0]; delete db[2]; db[1]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(252*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(922742784L, atoi(value.c_str())); db[1]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(250*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(920645632L, atoi(value.c_str())); // rebuild from zero to ten databases, verify accounting delete db[1]; @@ -112,10 +112,10 @@ TEST(FlexCacheTest, UserSizing) { for(loop=0; loop<10; ++loop) { db[loop]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(252*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(188739584l, atoi(value.c_str())); db[loop]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(250*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(186642432L, atoi(value.c_str())); } // for for (loop=0; loop<10; ++loop) @@ -141,8 +141,8 @@ TEST(FlexCacheTest, MixedSizing) { options.create_if_missing=true; options.filter_policy=NewBloomFilterPolicy2(16); - options.total_leveldb_mem=300*1024*1024L; - options.write_buffer_size=4*1024*1024L; + options.total_leveldb_mem=1000*1024*1024L; + options.write_buffer_size=45*1024*1024L; // verify accounting with one user & one internal dbname = test::TmpDir() + "/flexcache0"; @@ -152,46 +152,46 @@ TEST(FlexCacheTest, MixedSizing) { ASSERT_EQ(0, DBList()->GetDBCount(true)); db[0]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(252*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(922742784l, atoi(value.c_str())); db[0]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(250*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(920645632L, atoi(value.c_str())); - // raise memory and add 
internal + // add internal dbname = test::TmpDir() + "/flexcache1"; options.is_internal_db=true; - options.total_leveldb_mem=600*1024*1024L; + options.total_leveldb_mem=1600*1024*1024L; st=DB::Open(options, dbname, &db[1]); ASSERT_OK(st); ASSERT_EQ(1, DBList()->GetDBCount(false)); ASSERT_EQ(1, DBList()->GetDBCount(true)); db[0]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(432*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(1216344064l, atoi(value.c_str())); db[0]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(430*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(1214246912L, atoi(value.c_str())); db[1]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(72*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(209711104l, atoi(value.c_str())); db[1]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(70*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(207613952L, atoi(value.c_str())); delete db[0]; ASSERT_EQ(0, DBList()->GetDBCount(false)); ASSERT_EQ(1, DBList()->GetDBCount(true)); db[1]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(72*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(209711104L, atoi(value.c_str())); db[1]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(70*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(207613952L, atoi(value.c_str())); delete db[1]; // rebuild from zero to ten databases, verify accounting - options.total_leveldb_mem=3000*1024*1024L; + options.total_leveldb_mem=4000*1024*1024L; for(loop=0; loop<10; ++loop) { @@ -210,18 +210,18 @@ TEST(FlexCacheTest, MixedSizing) { if (0==(loop %2)) { db[loop]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(432*1024*1024l, atoi(value.c_str())); + ASSERT_EQ(545255424l, atoi(value.c_str())); db[loop]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(430*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(543158272L, atoi(value.c_str())); } // if else { db[loop]->GetProperty("leveldb.block-cache", &value); - ASSERT_EQ(72*1024*1024l, atoi(value.c_str())); + 
ASSERT_EQ(41938944l, atoi(value.c_str())); db[loop]->GetProperty("leveldb.file-cache", &value); - ASSERT_EQ(70*1024*1024L, atoi(value.c_str())); + ASSERT_EQ(39841792L, atoi(value.c_str())); } // else } // for diff --git a/util/options.cc b/util/options.cc index b20ee0ed..06c03e26 100644 --- a/util/options.cc +++ b/util/options.cc @@ -37,6 +37,7 @@ Options::Options() total_leveldb_mem(0), block_cache_threshold(32<<20), limited_developer_mem(false), + mmap_size(0), delete_threshold(1000), fadvise_willneed(false), tiered_slow_level(0) @@ -68,6 +69,7 @@ Options::Dump( Log(log," Options.total_leveldb_mem: %" PRIu64, total_leveldb_mem); Log(log," Options.block_cache_threshold: %" PRIu64, block_cache_threshold); Log(log," Options.limited_developer_mem: %s", limited_developer_mem ? "true" : "false"); + Log(log," Options.mmap_size: %" PRIu64, mmap_size); Log(log," Options.delete_threshold: %" PRIu64, delete_threshold); Log(log," Options.fadvise_willneed: %s", fadvise_willneed ? "true" : "false"); Log(log," Options.tiered_slow_level: %d", tiered_slow_level); diff --git a/util/testutil.h b/util/testutil.h index 824e655b..e84323b7 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -37,13 +37,13 @@ class ErrorEnv : public EnvWrapper { num_writable_file_errors_(0) { } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { + WritableFile** result, size_t map_size) { if (writable_file_error_) { ++num_writable_file_errors_; *result = NULL; return Status::IOError(fname, "fake error"); } - return target()->NewWritableFile(fname, result); + return target()->NewWritableFile(fname, result, map_size); } }; From 237fcfea7b99315bef0a201b6e9ff3d70e53c94e Mon Sep 17 00:00:00 2001 From: MatthewVon Date: Thu, 8 May 2014 11:00:35 -0400 Subject: [PATCH 3/3] oops, forgot one place that needed RecoveryMmapSize() added. Nice to have a good code reviewer. Thank you ajs. 
--- db/builder.cc | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/db/builder.cc b/db/builder.cc index 0b9be145..e7753d0d 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -39,16 +39,9 @@ Status BuildTable(const std::string& dbname, std::string fname = TableFileName(options, meta->number, meta->level); if (iter->Valid()) { WritableFile* file; - size_t map_size; - // large buffers, try for a little bit bigger than half hoping - // for two writes ... not three - if (10*1024*1024 < options.write_buffer_size) - map_size=(options.write_buffer_size/6)*4; - else - map_size=(options.write_buffer_size*12)/10; // integer multiply 1.2 - - s = env->NewWritableFile(fname, &file, map_size); + s = env->NewWritableFile(fname, &file, + env->RecoveryMmapSize(&options)); if (!s.ok()) { return s; }