
Allow fractional bits/key in BloomFilterPolicy #6092

Closed
wants to merge 4 commits into from
3 changes: 2 additions & 1 deletion HISTORY.md
@@ -6,6 +6,7 @@
* With FIFO compaction style, options.periodic_compaction_seconds will have the same meaning as options.ttl. Whichever is stricter will be used. With the default options.periodic_compaction_seconds value and options.ttl's default of 0, RocksDB will give a default of 30 days.
* Added an API GetCreationTimeOfOldestFile(uint64_t* creation_time) to get the file_creation_time of the oldest SST file in the DB.
* An unlikely usage of FilterPolicy is no longer supported. Calling GetFilterBitsBuilder() on the FilterPolicy returned by NewBloomFilterPolicy will now cause an assertion violation in debug builds, because RocksDB has internally migrated to a more elaborate interface that is expected to evolve further. Custom implementations of FilterPolicy should work as before, except those wrapping the return of NewBloomFilterPolicy, which will require a new override of a protected function in FilterPolicy.
* NewBloomFilterPolicy now takes bits_per_key as a double instead of an int. This permits finer control over the memory vs. accuracy trade-off in the new Bloom filter implementation and should not change source code compatibility.
* The option BackupableDBOptions::max_valid_backups_to_open is now only used when opening BackupEngineReadOnly. When opening a read/write BackupEngine, anything but the default value logs a warning and is treated as the default. This change ensures that backup deletion has proper accounting of shared files to ensure they are deleted when no longer referenced by a backup.

### Default Option Changes
@@ -16,7 +17,7 @@
* Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold.
* `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` now expose the file number of SST files as well as the oldest blob file referenced by each SST.
* A batched MultiGet API (DB::MultiGet()) that supports retrieving keys from multiple column families.
* Full and partitioned filters in the block-based table use an improved Bloom filter implementation, enabled with format_version 5 (or above) because previous releases cannot read this filter. This replacement is faster and more accurate, especially for high bits per key or millions of keys in a single (full) filter. For example, the new Bloom filter has a lower false positive rate at 16 bits per key than the old one at 100 bits per key.
* Full and partitioned filters in the block-based table use an improved Bloom filter implementation, enabled with format_version 5 (or above) because previous releases cannot read this filter. This replacement is faster and more accurate, especially for high bits per key or millions of keys in a single (full) filter. For example, the new Bloom filter has the same false positive rate at 9.55 bits per key as the old one at 10 bits per key, and a lower false positive rate at 16 bits per key than the old one at 100 bits per key.
* Added AVX2 instructions to USE_SSE builds to accelerate the new Bloom filter and XXH3-based hash function on compatible x86_64 platforms (Haswell and later, ~2014).
* Support options.ttl with options.max_open_files = -1. File's oldest ancestor time will be written to manifest. If it is available, this information will be used instead of creation_time in table properties.
* Setting options.ttl for universal compaction now has the same meaning as setting periodic_compaction_seconds.
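The accuracy claims in the changelog above can be sanity-checked against the textbook Bloom filter false-positive formula, fp = (1 - e^(-k/b))^k for b bits per key and k probes. This is a standalone sketch, not RocksDB code; real RocksDB filters deviate somewhat from the ideal formula (the legacy cache-local filter is less accurate, the new one tracks it closely), and the probe counts used here are illustrative.

```cpp
#include <cmath>

// Idealized Bloom filter false positive rate for `bits_per_key` average
// bits allocated per key and `num_probes` hash probes per lookup.
double BloomFpRate(double bits_per_key, int num_probes) {
  double k = static_cast<double>(num_probes);
  // Probability a given bit is still zero after inserting all keys is
  // approximately e^(-k/b); a false positive needs all k probed bits set.
  double frac_bits_set = 1.0 - std::exp(-k / bits_per_key);
  return std::pow(frac_bits_set, num_probes);
}
```

With roughly 10 bits per key and 6-7 probes this lands near the ~1% rate the changelog mentions, and at 16 bits per key the ideal rate drops well below it.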
10 changes: 7 additions & 3 deletions include/rocksdb/filter_policy.h
@@ -151,8 +151,12 @@ class FilterPolicy {
// Return a new filter policy that uses a bloom filter with approximately
// the specified number of bits per key.
//
// bits_per_key: bits per key in bloom filter. A good value for bits_per_key
// is 10, which yields a filter with ~ 1% false positive rate.
// bits_per_key: average bits allocated per key in bloom filter. A good
// choice is 9.9, which yields a filter with ~ 1% false positive rate.
// When format_version < 5, the value will be rounded to the nearest
// integer. Using no more than three digits after the decimal point is
// recommended, as in 6.667.
//
// use_block_based_builder: use deprecated block based filter (true) rather
// than full or partitioned filter (false).
//
@@ -167,5 +171,5 @@
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(
int bits_per_key, bool use_block_based_builder = false);
double bits_per_key, bool use_block_based_builder = false);
} // namespace rocksdb
4 changes: 2 additions & 2 deletions java/rocksjni/filter.cc
@@ -19,10 +19,10 @@
/*
* Class: org_rocksdb_BloomFilter
* Method: createBloomFilter
* Signature: (IZ)J
* Signature: (DZ)J
*/
jlong Java_org_rocksdb_BloomFilter_createNewBloomFilter(
JNIEnv* /*env*/, jclass /*jcls*/, jint bits_per_key,
JNIEnv* /*env*/, jclass /*jcls*/, jdouble bits_per_key,
jboolean use_block_base_builder) {
auto* sptr_filter = new std::shared_ptr<const rocksdb::FilterPolicy>(
rocksdb::NewBloomFilterPolicy(bits_per_key, use_block_base_builder));
10 changes: 5 additions & 5 deletions java/src/main/java/org/rocksdb/BloomFilter.java
@@ -20,7 +20,7 @@
*/
public class BloomFilter extends Filter {

private static final int DEFAULT_BITS_PER_KEY = 10;
private static final double DEFAULT_BITS_PER_KEY = 10.0;
private static final boolean DEFAULT_MODE = true;

/**
@@ -39,15 +39,15 @@ public BloomFilter() {
*
* <p>
* bits_per_key: bits per key in bloom filter. A good value for bits_per_key
* is 10, which yields a filter with ~ 1% false positive rate.
* is 9.9, which yields a filter with ~ 1% false positive rate.
* </p>
* <p>
* Callers must delete the result after any database that is using the
* result has been closed.</p>
*
* @param bitsPerKey number of bits to use
*/
public BloomFilter(final int bitsPerKey) {
public BloomFilter(final double bitsPerKey) {
this(bitsPerKey, DEFAULT_MODE);
}

@@ -70,10 +70,10 @@ public BloomFilter(final int bitsPerKey) {
* @param bitsPerKey number of bits to use
* @param useBlockBasedMode use block based mode or full filter mode
*/
public BloomFilter(final int bitsPerKey, final boolean useBlockBasedMode) {
public BloomFilter(final double bitsPerKey, final boolean useBlockBasedMode) {
super(createNewBloomFilter(bitsPerKey, useBlockBasedMode));
}

private native static long createNewBloomFilter(final int bitsKeyKey,
private native static long createNewBloomFilter(final double bitsKeyKey,
final boolean useBlockBasedMode);
}
23 changes: 16 additions & 7 deletions options/options_test.cc
@@ -23,6 +23,7 @@
#include "rocksdb/memtablerep.h"
#include "rocksdb/utilities/leveldb_options.h"
#include "rocksdb/utilities/object_registry.h"
#include "table/block_based/filter_policy_internal.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/random.h"
@@ -515,13 +516,15 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
BlockBasedTableOptions table_opt;
BlockBasedTableOptions new_opt;
// make sure default values are overwritten by something else
ASSERT_OK(GetBlockBasedTableOptionsFromString(table_opt,
"cache_index_and_filter_blocks=1;index_type=kHashSearch;"
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
"block_size_deviation=8;block_restart_interval=4;"
"filter_policy=bloomfilter:4:true;whole_key_filtering=1;",
&new_opt));
ASSERT_OK(GetBlockBasedTableOptionsFromString(
table_opt,
"cache_index_and_filter_blocks=1;index_type=kHashSearch;"
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
"block_size_deviation=8;block_restart_interval=4;"
"format_version=5;whole_key_filtering=1;"
"filter_policy=bloomfilter:4.567:false;",
&new_opt));
ASSERT_TRUE(new_opt.cache_index_and_filter_blocks);
ASSERT_EQ(new_opt.index_type, BlockBasedTableOptions::kHashSearch);
ASSERT_EQ(new_opt.checksum, ChecksumType::kxxHash);
@@ -534,7 +537,13 @@
ASSERT_EQ(new_opt.block_size, 1024UL);
ASSERT_EQ(new_opt.block_size_deviation, 8);
ASSERT_EQ(new_opt.block_restart_interval, 4);
ASSERT_EQ(new_opt.format_version, 5U);
ASSERT_EQ(new_opt.whole_key_filtering, true);
ASSERT_TRUE(new_opt.filter_policy != nullptr);
const BloomFilterPolicy& bfp =
dynamic_cast<const BloomFilterPolicy&>(*new_opt.filter_policy);
EXPECT_EQ(bfp.GetMillibitsPerKey(), 4567);
EXPECT_EQ(bfp.GetWholeBitsPerKey(), 5);

// unknown option
ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt,
4 changes: 2 additions & 2 deletions table/block_based/block_based_table_factory.cc
@@ -510,8 +510,8 @@ std::string ParseBlockBasedTableOption(const std::string& name,
if (pos == std::string::npos) {
return "Invalid filter policy config, missing bits_per_key";
}
int bits_per_key =
ParseInt(trim(value.substr(kName.size(), pos - kName.size())));
double bits_per_key =
ParseDouble(trim(value.substr(kName.size(), pos - kName.size())));
bool use_block_based_builder =
ParseBoolean("use_block_based_builder", trim(value.substr(pos + 1)));
new_options->filter_policy.reset(
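The change above swaps ParseInt for ParseDouble so the option string `filter_policy=bloomfilter:4.567:false` keeps its fractional part. A hypothetical standalone sketch of that parsing step (using std::stod in place of RocksDB's internal ParseDouble, and a simplified boolean parse; names here are illustrative, not the library's API):

```cpp
#include <string>

// Illustrative parse result; the real code feeds these straight into
// NewBloomFilterPolicy().
struct BloomConfig {
  double bits_per_key;
  bool use_block_based_builder;
};

// Parses a value of the form "bloomfilter:<bits_per_key>:<bool>".
BloomConfig ParseBloomFilterOption(const std::string& value) {
  const std::string kName = "bloomfilter:";
  size_t pos = value.find(':', kName.size());
  BloomConfig cfg;
  // The fractional bits/key now survives: "4.567" parses as 4.567,
  // where ParseInt would have stopped at "4".
  cfg.bits_per_key = std::stod(value.substr(kName.size(), pos - kName.size()));
  cfg.use_block_based_builder = (value.substr(pos + 1) == "true");
  return cfg;
}
```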
66 changes: 41 additions & 25 deletions table/block_based/filter_policy.cc
@@ -27,9 +27,10 @@ namespace {
// See description in FastLocalBloomImpl
class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
public:
FastLocalBloomBitsBuilder(const int bits_per_key, const int num_probes)
: bits_per_key_(bits_per_key), num_probes_(num_probes) {
assert(bits_per_key_);
FastLocalBloomBitsBuilder(const int millibits_per_key)
: millibits_per_key_(millibits_per_key),
num_probes_(FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_)) {
assert(millibits_per_key >= 1000);
}

// No Copy allowed
@@ -77,14 +78,15 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {

int CalculateNumEntry(const uint32_t bytes) override {
uint32_t bytes_no_meta = bytes >= 5u ? bytes - 5u : 0;
return static_cast<int>(uint64_t{8} * bytes_no_meta / bits_per_key_);
return static_cast<int>(uint64_t{8000} * bytes_no_meta /
millibits_per_key_);
}

uint32_t CalculateSpace(const int num_entry) override {
uint32_t num_cache_lines = 0;
if (bits_per_key_ > 0 && num_entry > 0) {
if (millibits_per_key_ > 0 && num_entry > 0) {
num_cache_lines = static_cast<uint32_t>(
(int64_t{num_entry} * bits_per_key_ + 511) / 512);
(int64_t{num_entry} * millibits_per_key_ + 511999) / 512000);
}
return num_cache_lines * 64 + /*metadata*/ 5;
}
@@ -136,7 +138,7 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
}
}

int bits_per_key_;
int millibits_per_key_;
int num_probes_;
std::vector<uint64_t> hash_entries_;
};
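The sizing math in the diff above moves from bits to millibits: a 512-bit cache line holds 512000 millibits, and a byte holds 8000, with the `+ 511999` term rounding cache lines up. A standalone sketch of the two functions as free functions (extracted from the builder for illustration):

```cpp
#include <cstdint>

// Bytes of filter needed for num_entry keys at millibits_per_key average
// bits/key: whole 64-byte (512-bit) cache lines, rounded up, plus 5 bytes
// of metadata.
uint32_t CalculateSpace(int num_entry, int millibits_per_key) {
  uint32_t num_cache_lines = 0;
  if (millibits_per_key > 0 && num_entry > 0) {
    num_cache_lines = static_cast<uint32_t>(
        (int64_t{num_entry} * millibits_per_key + 511999) / 512000);
  }
  return num_cache_lines * 64 + /*metadata*/ 5;
}

// Inverse direction: how many keys fit in a filter of `bytes` bytes.
int CalculateNumEntry(uint32_t bytes, int millibits_per_key) {
  uint32_t bytes_no_meta = bytes >= 5u ? bytes - 5u : 0;
  return static_cast<int>(uint64_t{8000} * bytes_no_meta / millibits_per_key);
}
```

For example, 1000 keys at 9900 millibits/key (9.9 bits/key) need 20 cache lines, i.e. 1285 bytes including metadata, and that space admits at least the original 1000 keys back through CalculateNumEntry.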
@@ -187,7 +189,7 @@ using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>;

class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
public:
explicit LegacyBloomBitsBuilder(const int bits_per_key, const int num_probes);
explicit LegacyBloomBitsBuilder(const int bits_per_key);

// No Copy allowed
LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete;
@@ -208,7 +210,6 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
}

private:
friend class FullFilterBlockTest_DuplicateEntries_Test;
int bits_per_key_;
int num_probes_;
std::vector<uint32_t> hash_entries_;
@@ -228,9 +229,9 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits);
};

LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key,
const int num_probes)
: bits_per_key_(bits_per_key), num_probes_(num_probes) {
LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key)
: bits_per_key_(bits_per_key),
num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)) {
assert(bits_per_key_);
}

@@ -412,12 +413,24 @@ const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
kAuto,
};

BloomFilterPolicy::BloomFilterPolicy(int bits_per_key, Mode mode)
: bits_per_key_(bits_per_key), mode_(mode) {
// We intentionally round down to reduce probing cost a little bit
num_probes_ = static_cast<int>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
if (num_probes_ < 1) num_probes_ = 1;
if (num_probes_ > 30) num_probes_ = 30;
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode) {
// Sanitize bits_per_key
if (bits_per_key < 1.0) {
bits_per_key = 1.0;
[Review comment, Contributor] Any reason for this sanitizing? I know it doesn't make sense to use lower than 1.0. It doesn't seem to make sense to use 1.0 either. In that case, should we just leave it as it is?

[Reply, Contributor Author] 1.0 is OK if you consider a 63% FP rate better than nothing. I do.

The big thing is getting rid of values like negatives, infinities, or NaN that can gum up the conversion. And I can see these arising in client code due to subtle logical errors, etc. I think it's better to keep the value within a generous realm of "reasonable" than to always trust the client. Note: this plays into PR #6088 because that makes it more likely that client code might compute rather than hard-code bits_per_key values.

[Reply, Contributor] If 1.0 is OK, then why 0.99 is not? 0.98 is not? I think it's more like a code readability problem. When 1.0 is hard coded here, readers might assume something special to it. I don't have objection though, as long as code comment explains it clearly.

} else if (!(bits_per_key < 100.0)) { // including NaN
bits_per_key = 100.0;
}

// Includes a nudge toward rounding up, to ensure on all platforms
// that doubles specified with three decimal digits after the decimal
// point are interpreted accurately.
[Review comment, Contributor] No objection to the code, but just a question: do we need the result to be exactly the same for every platform?

[Reply, Contributor Author] No, we don't need that in any schema compatibility sense. It's just a judgment call on what kind of surprises we want to avoid.

millibits_per_key_ = static_cast<int>(bits_per_key * 1000.0 + 0.500001);

// For better or worse, this is a rounding up of a nudged rounding up,
// e.g. 7.4999999999999 will round up to 8, but that provides more
// predictability against small arithmetic errors in floating point.
whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000;
[Review comment, Contributor] What's the harm of not having whole_bits_per_key_ and just using millibits_per_key_ for legacy filters too?

[Reply, Contributor Author] I could do that. I just thought it somewhat safer not to unlock new behaviors in the legacy filters. Forward-compatibility should work but that's more manual effort to test. This way, format_version alone can back out of potential issues.

[Reply, Contributor] We are not breaking back compatibility by doing it, right? Integer value should still generate the same value no matter what.

}
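The constructor's sanitize-and-convert logic above can be exercised in isolation. This is a hypothetical standalone extraction (struct and function names are illustrative), reproducing the clamp to [1.0, 100.0], the NaN-catching `!(x < 100.0)` comparison, the rounding nudge, and the second rounding to whole bits for legacy filters:

```cpp
// Illustrative pair of values the real constructor stores as members.
struct BitsPerKey {
  int millibits;
  int whole_bits;
};

BitsPerKey Convert(double bits_per_key) {
  // Clamp out-of-range inputs (negatives, infinities); the negated
  // comparison form also routes NaN to the upper bound.
  if (bits_per_key < 1.0) {
    bits_per_key = 1.0;
  } else if (!(bits_per_key < 100.0)) {  // including NaN
    bits_per_key = 100.0;
  }
  BitsPerKey r;
  // Nudge toward rounding up so values given to three decimal places
  // convert exactly across floating point implementations.
  r.millibits = static_cast<int>(bits_per_key * 1000.0 + 0.500001);
  // Legacy filters round the (nudged) millibits to the nearest whole bit.
  r.whole_bits = (r.millibits + 500) / 1000;
  return r;
}
```

This matches the expectations in the options_test change above: 4.567 converts to 4567 millibits and 5 whole bits, and 6.667 converts to 6667 millibits and 7 whole bits.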

BloomFilterPolicy::~BloomFilterPolicy() {}
@@ -433,7 +446,7 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n,
assert(mode_ == kDeprecatedBlock);

// Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * bits_per_key_);
uint32_t bits = static_cast<uint32_t>(n * whole_bits_per_key_);

// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
@@ -442,12 +455,15 @@
uint32_t bytes = (bits + 7) / 8;
bits = bytes * 8;

int num_probes =
LegacyNoLocalityBloomImpl::ChooseNumProbes(whole_bits_per_key_);

const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes
dst->push_back(static_cast<char>(num_probes)); // Remember # of probes
char* array = &(*dst)[init_size];
for (int i = 0; i < n; i++) {
LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes_,
LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes,
array);
}
}
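The legacy sizing path above keeps whole bits per key and now derives the probe count on the fly. A standalone sketch of that arithmetic; ChooseNumProbes here mirrors the ~ln(2) formula removed from the old constructor, which I assume matches LegacyNoLocalityBloomImpl::ChooseNumProbes, and LegacyFilterBits is a hypothetical name for the size computation:

```cpp
#include <cstdint>

// Probe count from whole bits/key: intentionally rounds down (0.69 =~ ln 2)
// to shave probing cost, clamped to [1, 30].
int ChooseNumProbes(int whole_bits_per_key) {
  int num_probes = static_cast<int>(whole_bits_per_key * 0.69);
  if (num_probes < 1) num_probes = 1;
  if (num_probes > 30) num_probes = 30;
  return num_probes;
}

// Filter length in bits for n keys: n * whole bits/key, with a 64-bit
// floor so tiny filters don't see a very high false positive rate, then
// rounded up to whole bytes.
uint32_t LegacyFilterBits(int n, int whole_bits_per_key) {
  uint32_t bits = static_cast<uint32_t>(n * whole_bits_per_key);
  if (bits < 64) bits = 64;
  uint32_t bytes = (bits + 7) / 8;
  return bytes * 8;
}
```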
@@ -470,7 +486,7 @@ bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
// Consider it a match.
return true;
}
// NB: using k not num_probes_
// NB: using stored k not num_probes for whole_bits_per_key_
return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k,
array);
}
@@ -504,9 +520,9 @@ FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilderInternal(
case kDeprecatedBlock:
return nullptr;
case kFastLocalBloom:
return new FastLocalBloomBitsBuilder(bits_per_key_, num_probes_);
return new FastLocalBloomBitsBuilder(millibits_per_key_);
case kLegacyBloom:
return new LegacyBloomBitsBuilder(bits_per_key_, num_probes_);
return new LegacyBloomBitsBuilder(whole_bits_per_key_);
}
}
assert(false);
@@ -649,7 +665,7 @@ FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader(
return new AlwaysTrueFilter();
}

const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
bool use_block_based_builder) {
BloomFilterPolicy::Mode m;
if (use_block_based_builder) {
19 changes: 16 additions & 3 deletions table/block_based/filter_policy_internal.h
@@ -91,7 +91,7 @@ class BloomFilterPolicy : public FilterPolicy {
// tests should prefer using NewBloomFilterPolicy (user-exposed).
static const std::vector<Mode> kAllUserModes;

explicit BloomFilterPolicy(int bits_per_key, Mode mode);
explicit BloomFilterPolicy(double bits_per_key, Mode mode);

~BloomFilterPolicy() override;

@@ -111,6 +111,11 @@
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;

// Essentially for testing only: configured millibits/key
int GetMillibitsPerKey() const { return millibits_per_key_; }
// Essentially for testing only: legacy whole bits/key
int GetWholeBitsPerKey() const { return whole_bits_per_key_; }

protected:
// To use this function, call FilterBuildingContext::GetBuilder().
//
@@ -120,8 +125,16 @@
const FilterBuildingContext&) const override;

private:
int bits_per_key_;
int num_probes_;
// Newer filters support fractional bits per key. For predictable behavior
// of 0.001-precision values across floating point implementations, we
// round to thousandths of a bit (on average) per key.
int millibits_per_key_;

// Older filters round to whole number bits per key. (There *should* be no
// compatibility issue with fractional bits per key, but preserving old
// behavior with format_version < 5 just in case.)
int whole_bits_per_key_;

// Selected mode (a specific implementation or way of selecting an
// implementation) for building new SST filters.
Mode mode_;