Skip to content

Commit

Permalink
0.2.8
Browse files Browse the repository at this point in the history
  • Loading branch information
al8n committed Dec 2, 2023
1 parent 0746591 commit b21f0d1
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 78 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "caches"
version = "0.2.6"
version = "0.2.8"
authors = ["Al Liu <scygliu1@gmail.com>"]
description = "This is a Rust implementation for popular caches (support no_std)."
homepage = "https://github.com/al8n/caches-rs"
Expand Down Expand Up @@ -37,10 +37,11 @@ getrandom = { version = "0.2", features = ["js"] }

[features]
default = ["std"]
std = ["rand", "rand/std", "rand/std_rng"]
std = ["rand", "rand/std", "rand/std_rng", "bitvec/std"]
nightly = ["rand/nightly"]

[dependencies]
bitvec = { version = "1", default-features = false }
hashbrown = { version = "0.14", optional = true }
rand = {version = "0.8", optional = true}

Expand Down
1 change: 1 addition & 0 deletions src/lfu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ impl<K: Hash + Eq> KeyHasher<K> for DefaultKeyHasher<K> {
{
let mut s = self.hasher.build_hasher();
key.hash(&mut s);
#[allow(clippy::manual_hash_one)]
s.finish()
}
}
22 changes: 11 additions & 11 deletions src/lfu/tinylfu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -546,17 +546,17 @@ pub(crate) mod test {
}

// TODO: fix the bug caused by random
// #[test]
// fn test_increment_keys() {
// let mut l: TinyLFU<u64> = TinyLFU::new(16, 16, 0.01).unwrap();

// assert_eq!(l.samples, 16);
// l.increment_keys(&[&1, &2, &2, &3, &3, &3]);
// assert_eq!(l.estimate(&1), 1);
// assert_eq!(l.estimate(&2), 2);
// assert_eq!(l.estimate(&3), 3);
// assert_eq!(6, l.w);
// }
/// Feeds a batch containing repeated keys through `increment_keys` and checks
/// that every key's estimate equals the number of times it appeared, and that
/// `w` equals the batch length afterwards.
#[test]
fn test_increment_keys() {
    let mut lfu: TinyLFU<u64> = TinyLFU::new(16, 16, 0.01).unwrap();
    assert_eq!(lfu.samples, 16);

    lfu.increment_keys(&[&1, &2, &2, &3, &3, &3]);

    // Key `k` occurs exactly `k` times in the batch above.
    for (key, expected) in [(1u64, 1), (2, 2), (3, 3)] {
        assert_eq!(lfu.estimate(&key), expected);
    }
    assert_eq!(6, lfu.w);
}

#[test]
fn test_increment_hashed_keys() {
Expand Down
107 changes: 42 additions & 65 deletions src/lfu/tinylfu/bloom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,26 @@
//!
//! I claim no additional copyright over the original implementation.
// use bitvec::vec::BitVec;
use alloc::{vec, vec::Vec};

const LN_2: f64 = std::f64::consts::LN_2;
const LN_2: f64 = core::f64::consts::LN_2;

struct Size {
size: u64,
exp: u64,
}

fn get_size(n: u64) -> Size {
let mut n = n;
if n < 512 {
n = 512;
}

let mut size = 1u64;
let mut exp = 0u64;
while size < n {
fn get_size(ui64: u64) -> (u64, u64) {
let ui64 = if ui64 < 512 { 512 } else { ui64 };
let mut size = 1;
let mut exponent = 0;
while size < ui64 {
size <<= 1;
exp += 1;
exponent += 1;
}

Size { size, exp }
(size, exponent)
}

struct EntriesLocs {
entries: u64,
locs: u64,
}

fn calc_size_by_wrong_positives(num_entries: usize, wrongs: f64) -> EntriesLocs {
let num_entries = num_entries as f64;
let size = -1f64 * num_entries * wrongs.ln() / LN_2.powf(2f64);
let locs = (LN_2 * size / num_entries).ceil();

EntriesLocs {
entries: size as u64,
locs: locs as u64,
}
/// Derives the filter dimensions from an expected entry count and a target
/// false-positive rate.
///
/// Returns `(size, locs)` where
/// `size = ceil(-num_entries * ln(wrongs) / ln(2)^2)` and
/// `locs = ceil(ln(2) * size / num_entries)`, each converted to `u64`.
fn calc_size_by_wrong_positives(num_entries: f64, wrongs: f64) -> (u64, u64) {
    let ln2_squared = LN_2.powi(2);
    let raw_size = -num_entries * wrongs.ln() / ln2_squared;
    let size = raw_size.ceil() as u64;

    // `locs` is computed from the already-rounded `size`, not from `raw_size`.
    let raw_locs = LN_2 * size as f64 / num_entries;
    let locs = raw_locs.ceil() as u64;

    (size, locs)
}

/// Bloom filter
Expand All @@ -58,35 +39,29 @@ pub(crate) struct Bloom {
}

impl Bloom {
pub fn new(cap: usize, false_positive_ratio: f64) -> Self {
let entries_locs = {
if false_positive_ratio < 1f64 {
calc_size_by_wrong_positives(cap, false_positive_ratio)
} else {
EntriesLocs {
entries: cap as u64,
locs: false_positive_ratio as u64,
}
}
pub fn new(entries: usize, locs_or_err: f64) -> Self {
let (entries, locs) = if locs_or_err < 1.0 {
calc_size_by_wrong_positives(entries as f64, locs_or_err)
} else {
(entries as u64, locs_or_err as u64)
};

let size = get_size(entries_locs.entries);

Self {
bitset: vec![0; (size.size >> 6) as usize],
let (size, exponent) = get_size(entries);
Bloom {
bitset: vec![0; size as usize >> 6],
elem_num: 0,
size: size.size - 1,
size_exp: size.exp,
set_locs: entries_locs.locs,
shift: 64 - size.exp,
size_exp: exponent,
size: size - 1,
set_locs: locs,
shift: 64 - exponent,
}
}

/// `size` makes Bloom filter with a bitset of size sz.
#[inline]
#[allow(dead_code)]
pub fn size(&mut self, sz: usize) {
self.bitset = vec![0; sz >> 6]
self.bitset.resize(sz >> 6, 0)
}

/// Returns the exp of the size
Expand All @@ -98,32 +73,32 @@ impl Bloom {

/// `clear` clears the `Bloom` filter
pub fn clear(&mut self) {
self.bitset.iter_mut().for_each(|v| *v = 0);
self.bitset.fill(0)
}

/// `set` sets the bit[idx] of bitset
pub fn set(&mut self, idx: usize) {
let ptr = (self.bitset.as_mut_ptr() as usize + ((idx % 64) >> 3)) as *mut u8;
unsafe {
*ptr |= 1 << (idx % 8);
}
pub fn set(&mut self, idx: u64) {
let array_index = (idx >> 6) as usize;
let bit_index = idx % 64;
self.bitset[array_index] |= 1 << bit_index;
}

/// `is_set` checks if bit[idx] of bitset is set, returns true/false.
pub fn is_set(&self, idx: usize) -> bool {
let ptr = (self.bitset.as_ptr() as usize + ((idx % 64) >> 3)) as *const u8;
let r = unsafe { *ptr >> (idx % 8) } & 1;
r == 1
pub fn is_set(&self, idx: u64) -> bool {
let array_index = (idx >> 6) as usize;
let bit_index = idx % 64;
(self.bitset[array_index] & (1 << bit_index)) != 0
}

/// `add` adds hash of a key to the bloom filter
pub fn add(&mut self, hash: u64) {
let h = hash >> self.shift;
let l = (hash << self.shift) >> self.shift;
(0..self.set_locs).for_each(|i| {
self.set(((h + i * l) & self.size) as usize);
for i in 0..self.set_locs {
let index = (h + i * l) & self.size;
self.set(index);
self.elem_num += 1;
});
}
}

/// `contains` checks if bit(s) for entry hash is/are set,
Expand All @@ -132,7 +107,8 @@ impl Bloom {
let h = hash >> self.shift;
let l = (hash << self.shift) >> self.shift;
for i in 0..self.set_locs {
if !self.is_set(((h + i * l) & self.size) as usize) {
let index = (h + i * l) & self.size;
if !self.is_set(index) {
return false;
}
}
Expand Down Expand Up @@ -164,6 +140,7 @@ impl Bloom {
#[cfg(test)]
mod test {
use super::*;
use alloc::vec::Vec;
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use std::collections::hash_map::DefaultHasher;
Expand Down
1 change: 1 addition & 0 deletions src/lfu/tinylfu/sketch/count_min_sketch_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use core::ops::{Index, IndexMut};

/// `CountMinSketch` is a small conservative-update count-min sketch
/// implementation with 4-bit counters
#[derive(Clone)]
pub(crate) struct CountMinSketch {
rows: [CountMinRow; DEPTH],
mask: u64,
Expand Down

0 comments on commit b21f0d1

Please sign in to comment.