Skip to content

Commit

Permalink
Added union method to HLL (#3293)
Browse files Browse the repository at this point in the history
* While several other methods related to joint estimation were
available, union estimation was not.
* Added a union method and a corresponding check in the test suite.
  • Loading branch information
LucaCappelletti94 authored Aug 18, 2024
1 parent bc22970 commit 732fa36
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions src/core/src/sketch/hyperloglog/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ impl HyperLogLog {
estimators::mle(&counts, self.p, self.q, 0.01) as usize
}

/// Estimates the cardinality of the union of this sketch and `other`.
///
/// The estimate is the sum of the three counts returned by
/// `estimators::joint_mle` for the two register sets. This code treats
/// them as the elements seen only by `self`, only by `other`, and by both
/// (the helper itself is defined outside this block).
pub fn union(&self, other: &HyperLogLog) -> usize {
    let (exclusive_self, exclusive_other, shared) =
        estimators::joint_mle(&self.registers, &other.registers, self.p, self.q);

    // The three parts are treated as disjoint, so their sum is the union size.
    let union_size = exclusive_self + exclusive_other + shared;
    union_size
}

pub fn similarity(&self, other: &HyperLogLog) -> f64 {
let (only_a, only_b, intersection) =
estimators::joint_mle(&self.registers, &other.registers, self.p, self.q);
Expand Down Expand Up @@ -272,13 +279,12 @@ mod test {
const N_UNIQUE_H1: usize = 500741;
const N_UNIQUE_H2: usize = 995845;
const N_UNIQUE_U: usize = 995845;
const INTERSECTION: usize = 500838;

const SIMILARITY: f64 = 0.502783;
const CONTAINMENT_H1: f64 = 1.;
const CONTAINMENT_H2: f64 = 0.502783;

const INTERSECTION: usize = 500838;

let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
filename.push("../../tests/test-data/genome-s10.fa.gz");

Expand Down Expand Up @@ -321,6 +327,9 @@ mod test {
let abs_error = (1. - (hll2.cardinality() as f64 / N_UNIQUE_H2 as f64)).abs();
assert!(abs_error < ERR_RATE, "{}", abs_error);

let abs_error = (1. - (hll1.union(&hll2) as f64 / N_UNIQUE_U as f64)).abs();
assert!(abs_error < ERR_RATE, "{}", abs_error);

let similarity = hll1.similarity(&hll2);
let abs_error = (1. - (similarity / SIMILARITY)).abs();
assert!(abs_error < ERR_RATE, "{} {}", similarity, SIMILARITY);
Expand Down

0 comments on commit 732fa36

Please sign in to comment.