Skip to content

Commit

Permalink
refactor: Empirical::remove: Try to modify inplace
Browse files Browse the repository at this point in the history
The old code always removed entries and re-inserted them
if necessary. The new code will instead modify the value
(= data_point count) in-place and only remove the key-
value pair from the map if the count would've dropped to zero.
  • Loading branch information
FreezyLemon authored and YeungOnion committed Dec 3, 2024
1 parent af648be commit 6ad4291
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions src/distribution/empirical.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::distribution::ContinuousCDF;
use crate::statistics::*;
use non_nan::NonNan;
use std::collections::BTreeMap;
use std::collections::btree_map::{BTreeMap, Entry};
use std::ops::Bound;

mod non_nan {
Expand Down Expand Up @@ -113,26 +113,30 @@ impl Empirical {
None => return,
};

let val = match self.data.remove(&map_key) {
Some(v) => v,
None => return,
let mut entry = match self.data.entry(map_key) {
Entry::Occupied(entry) => entry,
Entry::Vacant(_) => return, // no entry found
};

if val == 1 && self.data.is_empty() {
self.sum = 0;
self.mean = 0.0;
self.var = 0.0;
return;
};
if *entry.get() == 1 {
entry.remove();
if self.data.is_empty() {
// logically, this should not need special handling.
// FP math can result in mean or var being != 0.0 though.
self.sum = 0;
self.mean = 0.0;
self.var = 0.0;
return;
}
} else {
*entry.get_mut() -= 1;
}

// reset mean and var
let sum = self.sum as f64;
self.mean = (sum * self.mean - data_point) / (sum - 1.);
self.var -= (sum - 1.) * (data_point - self.mean) * (data_point - self.mean) / sum;
self.sum -= 1;
if val != 1 {
self.data.insert(map_key, val - 1);
}
}

// Due to issues with rounding and floating-point accuracy the default
Expand Down

0 comments on commit 6ad4291

Please sign in to comment.