diff --git a/histogram/Cargo.toml b/histogram/Cargo.toml index 946f90c..4e12619 100644 --- a/histogram/Cargo.toml +++ b/histogram/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "histogram" -version = "0.10.2" +version = "0.11.0" edition = "2021" authors = ["Brian Martin "] license = "MIT OR Apache-2.0" diff --git a/histogram/src/atomic.rs b/histogram/src/atomic.rs index cdd3295..43384d4 100644 --- a/histogram/src/atomic.rs +++ b/histogram/src/atomic.rs @@ -97,20 +97,20 @@ mod test { let percentiles = histogram.drain(); assert_eq!( percentiles.percentile(50.0), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: 50..=50, - }) + })) ); histogram.increment(1000).unwrap(); // after another load the map is empty let percentiles = histogram.drain(); assert_eq!( percentiles.percentile(50.0), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: 1000..=1003, - }) + })) ); } @@ -118,29 +118,48 @@ mod test { // Tests percentiles fn percentiles() { let histogram = AtomicHistogram::new(7, 64).unwrap(); + let percentiles = [25.0, 50.0, 75.0, 90.0, 99.0]; + + // check empty + assert_eq!(histogram.load().percentiles(&percentiles), Ok(None)); + + for percentile in percentiles { + assert_eq!(histogram.load().percentile(percentile), Ok(None)); + } + + // populate and check percentiles for i in 0..=100 { let _ = histogram.increment(i); assert_eq!( histogram.load().percentile(0.0), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: 0..=0, - }) + })) ); assert_eq!( histogram.load().percentile(100.0), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: i..=i, - }) + })) + ); + } + + for percentile in percentiles { + assert_eq!( + histogram + .load() + .percentile(percentile) + .map(|b| b.unwrap().end()), + Ok(percentile as u64) ); } - assert_eq!(histogram.load().percentile(25.0).map(|b| b.end()), Ok(25)); - assert_eq!(histogram.load().percentile(50.0).map(|b| b.end()), Ok(50)); - assert_eq!(histogram.load().percentile(75.0).map(|b| b.end()), Ok(75)); - assert_eq!(histogram.load().percentile(90.0).map(|b| b.end()), Ok(90)); - assert_eq!(histogram.load().percentile(99.0).map(|b| b.end()), Ok(99)); - assert_eq!(histogram.load().percentile(99.9).map(|b| b.end()), Ok(100)); + + assert_eq!( + histogram.load().percentile(99.9).map(|b| b.unwrap().end()), + Ok(100) + ); assert_eq!( histogram.load().percentile(-1.0), @@ -155,6 +174,7 @@ mod test { .load() .percentiles(&[50.0, 90.0, 99.0, 99.9]) .unwrap() + .unwrap() .iter() .map(|(p, b)| (*p, b.end())) .collect(); @@ -167,10 +187,10 @@ mod test { let _ = histogram.increment(1024); assert_eq!( histogram.load().percentile(99.9), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: 1024..=1031, - }) + })) ); } } diff --git a/histogram/src/sparse.rs b/histogram/src/sparse.rs index c2bd45d..3a7bda4 100644 --- a/histogram/src/sparse.rs +++ b/histogram/src/sparse.rs @@ -142,35 +142,67 @@ impl SparseHistogram { Ok(histogram) } - /// Return a single percentile from this histogram. + /// Return a collection of percentiles from this histogram. /// - /// The percentile should be in the inclusive range `0.0..=100.0`. For + /// Each percentile should be in the inclusive range `0.0..=100.0`. For /// example, the 50th percentile (median) can be found using `50.0`. - pub fn percentile(&self, percentile: f64) -> Result { + /// + /// The results will be sorted by the percentile. + pub fn percentiles(&self, percentiles: &[f64]) -> Result>, Error> { + // validate all the percentiles + if percentiles.is_empty() { + return Err(Error::InvalidPercentile); + } + + for percentile in percentiles { + if !(0.0..=100.0).contains(percentile) { + return Err(Error::InvalidPercentile); + } + } + let total: u128 = self.count.iter().map(|v| *v as u128).sum(); - if !(0.0..=100.0).contains(&percentile) { - return Err(Error::InvalidPercentile); + // empty histogram, no percentiles available + if total == 0 { + return Ok(None); } - let search = ((total as f64) * percentile / 100.0).ceil() as usize; + // sort the requested percentiles so we can find them in a single pass + let mut percentiles = percentiles.to_vec(); + percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + let searches: Vec = percentiles + .iter() + .map(|p| ((total as f64) * *p / 100.0).ceil() as usize) + .collect(); + let mut search_idx = 0; + let mut result: Vec<(f64, Bucket)> = Vec::with_capacity(percentiles.len()); + let mut seen: usize = 0; for (idx, count) in self.index.iter().zip(self.count.iter()) { seen += *count as usize; - if seen >= search { - return Ok(Bucket { - count: *count, - range: self.config.index_to_range(*idx), - }); + while search_idx < searches.len() && seen >= searches[search_idx] { + result.push(( + percentiles[search_idx], + Bucket { + count: *count, + range: self.config.index_to_range(*idx), + }, + )); + search_idx += 1; } } - // should never be reached; return highest bucket if not found - let last_idx = self.index.len() - 1; - Ok(Bucket { - count: self.count[last_idx], - range: self.config.index_to_range(self.index[last_idx]), - }) + Ok(Some(result)) + } + + /// Return a single percentile from this histogram. + /// + /// The percentile should be in the inclusive range `0.0..=100.0`. For + /// example, the 50th percentile (median) can be found using `50.0`. + pub fn percentile(&self, percentile: f64) -> Result, Error> { + self.percentiles(&[percentile]) + .map(|v| v.map(|x| x.first().unwrap().1.clone())) } /// Returns a new histogram with a reduced grouping power. The reduced @@ -384,20 +416,30 @@ mod tests { } #[test] - fn percentile() { + fn percentiles() { let mut hstandard = Histogram::new(4, 10).unwrap(); + let hempty = SparseHistogram::from(&hstandard); + for v in 1..1024 { let _ = hstandard.increment(v); } let hsparse = SparseHistogram::from(&hstandard); - - for percentile in [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9] { + let percentiles = [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9]; + for percentile in percentiles { + let bempty = hempty.percentile(percentile).unwrap(); let bstandard = hstandard.percentile(percentile).unwrap(); let bsparse = hsparse.percentile(percentile).unwrap(); + assert_eq!(bempty, None); assert_eq!(bsparse, bstandard); } + + assert_eq!(hempty.percentiles(&percentiles), Ok(None)); + assert_eq!( + hstandard.percentiles(&percentiles).unwrap(), + hsparse.percentiles(&percentiles).unwrap() + ); } fn compare_histograms(hstandard: &Histogram, hsparse: &SparseHistogram) { diff --git a/histogram/src/standard.rs b/histogram/src/standard.rs index 19c7a8e..56aa7cc 100644 --- a/histogram/src/standard.rs +++ b/histogram/src/standard.rs @@ -76,7 +76,7 @@ impl Histogram { /// example, the 50th percentile (median) can be found using `50.0`. /// /// The results will be sorted by the percentile. - pub fn percentiles(&self, percentiles: &[f64]) -> Result, Error> { + pub fn percentiles(&self, percentiles: &[f64]) -> Result>, Error> { // get the total count let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum(); @@ -91,6 +91,11 @@ impl Histogram { } } + // empty histogram, no percentiles available + if total_count == 0 { + return Ok(None); + } + let mut bucket_idx = 0; let mut partial_sum = self.buckets[bucket_idx] as u128; @@ -125,16 +130,16 @@ impl Histogram { }) .collect(); - Ok(result) + Ok(Some(result)) } /// Return a single percentile from this histogram. /// /// The percentile should be in the inclusive range `0.0..=100.0`. For /// example, the 50th percentile (median) can be found using `50.0`. - pub fn percentile(&self, percentile: f64) -> Result { + pub fn percentile(&self, percentile: f64) -> Result, Error> { self.percentiles(&[percentile]) - .map(|v| v.first().unwrap().1.clone()) + .map(|v| v.map(|x| x.first().unwrap().1.clone())) } /// Returns a new histogram with a reduced grouping power. The reduced @@ -310,29 +315,39 @@ mod tests { // Tests percentiles fn percentiles() { let mut histogram = Histogram::new(7, 64).unwrap(); + + assert_eq!(histogram.percentile(50.0).unwrap(), None); + assert_eq!( + histogram.percentiles(&[50.0, 90.0, 99.0, 99.9]).unwrap(), + None + ); + for i in 0..=100 { let _ = histogram.increment(i); assert_eq!( histogram.percentile(0.0), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: 0..=0, - }) + })) ); assert_eq!( histogram.percentile(100.0), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: i..=i, - }) + })) ); } - assert_eq!(histogram.percentile(25.0).map(|b| b.end()), Ok(25)); - assert_eq!(histogram.percentile(50.0).map(|b| b.end()), Ok(50)); - assert_eq!(histogram.percentile(75.0).map(|b| b.end()), Ok(75)); - assert_eq!(histogram.percentile(90.0).map(|b| b.end()), Ok(90)); - assert_eq!(histogram.percentile(99.0).map(|b| b.end()), Ok(99)); - assert_eq!(histogram.percentile(99.9).map(|b| b.end()), Ok(100)); + assert_eq!(histogram.percentile(25.0).map(|b| b.unwrap().end()), Ok(25)); + assert_eq!(histogram.percentile(50.0).map(|b| b.unwrap().end()), Ok(50)); + assert_eq!(histogram.percentile(75.0).map(|b| b.unwrap().end()), Ok(75)); + assert_eq!(histogram.percentile(90.0).map(|b| b.unwrap().end()), Ok(90)); + assert_eq!(histogram.percentile(99.0).map(|b| b.unwrap().end()), Ok(99)); + assert_eq!( + histogram.percentile(99.9).map(|b| b.unwrap().end()), + Ok(100) + ); assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile)); assert_eq!(histogram.percentile(101.0), Err(Error::InvalidPercentile)); @@ -340,6 +355,7 @@ mod tests { let percentiles: Vec<(f64, u64)> = histogram .percentiles(&[50.0, 90.0, 99.0, 99.9]) .unwrap() + .unwrap() .iter() .map(|(p, b)| (*p, b.end())) .collect(); @@ -352,10 +368,10 @@ mod tests { let _ = histogram.increment(1024); assert_eq!( histogram.percentile(99.9), - Ok(Bucket { + Ok(Some(Bucket { count: 1, range: 1024..=1031, - }) + })) ); } @@ -395,7 +411,7 @@ mod tests { let v = vals[((*p / 100.0 * (vals.len() as f64)) as usize) - 1]; // Value and relative error from full histogram - let vhist = histogram.percentile(*p).unwrap().end(); + let vhist = histogram.percentile(*p).unwrap().unwrap().end(); let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64); assert!(e < error); }