From 8bf847eff2a8a1609c818cae68d227156f5f3dc1 Mon Sep 17 00:00:00 2001
From: Jake Goulding
Date: Mon, 25 Jan 2021 09:51:54 -0500
Subject: [PATCH] Add basic performance and memory benchmarks

---
 benchmarks/.gitignore              |   2 +
 benchmarks/Cargo.toml              |  21 ++++
 benchmarks/benches/benchmark.rs    |  62 ++++++++++
 benchmarks/src/bin/memory_usage.rs |  53 ++++++++
 benchmarks/src/lib.rs              | 190 +++++++++++++++++++++++++++++
 5 files changed, 328 insertions(+)
 create mode 100644 benchmarks/.gitignore
 create mode 100644 benchmarks/Cargo.toml
 create mode 100644 benchmarks/benches/benchmark.rs
 create mode 100644 benchmarks/src/bin/memory_usage.rs
 create mode 100644 benchmarks/src/lib.rs

diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/benchmarks/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
new file mode 100644
index 0000000..89fecac
--- /dev/null
+++ b/benchmarks/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "benchmarks"
+version = "0.1.0"
+authors = ["Jake Goulding "]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+sxd-string-slab = { path = ".." }
+
+backtrace = "0.3.56"
+criterion = "0.3.3"
+hashbrown = { version = "0.9.1", default-features = false, features = ["ahash", "inline-more"] }
+itertools = "0.10.0"
+once_cell = "1.5.2"
+rand = "0.7.3"
+
+[[bench]]
+name = "benchmark"
+harness = false
diff --git a/benchmarks/benches/benchmark.rs b/benchmarks/benches/benchmark.rs
new file mode 100644
index 0000000..b2d2832
--- /dev/null
+++ b/benchmarks/benches/benchmark.rs
@@ -0,0 +1,62 @@
+use benchmarks::{DUPLICATES, DUPLICATES_STRING, NO_DUPLICATES, NO_DUPLICATES_STRING};
+use criterion::{criterion_group, criterion_main, Criterion, Throughput};
+use hashbrown::HashSet;
+use once_cell::sync::Lazy;
+use std::convert::TryInto;
+use sxd_string_slab::StringArena;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    Lazy::force(&NO_DUPLICATES_STRING);
+    Lazy::force(&DUPLICATES_STRING);
+
+    {
+        let mut group = c.benchmark_group("no duplicates");
+        group.throughput(Throughput::Elements(
+            NO_DUPLICATES.len().try_into().unwrap(),
+        ));
+
+        group.bench_function("sxd_string_slab::StringArena", |b| {
+            b.iter(|| {
+                let mut arena = StringArena::new();
+                for s in NO_DUPLICATES_STRING.lines() {
+                    arena.intern(s);
+                }
+            })
+        });
+
+        group.bench_function("hashbrown::HashSet", |b| {
+            b.iter(|| {
+                let mut arena = HashSet::new();
+                for s in NO_DUPLICATES_STRING.lines() {
+                    arena.get_or_insert_owned(s);
+                }
+            })
+        });
+    }
+
+    {
+        let mut group = c.benchmark_group("duplicates");
+        group.throughput(Throughput::Elements(DUPLICATES.len().try_into().unwrap()));
+
+        group.bench_function("sxd_string_slab::StringArena", |b| {
+            b.iter(|| {
+                let mut arena = StringArena::new();
+                for s in DUPLICATES_STRING.lines() {
+                    arena.intern(s);
+                }
+            })
+        });
+
+        group.bench_function("hashbrown::HashSet", |b| {
+            b.iter(|| {
+                let mut arena = HashSet::new();
+                for s in DUPLICATES_STRING.lines() {
+                    arena.get_or_insert_owned(s);
+                }
+            })
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benchmarks/src/bin/memory_usage.rs b/benchmarks/src/bin/memory_usage.rs
new file mode 100644
index 0000000..0ce8e00
--- /dev/null
+++ b/benchmarks/src/bin/memory_usage.rs
@@ -0,0 +1,53 @@
+use benchmarks::{alloc::TrackingAllocator, env_or, DUPLICATES_STRING, NO_DUPLICATES_STRING};
+use hashbrown::HashSet;
+use once_cell::sync::Lazy;
+use std::convert::TryFrom;
+use sxd_string_slab::StringArena;
+
+#[global_allocator]
+static A: TrackingAllocator = TrackingAllocator;
+
+fn main() {
+    let show_map = env_or("SHOW_MAP", false);
+    Lazy::force(&NO_DUPLICATES_STRING);
+    Lazy::force(&DUPLICATES_STRING);
+
+    let total_length: usize = DUPLICATES_STRING.lines().map(str::len).sum();
+    let total_length_f64 = f64::from(u32::try_from(total_length).unwrap());
+
+    eprintln!("String data of {} bytes", total_length);
+
+    let (_arena, alloc_size, alloc_count, alloc_map) = TrackingAllocator::track_allocations(|| {
+        let mut arena = StringArena::new();
+        for s in DUPLICATES_STRING.lines() {
+            arena.intern(s);
+        }
+        arena
+    });
+
+    let percent = f64::from(u32::try_from(alloc_size).unwrap()) / total_length_f64 * 100.0;
+    eprintln!(
+        "sxd_string_slab::StringArena: {} bytes ({:.2}%) in {} allocations",
+        alloc_size, percent, alloc_count
+    );
+    if show_map {
+        eprintln!("{:?}", alloc_map);
+    }
+
+    let (_arena, alloc_size, alloc_count, alloc_map) = TrackingAllocator::track_allocations(|| {
+        let mut arena = HashSet::new();
+        for s in DUPLICATES_STRING.lines() {
+            arena.get_or_insert_owned(s);
+        }
+        arena
+    });
+
+    let percent = f64::from(u32::try_from(alloc_size).unwrap()) / total_length_f64 * 100.0;
+    eprintln!(
+        "hashbrown::HashSet: {} bytes ({:.2}%) in {} allocations",
+        alloc_size, percent, alloc_count
+    );
+    if show_map {
+        eprintln!("{:?}", alloc_map);
+    }
+}
diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs
new file mode 100644
index 0000000..8fbf7a3
--- /dev/null
+++ b/benchmarks/src/lib.rs
@@ -0,0 +1,190 @@
+use hashbrown::HashSet;
+use itertools::Itertools;
+use once_cell::sync::Lazy;
+use rand::{distributions::Alphanumeric, rngs::StdRng, seq::SliceRandom, Rng, SeedableRng};
+use std::{env, iter, str::FromStr};
+
+pub mod alloc {
+    use backtrace::Backtrace;
+    use hashbrown::HashMap;
+    use once_cell::sync::Lazy;
+    use std::{
+        alloc::{GlobalAlloc, Layout, System},
+        hash::{Hash, Hasher},
+        mem,
+        sync::{
+            atomic::{AtomicBool, AtomicUsize, Ordering},
+            Mutex,
+        },
+    };
+
+    pub struct TrackingAllocator;
+
+    static RECURSIVE: AtomicBool = AtomicBool::new(false);
+    static MEMORY_IN_USE: AtomicUsize = AtomicUsize::new(0);
+    static N_ALLOCATIONS: AtomicUsize = AtomicUsize::new(0);
+    static ALLOCATION_MAP: Lazy<Mutex<AllocMap>> = Lazy::new(Default::default);
+
+    #[derive(Debug, Default)]
+    pub struct AllocMap(HashMap<Trace, Vec<usize>>);
+
+    #[derive(Debug)]
+    struct Trace(Backtrace);
+
+    impl Trace {
+        fn new() -> Self {
+            Self(Backtrace::new_unresolved())
+        }
+    }
+
+    impl Hash for Trace {
+        fn hash<H>(&self, h: &mut H)
+        where
+            H: Hasher,
+        {
+            for f in self.0.frames() {
+                f.ip().hash(h);
+            }
+        }
+    }
+
+    impl PartialEq for Trace {
+        fn eq(&self, other: &Self) -> bool {
+            self.0
+                .frames()
+                .iter()
+                .map(|f| f.ip())
+                .eq(other.0.frames().iter().map(|f| f.ip()))
+        }
+    }
+
+    unsafe impl GlobalAlloc for TrackingAllocator {
+        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+            let inside_alloc = RECURSIVE.fetch_or(true, Ordering::SeqCst);
+
+            if !inside_alloc {
+                MEMORY_IN_USE.fetch_add(layout.size(), Ordering::SeqCst);
+                N_ALLOCATIONS.fetch_add(1, Ordering::SeqCst);
+                ALLOCATION_MAP
+                    .lock()
+                    .expect("Mutex Poisoned")
+                    .0
+                    .entry(Trace::new())
+                    .or_insert_with(Vec::new)
+                    .push(layout.size());
+            }
+
+            RECURSIVE.store(inside_alloc, Ordering::SeqCst);
+
+            System.alloc(layout)
+        }
+
+        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+            let inside_alloc = RECURSIVE.fetch_or(true, Ordering::SeqCst);
+
+            if !inside_alloc {
+                MEMORY_IN_USE.fetch_sub(layout.size(), Ordering::SeqCst);
+            }
+            RECURSIVE.store(inside_alloc, Ordering::SeqCst);
+
+            System.dealloc(ptr, layout)
+        }
+    }
+
+    impl Eq for Trace {}
+
+    impl TrackingAllocator {
+        pub fn track_allocations<R>(f: impl FnOnce() -> R) -> (R, usize, usize, AllocMap) {
+            let start_size = MEMORY_IN_USE.load(Ordering::SeqCst);
+            let start_allocations = N_ALLOCATIONS.load(Ordering::SeqCst);
+            let start_map = mem::take(&mut *ALLOCATION_MAP.lock().expect("Mutex Poisoned"));
+
+            let r = f();
+
+            let end_size = MEMORY_IN_USE.load(Ordering::SeqCst);
+            let end_allocations = N_ALLOCATIONS.load(Ordering::SeqCst);
+            let end_map = mem::replace(
+                &mut *ALLOCATION_MAP.lock().expect("Mutex Poisoned"),
+                start_map,
+            );
+
+            let end_map = AllocMap(
+                end_map
+                    .0
+                    .into_iter()
+                    .map(|(mut k, v)| {
+                        k.0.resolve();
+                        (k, v)
+                    })
+                    .collect(),
+            );
+
+            (
+                r,
+                end_size - start_size,
+                end_allocations - start_allocations,
+                end_map,
+            )
+        }
+    }
+}
+
+pub fn env_or<T>(name: &str, default: T) -> T
+where
+    T: FromStr,
+{
+    env_or_else(name, || default)
+}
+
+pub fn env_or_else<T>(name: &str, default: impl FnOnce() -> T) -> T
+where
+    T: FromStr,
+{
+    env::var(name)
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .unwrap_or_else(default)
+}
+
+pub fn rng() -> StdRng {
+    static SEED: Lazy<u64> = Lazy::new(|| {
+        let seed = env_or_else("BENCHMARK_SEED", || rand::thread_rng().gen());
+        eprintln!("Using random seed {} (can be set via BENCHMARK_SEED)", seed);
+        seed
+    });
+
+    StdRng::seed_from_u64(*SEED)
+}
+
+pub fn string_iter(rng: &mut impl Rng) -> impl Iterator<Item = String> + '_ {
+    iter::from_fn(move || {
+        let string_len = rng.gen_range(0, 2048);
+        Some(rng.sample_iter(Alphanumeric).take(string_len).collect())
+    })
+}
+
+pub static NO_DUPLICATES: Lazy<Vec<String>> = Lazy::new(|| {
+    let rng = &mut rng();
+
+    let n_items = env_or("N_NO_DUPLICATES", 10_000);
+    string_iter(rng).take(n_items).collect()
+});
+
+pub static NO_DUPLICATES_STRING: Lazy<String> = Lazy::new(|| NO_DUPLICATES.iter().join("\n"));
+
+pub static DUPLICATES: Lazy<Vec<String>> = Lazy::new(|| {
+    let rng = &mut rng();
+
+    let n_items = env_or("N_DUPLICATES", 10_000);
+    let no_dupes: HashSet<_> = string_iter(rng).take(n_items).collect();
+    let mut no_dupes: Vec<_> = no_dupes.into_iter().collect();
+
+    let n_dupes = rng.gen_range(0, no_dupes.len());
+    let dupes: Vec<_> = no_dupes.choose_multiple(rng, n_dupes).cloned().collect();
+    no_dupes.extend(dupes);
+    no_dupes.shuffle(rng);
+
+    no_dupes
+});
+
+pub static DUPLICATES_STRING: Lazy<String> = Lazy::new(|| DUPLICATES.iter().join("\n"));
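
Usage sketch (an assumption based on the layout above, not something the patch itself adds):
with the benchmarks/ sub-crate sitting next to the root crate, as the
sxd-string-slab = { path = ".." } dependency implies, the new targets can be exercised with
ordinary Cargo commands:

    cd benchmarks
    cargo bench                                           # Criterion groups "no duplicates" and "duplicates"
    cargo run --release --bin memory_usage                # allocation size/count report via TrackingAllocator
    SHOW_MAP=true cargo run --release --bin memory_usage  # also dump the per-backtrace allocation map

Corpus size and RNG seed can be tuned through the N_NO_DUPLICATES, N_DUPLICATES, and
BENCHMARK_SEED environment variables read by env_or / env_or_else in src/lib.rs.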