Skip to content

Commit

Permalink
optimize aligned split length check
Browse files Browse the repository at this point in the history
  • Loading branch information
sarah committed May 23, 2024
1 parent 917d3c7 commit 619937a
Show file tree
Hide file tree
Showing 3 changed files with 347 additions and 125 deletions.
8 changes: 7 additions & 1 deletion pulp/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pulp"
version = "0.18.19"
version = "0.18.20"
edition = "2021"
authors = ["sarah <>"]
description = "Safe generic simd"
Expand All @@ -23,14 +23,20 @@ nightly = ["bytemuck/nightly_stdsimd"]
macro = ["dep:pulp-macro"]

[dev-dependencies]
aligned-vec = "0.6.0"
assert_approx_eq = "1.1.0"
criterion = "0.5.0"
diol = { version = "0.8.3", default-features = false }
rand = "0.8.5"

[[bench]]
name = "bench"
harness = false

[[bench]]
name = "maskload"
harness = false

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
84 changes: 84 additions & 0 deletions pulp/benches/maskload.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use aligned_vec::avec;
use diol::prelude::*;
use pulp::x86::V3;
use pulp::{m64x4, Offset, Read, Simd, Write};

/// Benchmarks writing the prefix and suffix parts of an aligned split of a 7-element
/// `f64` slice, reading each from the matching part of a source slice.
///
/// Each measured run repeats the prefix/suffix write `n` times. If `V3::try_new()`
/// returns `None`, the function returns without calling `bencher.bench`.
fn masked(bencher: Bencher, PlotArg(n): PlotArg) {
    /// Everything one measured run needs, bundled so it can be handed to `vectorize`.
    struct PartialCopy<'a> {
        simd: V3,
        dst: &'a mut [f64],
        src: &'a [f64],
        n: usize,
        offset: Offset<m64x4>,
    }

    impl pulp::NullaryFnOnce for PartialCopy<'_> {
        type Output = ();

        #[inline(always)]
        fn call(self) -> Self::Output {
            let Self {
                simd,
                dst,
                src,
                n: reps,
                offset,
            } = self;

            // Only the two end pieces of each split are used; the middle piece is discarded.
            let (mut head_out, _, mut tail_out) = simd.f64s_as_aligned_mut_simd(dst, offset);
            let (head_in, _, tail_in) = simd.f64s_as_aligned_simd(src, offset);

            for _ in 0..reps {
                head_out.write(head_in.read_or(simd.splat_f64x4(0.0)));
                tail_out.write(tail_in.read_or(simd.splat_f64x4(0.0)));
                // Keep the optimizer from eliding the writes above.
                core::hint::black_box((&mut head_out, &mut tail_out));
            }
        }
    }

    let simd = match V3::try_new() {
        Some(simd) => simd,
        None => return,
    };

    let dst_buf = &mut *avec![1.0; 8];
    let src_buf = &*avec![1.0; 8];

    // Drop the first element of each buffer and view the remaining seven as fixed-size arrays.
    let dst_tail: &mut [f64; 7] = (&mut dst_buf[1..]).try_into().unwrap();
    let src_tail: &[f64; 7] = (&src_buf[1..]).try_into().unwrap();

    // The offset is computed from the destination and reused for the source split as well.
    let offset = simd.f64s_align_offset(dst_tail.as_ptr(), dst_tail.len());

    bencher.bench(|| {
        simd.vectorize(PartialCopy {
            simd,
            dst: dst_tail,
            src: src_tail,
            n,
            offset,
        })
    });
}

/// Baseline benchmark: copies the last seven elements of one `avec!` buffer into the last
/// seven elements of another with `copy_from_slice`, `n` times per measured run.
fn non_masked(bencher: Bencher, PlotArg(n): PlotArg) {
    let dst = &mut *avec![1.0; 8];
    let src = &*avec![1.0; 8];

    // Convert the *borrowed* tails into array references. Writing
    // `&mut dst[1..].try_into().unwrap()` instead would copy the tail into a temporary
    // `[f64; 7]` and borrow that, so the benchmark would never touch the buffers above
    // and would not be comparable with `masked`.
    let mut dst: &mut [f64; 7] = (&mut dst[1..]).try_into().unwrap();
    let src: &[f64; 7] = (&src[1..]).try_into().unwrap();

    bencher.bench(move || {
        for _ in 0..n {
            dst.copy_from_slice(src);
            // Keep the optimizer from eliding the copy above.
            core::hint::black_box(&mut dst);
        }
    })
}

/// Entry point: builds the benchmark harness from the command-line arguments, registers
/// both benchmarks for every repetition count, and runs them.
fn main() -> std::io::Result<()> {
    let config = BenchConfig::from_args()?;
    let mut bench = Bench::new(config);

    // Repetition counts passed to each benchmark as its `PlotArg`.
    let repetitions = [1, 2, 3, 4, 16, 32, 128, 16384].map(PlotArg);
    bench.register_many(list![masked, non_masked], repetitions);

    bench.run()?;
    Ok(())
}
Loading

0 comments on commit 619937a

Please sign in to comment.