diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs
index d1aa2433f5..3d51d113fe 100644
--- a/coresimd/ppsv/api/arithmetic_reductions.rs
+++ b/coresimd/ppsv/api/arithmetic_reductions.rs
@@ -6,9 +6,20 @@ macro_rules! impl_arithmetic_reductions {
         impl $id {
             /// Lane-wise addition of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            /// * integers: overflow behavior
-            /// * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8-element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows, it returns the mathematical result
+            /// modulo `2^n`, where `n` is the number of bits of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If one of the vector elements is `NaN`, the reduction returns
+            /// `NaN`.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
             pub fn sum(self) -> $elem_ty {
@@ -19,9 +30,20 @@ macro_rules! impl_arithmetic_reductions {
             }
             /// Lane-wise addition of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            /// * integers: overflow behavior
-            /// * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8-element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows, it returns the mathematical result
+            /// modulo `2^n`, where `n` is the number of bits of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If one of the vector elements is `NaN`, the reduction returns
+            /// `NaN`.
             #[cfg(target_arch = "aarch64")]
             #[inline]
             pub fn sum(self) -> $elem_ty {
@@ -36,9 +58,20 @@ macro_rules! impl_arithmetic_reductions {
 
             /// Lane-wise multiplication of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            /// * integers: overflow behavior
-            /// * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8-element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows, it returns the mathematical result
+            /// modulo `2^n`, where `n` is the number of bits of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If one of the vector elements is `NaN`, the reduction returns
+            /// `NaN`.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
             pub fn product(self) -> $elem_ty {
@@ -49,9 +82,20 @@ macro_rules! impl_arithmetic_reductions {
             }
             /// Lane-wise multiplication of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            /// * integers: overflow behavior
-            /// * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8-element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows, it returns the mathematical result
+            /// modulo `2^n`, where `n` is the number of bits of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If one of the vector elements is `NaN`, the reduction returns
+            /// `NaN`.
             #[cfg(target_arch = "aarch64")]
             #[inline]
             pub fn product(self) -> $elem_ty {
diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs
index 159c59a99b..9d4eabdf52 100644
--- a/coresimd/ppsv/api/minmax_reductions.rs
+++ b/coresimd/ppsv/api/minmax_reductions.rs
@@ -6,7 +6,13 @@ macro_rules! impl_minmax_reductions {
         impl $id {
             /// Largest vector value.
             ///
-            /// FIXME: document behavior for float vectors with NaNs.
+            /// # Floating-point behavior
+            ///
+            /// If the vector contains only `NaN` values,
+            /// the result is a `NaN`.
+            ///
+            /// Otherwise, if the vector contains `NaN` values, either the
+            /// largest non-`NaN` element of the vector or a `NaN` is returned.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
             pub fn max(self) -> $elem_ty {
@@ -15,9 +21,16 @@ macro_rules! impl_minmax_reductions {
                     simd_reduce_max(self)
                 }
             }
+
             /// Largest vector value.
             ///
-            /// FIXME: document behavior for float vectors with NaNs.
+            /// # Floating-point behavior
+            ///
+            /// If the vector contains only `NaN` values,
+            /// the result is a `NaN`.
+            ///
+            /// Otherwise, if the vector contains `NaN` values, either the
+            /// largest non-`NaN` element of the vector or a `NaN` is returned.
             #[cfg(target_arch = "aarch64")]
             #[allow(unused_imports)]
             #[inline]
@@ -35,7 +48,13 @@
 
             /// Smallest vector value.
             ///
-            /// FIXME: document behavior for float vectors with NaNs.
+            /// # Floating-point behavior
+            ///
+            /// If the vector contains only `NaN` values,
+            /// the result is a `NaN`.
+            ///
+            /// Otherwise, if the vector contains `NaN` values, either the
+            /// smallest non-`NaN` element of the vector or a `NaN` is returned.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
             pub fn min(self) -> $elem_ty {
@@ -44,9 +63,16 @@ macro_rules! impl_minmax_reductions {
                     simd_reduce_min(self)
                 }
             }
+
             /// Smallest vector value.
             ///
-            /// FIXME: document behavior for float vectors with NaNs.
+            /// # Floating-point behavior
+            ///
+            /// If the vector contains only `NaN` values,
+            /// the result is a `NaN`.
+            ///
+            /// Otherwise, if the vector contains `NaN` values, either the
+            /// smallest non-`NaN` element of the vector or a `NaN` is returned.
             #[cfg(target_arch = "aarch64")]
             #[allow(unused_imports)]
             #[inline]
diff --git a/crates/coresimd/tests/reductions.rs b/crates/coresimd/tests/reductions.rs
new file mode 100644
index 0000000000..bf7c91bdf7
--- /dev/null
+++ b/crates/coresimd/tests/reductions.rs
@@ -0,0 +1,418 @@
+#![feature(cfg_target_feature, stdsimd, target_feature)]
+
+#[macro_use]
+extern crate stdsimd;
+
+use stdsimd::simd::*;
+
+macro_rules! invoke_arch {
+    ($macro:ident, $feature_macro:ident, $id:ident, $elem_ty:ident,
+     [$($feature:tt),*]) => {
+        $($macro!($feature, $feature_macro, $id, $elem_ty);)*
+    }
+}
+
+macro_rules! invoke_vectors {
invoke_vectors { + ($macro:ident, [$(($id:ident, $elem_ty:ident)),*]) => { + $( + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + invoke_arch!($macro, is_x86_feature_detected, $id, $elem_ty, + ["sse", "sse2", "sse3", "ssse3", "sse4.1", + "sse4.2", "sse4a", "avx2", "avx2", "avx512f"]); + #[cfg(target_arch = "aarch64")] + invoke_arch!($macro, is_aarch64_feature_detected, $id, $elem_ty, + ["neon"]); + #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] + invoke_arch!($macro, is_arm_feature_detected, $id, $elem_ty, + ["neon"]); + #[cfg(target_arch = "powerpc")] + invoke_arch!($macro, is_powerpc_feature_detected, $id, $elem_ty, ["altivec"]); + #[cfg(target_arch = "powerpc64")] + invoke_arch!($macro, is_powerpc64_feature_detected, $id, $elem_ty, ["altivec"]); + )* + } +} + +macro_rules! finvoke { + ($macro:ident) => { + invoke_vectors!( + $macro, + [(f32x2, f32), (f32x4, f32), (f32x8, f32), (f32x16, f32), + (f64x2, f64), (f64x4, f64), (f64x8, f64)] + ); + } +} + +macro_rules! iinvoke { + ($macro:ident) => { + invoke_vectors!( + $macro, + [(i8x2, i8), (i8x4, i8), (i8x8, i8), (i8x16, i8), (i8x32, i8), (i8x64, i8), + (i16x2, i16), (i16x4, i16), (i16x8, i16), (i16x16, i16), (i16x32, i16), + (i32x2, i32), (i32x4, i32), (i32x8, i32), (i32x16, i32), + (i64x2, i64), (i64x4, i64), (i64x8, i64)] + ); + } +} + +macro_rules! min_nan_test { + ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { + if $feature_macro!($feature) { + #[target_feature(enable = $feature)] + unsafe fn test_fn() { + let n0 = ::std::$elem_ty::NAN; + + assert_eq!(n0.min(-3.0), -3.0); + assert_eq!((-3.0 as $elem_ty).min(n0), -3.0); + + let v0 = $id::splat(-3.0); + + for i in 0..$id::lanes() { + let v = v0.replace(i, n0); + if i != $id::lanes() - 1 { + assert_eq!(v.min(), -3.0); + let mut v = v; + for j in 0..i { + v = v.replace(j, n0); + assert_eq!(v.min(), -3.0); + } + } else { + // not necessarily n0: + assert!(v.min().is_nan()); + let mut v = v; + for j in 0..i { + v = v.replace(j, n0); + assert!(v.min().is_nan()); + } + } + } + + let vn = $id::splat(n0); + assert!(vn.min().is_nan()); + } + unsafe { test_fn() }; + } + } +} + +#[test] +fn min_nan() { + finvoke!(min_nan_test); +} + +macro_rules! max_nan_test { + ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { + if $feature_macro!($feature) { + #[target_feature(enable = $feature)] + unsafe fn test_fn() { + let n0 = ::std::$elem_ty::NAN; + + assert_eq!(n0.max(-3.0), -3.0); + assert_eq!((-3.0 as $elem_ty).max(n0), -3.0); + + let v0 = $id::splat(-3.0); + + for i in 0..$id::lanes() { + let v = v0.replace(i, n0); + if i != $id::lanes() - 1 { + assert_eq!(v.max(), -3.0); + let mut v = v; + for j in 0..i { + v = v.replace(j, n0); + assert_eq!(v.max(), -3.0); + } + } else { + // not necessarily n0: + assert!(v.max().is_nan()); + let mut v = v; + for j in 0..i { + v = v.replace(j, n0); + assert!(v.max().is_nan()); + } + } + } + + let vn = $id::splat(n0); + assert!(vn.max().is_nan()); + } + unsafe { test_fn() }; + } + } +} + +#[test] +fn max_nan() { + finvoke!(max_nan_test); +} + +macro_rules! 
+    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
+        if $feature_macro!($feature) {
+            #[target_feature(enable = $feature)]
+            unsafe fn test_fn() {
+                let n0 = ::std::$elem_ty::NAN;
+
+                let v0 = $id::splat(-3.0);
+
+                for i in 0..$id::lanes() {
+                    let v = v0.replace(i, n0);
+                    assert!(v.sum().is_nan());
+                }
+                let v = $id::splat(n0);
+                assert!(v.sum().is_nan());
+            }
+            unsafe { test_fn() };
+        }
+    }
+}
+
+#[test]
+fn sum_nan() {
+    finvoke!(sum_nan_test);
+}
+
+macro_rules! product_nan_test {
+    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
+        if $feature_macro!($feature) {
+            #[target_feature(enable = $feature)]
+            unsafe fn test_fn() {
+                let n0 = ::std::$elem_ty::NAN;
+
+                let v0 = $id::splat(-3.0);
+
+                for i in 0..$id::lanes() {
+                    let v = v0.replace(i, n0);
+                    assert!(v.product().is_nan());
+                }
+                let v = $id::splat(n0);
+                assert!(v.product().is_nan());
+            }
+            unsafe { test_fn() };
+        }
+    }
+}
+
+#[test]
+fn product_nan() {
+    finvoke!(product_nan_test);
+}
+
+trait AsInt {
+    type Int;
+    fn as_int(self) -> Self::Int;
+    fn from_int(x: Self::Int) -> Self;
+}
+
+macro_rules! as_int {
+    ($float:ident, $int:ident) => {
+        impl AsInt for $float {
+            type Int = $int;
+            fn as_int(self) -> $int {
+                unsafe { ::std::mem::transmute(self) }
+            }
+            fn from_int(x: $int) -> $float {
+                unsafe { ::std::mem::transmute(x) }
+            }
+        }
+    }
+}
+
+as_int!(f32, u32);
+as_int!(f64, u64);
+
+trait TreeReduceSum {
+    type R;
+    fn tree_reduce_sum(self) -> Self::R;
+}
+
+macro_rules! tree_reduce_sum_f {
+    ($elem_ty:ident) => {
+        impl<'a> TreeReduceSum for &'a [$elem_ty] {
+            type R = $elem_ty;
+            fn tree_reduce_sum(self) -> $elem_ty {
+                if self.len() == 2 {
+                    self[0] + self[1]
+                } else {
+                    let mid = self.len() / 2;
+                    let (left, right) = self.split_at(mid);
+                    Self::tree_reduce_sum(left) + Self::tree_reduce_sum(right)
+
+                }
+            }
+        }
+    }
+}
+tree_reduce_sum_f!(f32);
+tree_reduce_sum_f!(f64);
+
+macro_rules! sum_roundoff_test {
+    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
+        if $feature_macro!($feature) {
+            #[target_feature(enable = $feature)]
+            unsafe fn test_fn() {
+                let mut start = std::$elem_ty::EPSILON;
+                let mut sum = 0. as $elem_ty;
+
+                let mut v = $id::splat(0. as $elem_ty);
+                for i in 0..$id::lanes() {
+                    let c = if i % 2 == 0 { 1e3 } else { -1. };
+                    start *= 3.14 * c;
+                    sum += start;
+                    // println!("{} | start: {}", stringify!($id), start);
+                    v = v.replace(i, start);
+                }
+                let vsum = v.sum();
+                println!("{} | lsum: {}", stringify!($id), sum);
+                println!("{} | vsum: {}", stringify!($id), vsum);
+                let r = vsum.as_int() == sum.as_int();
+                // This is false in general; the intrinsic performs a
+                // tree-reduce:
+                println!("{} | equal: {}", stringify!($id), r);
+
+                let mut a = [0. as $elem_ty; $id::lanes()];
+                v.store_unaligned(&mut a);
+
+                let tsum = a.tree_reduce_sum();
+                println!("{} | tsum: {}", stringify!($id), tsum);
+
+                // tolerate 1 ULP difference:
+                if vsum.as_int() > tsum.as_int() {
+                    assert!(vsum.as_int() - tsum.as_int() < 2);
+                } else {
+                    assert!(tsum.as_int() - vsum.as_int() < 2);
+                }
+            }
+            unsafe { test_fn() };
+        }
+    }
+}
+
+#[test]
+fn sum_roundoff_test() {
+    finvoke!(sum_roundoff_test);
+}
+
+trait TreeReduceMul {
+    type R;
+    fn tree_reduce_mul(self) -> Self::R;
+}
+
+macro_rules! tree_reduce_mul_f {
+    ($elem_ty:ident) => {
+        impl<'a> TreeReduceMul for &'a [$elem_ty] {
+            type R = $elem_ty;
+            fn tree_reduce_mul(self) -> $elem_ty {
+                if self.len() == 2 {
+                    self[0] * self[1]
+                } else {
+                    let mid = self.len() / 2;
+                    let (left, right) = self.split_at(mid);
+                    Self::tree_reduce_mul(left) * Self::tree_reduce_mul(right)
+
+                }
+            }
+        }
+    }
+}
+
+tree_reduce_mul_f!(f32);
+tree_reduce_mul_f!(f64);
+
+macro_rules! mul_roundoff_test {
+    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
+        if $feature_macro!($feature) {
+            #[target_feature(enable = $feature)]
+            unsafe fn test_fn() {
+                let mut start = std::$elem_ty::EPSILON;
+                let mut mul = 1. as $elem_ty;
+
+                let mut v = $id::splat(1. as $elem_ty);
+                for i in 0..$id::lanes() {
+                    let c = if i % 2 == 0 { 1e3 } else { -1. };
+                    start *= 3.14 * c;
+                    mul *= start;
+                    println!("{} | start: {}", stringify!($id), start);
+                    v = v.replace(i, start);
+                }
+                let vmul = v.product();
+                println!("{} | lmul: {}", stringify!($id), mul);
+                println!("{} | vmul: {}", stringify!($id), vmul);
+                let r = vmul.as_int() == mul.as_int();
+                // This is false in general; the intrinsic performs a
+                // tree-reduce:
+                println!("{} | equal: {}", stringify!($id), r);
+
+                let mut a = [0. as $elem_ty; $id::lanes()];
+                v.store_unaligned(&mut a);
+
+                let tmul = a.tree_reduce_mul();
+                println!("{} | tmul: {}", stringify!($id), tmul);
+
+                // tolerate 1 ULP difference:
+                if vmul.as_int() > tmul.as_int() {
+                    assert!(vmul.as_int() - tmul.as_int() < 2);
+                } else {
+                    assert!(tmul.as_int() - vmul.as_int() < 2);
+                }
+            }
+            unsafe { test_fn() };
+        }
+    }
+}
+
+#[test]
+fn mul_roundoff_test() {
+    finvoke!(mul_roundoff_test);
+}
+
+macro_rules! sum_overflow_test {
+    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
+        if $feature_macro!($feature) {
+            #[target_feature(enable = $feature)]
+            unsafe fn test_fn() {
+                let start = $elem_ty::max_value() - ($id::lanes() as $elem_ty / 2);
+
+                let v = $id::splat(start as $elem_ty);
+                let vsum = v.sum();
+
+                let mut sum = start;
+                for _ in 1..$id::lanes() {
+                    sum = sum.wrapping_add(start);
+                }
+                assert_eq!(sum, vsum);
+            }
+            unsafe { test_fn() };
+        }
+    }
+}
+
+#[test]
+fn sum_overflow_test() {
+    iinvoke!(sum_overflow_test);
+}
+
+macro_rules! mul_overflow_test {
+    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
+        if $feature_macro!($feature) {
+            #[target_feature(enable = $feature)]
+            unsafe fn test_fn() {
+                let start = $elem_ty::max_value() - ($id::lanes() as $elem_ty / 2);
+
+                let v = $id::splat(start as $elem_ty);
+                let vmul = v.product();
+
+                let mut mul = start;
+                for _ in 1..$id::lanes() {
+                    mul = mul.wrapping_mul(start);
+                }
+                assert_eq!(mul, vmul);
+            }
+            unsafe { test_fn() };
+        }
+    }
+}
+
+#[test]
+fn mul_overflow_test() {
+    iinvoke!(mul_overflow_test);
+}
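
Note (review aid, not part of the patch): the scalar model below restates the semantics the new docs promise, namely that integer reductions wrap on overflow and floating-point reductions are pairwise (tree) ordered and propagate `NaN`. The names `tree_sum_wrapping` and `tree_sum_f32` and the sample values are illustrative only.

```rust
// Scalar sketch of the documented reduction semantics; assumes non-empty,
// power-of-two-length slices, matching the portable vector types.
fn tree_sum_wrapping(xs: &[u32]) -> u32 {
    // Integer reductions are documented to wrap: the result is the
    // mathematical sum modulo 2^32 for u32 elements.
    match xs.len() {
        1 => xs[0],
        n => {
            let (l, r) = xs.split_at(n / 2);
            tree_sum_wrapping(l).wrapping_add(tree_sum_wrapping(r))
        }
    }
}

fn tree_sum_f32(xs: &[f32]) -> f32 {
    // Floating-point reductions use the same pairwise order; any NaN lane
    // propagates to the result.
    match xs.len() {
        1 => xs[0],
        n => {
            let (l, r) = xs.split_at(n / 2);
            tree_sum_f32(l) + tree_sum_f32(r)
        }
    }
}

fn main() {
    // ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)), as in the docs.
    let ints = [u32::max_value(), 1, 2, 3, 4, 5, 6, 7];
    assert_eq!(tree_sum_wrapping(&ints), 27); // (2^32 - 1) + 28 wraps to 27

    let floats = [1.0_f32, 2.0, 3.0, std::f32::NAN];
    assert!(tree_sum_f32(&floats).is_nan());
}
```

The same model with `wrapping_mul` and `*` corresponds to the `product()` documentation; the `tree_reduce_sum`/`tree_reduce_mul` helpers in the new test file play this role against the vector intrinsics.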