diff --git a/include/private/dsp/arch/x86/avx512/hmath.h b/include/private/dsp/arch/x86/avx512/hmath.h
index ced53654..41bbec54 100644
--- a/include/private/dsp/arch/x86/avx512/hmath.h
+++ b/include/private/dsp/arch/x86/avx512/hmath.h
@@ -27,6 +27,7 @@
 #endif /* PRIVATE_DSP_ARCH_X86_AVX512_IMPL */
 
 
+#include <private/dsp/arch/x86/avx512/hmath/hdotp.h>
 #include <private/dsp/arch/x86/avx512/hmath/hsum.h>
 
 
diff --git a/include/private/dsp/arch/x86/avx512/hmath/hdotp.h b/include/private/dsp/arch/x86/avx512/hmath/hdotp.h
new file mode 100644
index 00000000..edf30f57
--- /dev/null
+++ b/include/private/dsp/arch/x86/avx512/hmath/hdotp.h
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 дек. 2024 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef PRIVATE_DSP_ARCH_X86_AVX512_HMATH_HDOTP_H_
+#define PRIVATE_DSP_ARCH_X86_AVX512_HMATH_HDOTP_H_
+
+#ifndef PRIVATE_DSP_ARCH_X86_AVX512_IMPL
+    #error "This header should not be included directly"
+#endif /* PRIVATE_DSP_ARCH_X86_AVX512_IMPL */
+
+namespace lsp
+{
+    namespace avx512
+    {
+        /* Dot product of two float arrays: returns the sum of a[i] * b[i] for i in [0, count).
+         * Partial sums live in two accumulators (zmm0, zmm1) which are folded 512 -> 256 -> 128
+         * bits as the tail shrinks; the scalar total ends in xmm0, bound to 'result' by "=Yz". */
+        float h_dotp(const float *a, const float *b, size_t count)
+        {
+            IF_ARCH_X86( float result; size_t off; );
+            ARCH_X86_ASM
+            (
+                __ASM_EMIT("vxorps          %%zmm0, %%zmm0, %%zmm0")                /* accumulator #0 = 0 */
+                __ASM_EMIT("xor             %[off], %[off]")                        /* byte offset used for both a and b */
+                __ASM_EMIT("vxorps          %%zmm1, %%zmm1, %%zmm1")                /* accumulator #1 = 0 */
+                /* x64 blocks: four zmm registers (64 floats, 0x100 bytes) per iteration */
+                __ASM_EMIT("sub             $64, %[count]")
+                __ASM_EMIT("jb              2f")                                    /* fewer than 64 floats: skip the loop */
+                __ASM_EMIT("1:")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%zmm2")
+                __ASM_EMIT("vmovups         0x40(%[a], %[off]), %%zmm3")
+                __ASM_EMIT("vmovups         0x80(%[a], %[off]), %%zmm4")
+                __ASM_EMIT("vmovups         0xc0(%[a], %[off]), %%zmm5")
+                __ASM_EMIT("vmulps          0x00(%[b], %[off]), %%zmm2, %%zmm2")
+                __ASM_EMIT("vmulps          0x40(%[b], %[off]), %%zmm3, %%zmm3")
+                __ASM_EMIT("vmulps          0x80(%[b], %[off]), %%zmm4, %%zmm4")    /* b offset must equal the a offset held in zmm4 */
+                __ASM_EMIT("vmulps          0xc0(%[b], %[off]), %%zmm5, %%zmm5")    /* b offset must equal the a offset held in zmm5 */
+                __ASM_EMIT("vaddps          %%zmm2, %%zmm0, %%zmm0")
+                __ASM_EMIT("vaddps          %%zmm3, %%zmm1, %%zmm1")
+                __ASM_EMIT("vaddps          %%zmm4, %%zmm0, %%zmm0")
+                __ASM_EMIT("vaddps          %%zmm5, %%zmm1, %%zmm1")
+                __ASM_EMIT("add             $0x100, %[off]")
+                __ASM_EMIT("sub             $64, %[count]")
+                __ASM_EMIT("jae             1b")
+                __ASM_EMIT("2:")
+                /* x32 block: count is (remaining - 64) here, so adding 32 stays negative when < 32 floats remain */
+                __ASM_EMIT("add             $32, %[count]")
+                __ASM_EMIT("jl              4f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%zmm2")
+                __ASM_EMIT("vmovups         0x40(%[a], %[off]), %%zmm3")
+                __ASM_EMIT("vmulps          0x00(%[b], %[off]), %%zmm2, %%zmm2")
+                __ASM_EMIT("vmulps          0x40(%[b], %[off]), %%zmm3, %%zmm3")
+                __ASM_EMIT("vaddps          %%zmm2, %%zmm0, %%zmm0")
+                __ASM_EMIT("vaddps          %%zmm3, %%zmm1, %%zmm1")
+                __ASM_EMIT("add             $0x80, %[off]")
+                __ASM_EMIT("sub             $32, %[count]")
+                __ASM_EMIT("4:")
+                __ASM_EMIT("vextractf64x4   $1, %%zmm0, %%ymm2")                    /* fold the upper 256 bits of each accumulator */
+                __ASM_EMIT("vextractf64x4   $1, %%zmm1, %%ymm3")
+                __ASM_EMIT("vaddps          %%ymm2, %%ymm0, %%ymm0")
+                __ASM_EMIT("vaddps          %%ymm3, %%ymm1, %%ymm1")
+                /* x16 block: two ymm registers, hence the 0x20 stride */
+                __ASM_EMIT("add             $16, %[count]")
+                __ASM_EMIT("jl              6f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%ymm2")
+                __ASM_EMIT("vmovups         0x20(%[a], %[off]), %%ymm3")
+                __ASM_EMIT("vmulps          0x00(%[b], %[off]), %%ymm2, %%ymm2")
+                __ASM_EMIT("vmulps          0x20(%[b], %[off]), %%ymm3, %%ymm3")
+                __ASM_EMIT("vaddps          %%ymm2, %%ymm0, %%ymm0")
+                __ASM_EMIT("vaddps          %%ymm3, %%ymm1, %%ymm1")
+                __ASM_EMIT("add             $0x40, %[off]")
+                __ASM_EMIT("sub             $16, %[count]")
+                __ASM_EMIT("6:")
+                /* x8 block: the two accumulators are merged into ymm0 between the add and the jump that tests its flags */
+                __ASM_EMIT("add             $8, %[count]")
+                __ASM_EMIT("vaddps          %%ymm1, %%ymm0, %%ymm0")
+                __ASM_EMIT("jl              8f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%ymm2")
+                __ASM_EMIT("vmulps          0x00(%[b], %[off]), %%ymm2, %%ymm2")
+                __ASM_EMIT("vaddps          %%ymm2, %%ymm0, %%ymm0")
+                __ASM_EMIT("add             $0x20, %[off]")
+                __ASM_EMIT("sub             $8, %[count]")
+                __ASM_EMIT("8:")
+                /* x4 block: fold ymm0 down to xmm0 first */
+                __ASM_EMIT("vextractf128    $0x01, %%ymm0, %%xmm1")
+                __ASM_EMIT("add             $4, %[count]")
+                __ASM_EMIT("vaddps          %%xmm1, %%xmm0, %%xmm0")
+                __ASM_EMIT("jl              10f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%xmm2")
+                __ASM_EMIT("vmulps          0x00(%[b], %[off]), %%xmm2, %%xmm2")
+                __ASM_EMIT("vaddps          %%xmm2, %%xmm0, %%xmm0")
+                __ASM_EMIT("add             $0x10, %[off]")
+                __ASM_EMIT("sub             $4, %[count]")
+                __ASM_EMIT("10:")
+                /* x1 block: two horizontal adds leave the total in every lane of xmm0; the tail uses lane 0 only */
+                __ASM_EMIT("vhaddps         %%xmm0, %%xmm0, %%xmm0")
+                __ASM_EMIT("add             $3, %[count]")
+                __ASM_EMIT("vhaddps         %%xmm0, %%xmm0, %%xmm0")
+                __ASM_EMIT("jl              12f")
+                __ASM_EMIT("11:")
+                __ASM_EMIT("vmovss          0x00(%[a], %[off]), %%xmm2")
+                __ASM_EMIT("vmulss          0x00(%[b], %[off]), %%xmm2, %%xmm2")
+                __ASM_EMIT("vaddss          %%xmm2, %%xmm0, %%xmm0")
+                __ASM_EMIT("add             $0x04, %[off]")
+                __ASM_EMIT("dec             %[count]")
+                __ASM_EMIT("jge             11b")
+                __ASM_EMIT("12:")
+                /* end: xmm0 is the output operand itself, so it is not listed among the clobbers */
+                : [count] "+r" (count), [off] "=&r" (off),
+                  [res] "=Yz" (result)
+                : [a] "r" (a), [b] "r" (b)
+                : "cc", "memory",
+                  "%xmm1", "%xmm2", "%xmm3",
+                  "%xmm4", "%xmm5"
+            );
+
+            return result;
+        }
+
+        float h_sqr_dotp(const float *a, const float *b, size_t count) /* returns the sum of a[i]^2 * b[i]^2 for i in [0, count) */
+        {
+            IF_ARCH_X86(
+                float result;   /* receives xmm0 through the "=Yz" output constraint */
+                size_t off;     /* byte offset applied to both a and b */
+            );
+            ARCH_X86_ASM
+            (
+                __ASM_EMIT("vxorps          %%zmm0, %%zmm0, %%zmm0")    /* accumulator #0 = 0 */
+                __ASM_EMIT("xor             %[off], %[off]")
+                __ASM_EMIT("vxorps          %%zmm1, %%zmm1, %%zmm1")    /* accumulator #1 = 0 */
+                /* x32 blocks: two zmm registers per input (32 floats, 0x80 bytes) per iteration */
+                __ASM_EMIT("sub             $32, %[count]")
+                __ASM_EMIT("jb              2f")
+                __ASM_EMIT("1:")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%zmm2")
+                __ASM_EMIT("vmovups         0x40(%[a], %[off]), %%zmm3")
+                __ASM_EMIT("vmovups         0x00(%[b], %[off]), %%zmm4")
+                __ASM_EMIT("vmovups         0x40(%[b], %[off]), %%zmm5")
+                __ASM_EMIT("vmulps          %%zmm2, %%zmm2, %%zmm2")    /* a^2 */
+                __ASM_EMIT("vmulps          %%zmm3, %%zmm3, %%zmm3")
+                __ASM_EMIT("vmulps          %%zmm4, %%zmm4, %%zmm4")    /* b^2 */
+                __ASM_EMIT("vmulps          %%zmm5, %%zmm5, %%zmm5")
+                __ASM_EMIT("vfmadd231ps     %%zmm4, %%zmm2, %%zmm0")    /* acc += a^2 * b^2 */
+                __ASM_EMIT("vfmadd231ps     %%zmm5, %%zmm3, %%zmm1")
+                __ASM_EMIT("add             $0x80, %[off]")
+                __ASM_EMIT("sub             $32, %[count]")
+                __ASM_EMIT("jae             1b")
+                __ASM_EMIT("2:")
+                __ASM_EMIT("vextractf64x4   $1, %%zmm0, %%ymm2")        /* fold the upper 256 bits of each accumulator */
+                __ASM_EMIT("vextractf64x4   $1, %%zmm1, %%ymm3")
+                __ASM_EMIT("vaddps          %%ymm2, %%ymm0, %%ymm0")
+                __ASM_EMIT("vaddps          %%ymm3, %%ymm1, %%ymm1")
+                /* x16 block: count is (remaining - 32) here, so adding 16 stays negative when < 16 floats remain */
+                __ASM_EMIT("add             $16, %[count]")
+                __ASM_EMIT("jl              4f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%ymm2")
+                __ASM_EMIT("vmovups         0x20(%[a], %[off]), %%ymm3")
+                __ASM_EMIT("vmovups         0x00(%[b], %[off]), %%ymm4")
+                __ASM_EMIT("vmovups         0x20(%[b], %[off]), %%ymm5")
+                __ASM_EMIT("vmulps          %%ymm2, %%ymm2, %%ymm2")
+                __ASM_EMIT("vmulps          %%ymm3, %%ymm3, %%ymm3")
+                __ASM_EMIT("vmulps          %%ymm4, %%ymm4, %%ymm4")
+                __ASM_EMIT("vmulps          %%ymm5, %%ymm5, %%ymm5")
+                __ASM_EMIT("vfmadd231ps     %%ymm4, %%ymm2, %%ymm0")
+                __ASM_EMIT("vfmadd231ps     %%ymm5, %%ymm3, %%ymm1")
+                __ASM_EMIT("add             $0x40, %[off]")
+                __ASM_EMIT("sub             $16, %[count]")
+                __ASM_EMIT("4:")
+                __ASM_EMIT("vextractf128    $0x01, %%ymm0, %%xmm2")     /* fold the upper 128 bits of each accumulator */
+                __ASM_EMIT("vextractf128    $0x01, %%ymm1, %%xmm3")
+                __ASM_EMIT("vaddps          %%xmm2, %%xmm0, %%xmm0")
+                __ASM_EMIT("vaddps          %%xmm3, %%xmm1, %%xmm1")
+                /* x8 block: two xmm registers per input, hence the 0x10 stride */
+                __ASM_EMIT("add             $8, %[count]")
+                __ASM_EMIT("jl              6f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%xmm2")
+                __ASM_EMIT("vmovups         0x10(%[a], %[off]), %%xmm3")
+                __ASM_EMIT("vmovups         0x00(%[b], %[off]), %%xmm4")
+                __ASM_EMIT("vmovups         0x10(%[b], %[off]), %%xmm5")
+                __ASM_EMIT("vmulps          %%xmm2, %%xmm2, %%xmm2")
+                __ASM_EMIT("vmulps          %%xmm3, %%xmm3, %%xmm3")
+                __ASM_EMIT("vmulps          %%xmm4, %%xmm4, %%xmm4")
+                __ASM_EMIT("vmulps          %%xmm5, %%xmm5, %%xmm5")
+                __ASM_EMIT("vfmadd231ps     %%xmm4, %%xmm2, %%xmm0")
+                __ASM_EMIT("vfmadd231ps     %%xmm5, %%xmm3, %%xmm1")
+                __ASM_EMIT("add             $0x20, %[off]")
+                __ASM_EMIT("sub             $8, %[count]")
+                __ASM_EMIT("6:")
+                __ASM_EMIT("vaddps          %%xmm1, %%xmm0, %%xmm0")    /* merge the two accumulators; only xmm0 is used from here on */
+                /* x4 block */
+                __ASM_EMIT("add             $4, %[count]")
+                __ASM_EMIT("jl              8f")
+                __ASM_EMIT("vmovups         0x00(%[a], %[off]), %%xmm2")
+                __ASM_EMIT("vmovups         0x00(%[b], %[off]), %%xmm4")
+                __ASM_EMIT("vmulps          %%xmm2, %%xmm2, %%xmm2")
+                __ASM_EMIT("vmulps          %%xmm4, %%xmm4, %%xmm4")
+                __ASM_EMIT("vfmadd231ps     %%xmm4, %%xmm2, %%xmm0")
+                __ASM_EMIT("add             $0x10, %[off]")
+                __ASM_EMIT("sub             $4, %[count]")
+                __ASM_EMIT("8:")
+                /* x1 block: two horizontal adds leave the total in every lane of xmm0; the tail updates lane 0 only */
+                __ASM_EMIT("vhaddps         %%xmm0, %%xmm0, %%xmm0")
+                __ASM_EMIT("add             $3, %[count]")
+                __ASM_EMIT("vhaddps         %%xmm0, %%xmm0, %%xmm0")
+                __ASM_EMIT("jl              10f")
+                __ASM_EMIT("9:")
+                __ASM_EMIT("vmovss          0x00(%[a], %[off]), %%xmm2")
+                __ASM_EMIT("vmovss          0x00(%[b], %[off]), %%xmm4")
+                __ASM_EMIT("vmulss          %%xmm2, %%xmm2, %%xmm2")
+                __ASM_EMIT("vmulss          %%xmm4, %%xmm4, %%xmm4")
+                __ASM_EMIT("vfmadd231ss     %%xmm4, %%xmm2, %%xmm0")
+                __ASM_EMIT("add             $0x04, %[off]")
+                __ASM_EMIT("dec             %[count]")
+                __ASM_EMIT("jge             9b")
+                __ASM_EMIT("10:")
+                /* end: xmm0 is the output operand itself, so it is not listed among the clobbers */
+                : [count] "+r" (count), [off] "=&r" (off),
+                  [res] "=Yz" (result)
+                : [a] "r" (a), [b] "r" (b)
+                : "cc", "memory",
+                  "%xmm1", "%xmm2", "%xmm3",
+                  "%xmm4", "%xmm5"
+            );
+
+            return result;
+        }
+
+        IF_ARCH_X86(
+            static const uint32_t h_abs_dotp_const[] __lsp_aligned64 =  /* read by h_abs_dotp with vmovaps into a zmm register */
+            {
+                LSP_DSP_VEC16(0x7fffffff)   /* every bit set except bit 31; presumably repeated 16 times, one per 32-bit zmm lane - verify the macro */
+            };
+        )
+
+        /* Dot product of absolute values: returns the sum of |a[i]| * |b[i]| for i in [0, count).
+         * zmm6/zmm7 hold the mask from h_abs_dotp_const; and-ing with it clears bit 31 of each float.
+         * NOTE(review): 512-bit vandps is an AVX512DQ instruction - confirm the dispatcher checks DQ. */
+        float h_abs_dotp(const float *a, const float *b, size_t count)
+        {
+            IF_ARCH_X86( float result; size_t off; );
+            ARCH_X86_ASM
+            (
+                __ASM_EMIT("vxorps          %%zmm0, %%zmm0, %%zmm0")                /* accumulator #0 = 0 */
+                __ASM_EMIT("vmovaps         %[CC], %%zmm6")                         /* aligned load of the mask */
+                __ASM_EMIT("xor             %[off], %[off]")                        /* byte offset used for both a and b */
+                __ASM_EMIT("vxorps          %%zmm1, %%zmm1, %%zmm1")                /* accumulator #1 = 0 */
+                __ASM_EMIT("vmovaps         %%zmm6, %%zmm7")                        /* second copy of the mask */
+                /* x32 blocks: two zmm registers per input (32 floats, 0x80 bytes) per iteration */
+                __ASM_EMIT("sub             $32, %[count]")
+                __ASM_EMIT("jb              2f")
+                __ASM_EMIT("1:")
+                __ASM_EMIT("vandps          0x00(%[a], %[off]), %%zmm6, %%zmm2")
+                __ASM_EMIT("vandps          0x40(%[a], %[off]), %%zmm7, %%zmm3")
+                __ASM_EMIT("vandps          0x00(%[b], %[off]), %%zmm6, %%zmm4")
+                __ASM_EMIT("vandps          0x40(%[b], %[off]), %%zmm7, %%zmm5")
+                __ASM_EMIT("vfmadd231ps     %%zmm4, %%zmm2, %%zmm0")                /* acc += |a| * |b| */
+                __ASM_EMIT("vfmadd231ps     %%zmm5, %%zmm3, %%zmm1")
+                __ASM_EMIT("add             $0x80, %[off]")
+                __ASM_EMIT("sub             $32, %[count]")
+                __ASM_EMIT("jae             1b")
+                __ASM_EMIT("2:")
+                __ASM_EMIT("vextractf64x4   $1, %%zmm0, %%ymm2")                    /* fold the upper 256 bits of each accumulator */
+                __ASM_EMIT("vextractf64x4   $1, %%zmm1, %%ymm3")
+                __ASM_EMIT("vaddps          %%ymm2, %%ymm0, %%ymm0")
+                __ASM_EMIT("vaddps          %%ymm3, %%ymm1, %%ymm1")
+                /* x16 block: ymm registers are 32 bytes wide, so the second vector sits at 0x20 */
+                __ASM_EMIT("add             $16, %[count]")
+                __ASM_EMIT("jl              4f")
+                __ASM_EMIT("vandps          0x00(%[a], %[off]), %%ymm6, %%ymm2")
+                __ASM_EMIT("vandps          0x20(%[a], %[off]), %%ymm7, %%ymm3")
+                __ASM_EMIT("vandps          0x00(%[b], %[off]), %%ymm6, %%ymm4")
+                __ASM_EMIT("vandps          0x20(%[b], %[off]), %%ymm7, %%ymm5")
+                __ASM_EMIT("vfmadd231ps     %%ymm4, %%ymm2, %%ymm0")
+                __ASM_EMIT("vfmadd231ps     %%ymm5, %%ymm3, %%ymm1")
+                __ASM_EMIT("add             $0x40, %[off]")
+                __ASM_EMIT("sub             $16, %[count]")
+                __ASM_EMIT("4:")
+                __ASM_EMIT("vextractf128    $0x01, %%ymm0, %%xmm2")                 /* fold the upper 128 bits of each accumulator */
+                __ASM_EMIT("vextractf128    $0x01, %%ymm1, %%xmm3")
+                __ASM_EMIT("vaddps          %%xmm2, %%xmm0, %%xmm0")
+                __ASM_EMIT("vaddps          %%xmm3, %%xmm1, %%xmm1")
+                /* x8 block: xmm registers are 16 bytes wide, so the second vector sits at 0x10 */
+                __ASM_EMIT("add             $8, %[count]")
+                __ASM_EMIT("jl              6f")
+                __ASM_EMIT("vandps          0x00(%[a], %[off]), %%xmm6, %%xmm2")
+                __ASM_EMIT("vandps          0x10(%[a], %[off]), %%xmm7, %%xmm3")
+                __ASM_EMIT("vandps          0x00(%[b], %[off]), %%xmm6, %%xmm4")
+                __ASM_EMIT("vandps          0x10(%[b], %[off]), %%xmm7, %%xmm5")
+                __ASM_EMIT("vfmadd231ps     %%xmm4, %%xmm2, %%xmm0")
+                __ASM_EMIT("vfmadd231ps     %%xmm5, %%xmm3, %%xmm1")
+                __ASM_EMIT("add             $0x20, %[off]")
+                __ASM_EMIT("sub             $8, %[count]")
+                __ASM_EMIT("6:")
+                __ASM_EMIT("vaddps          %%xmm1, %%xmm0, %%xmm0")                /* merge the two accumulators; only xmm0 is used from here on */
+                /* x4 block */
+                __ASM_EMIT("add             $4, %[count]")
+                __ASM_EMIT("jl              8f")
+                __ASM_EMIT("vandps          0x00(%[a], %[off]), %%xmm6, %%xmm2")
+                __ASM_EMIT("vandps          0x00(%[b], %[off]), %%xmm7, %%xmm4")
+                __ASM_EMIT("vfmadd231ps     %%xmm4, %%xmm2, %%xmm0")
+                __ASM_EMIT("add             $0x10, %[off]")
+                __ASM_EMIT("sub             $4, %[count]")
+                __ASM_EMIT("8:")
+                /* x1 block: two horizontal adds leave the total in every lane of xmm0; the tail updates lane 0 only */
+                __ASM_EMIT("vhaddps         %%xmm0, %%xmm0, %%xmm0")
+                __ASM_EMIT("add             $3, %[count]")
+                __ASM_EMIT("vhaddps         %%xmm0, %%xmm0, %%xmm0")
+                __ASM_EMIT("jl              10f")
+                __ASM_EMIT("9:")
+                __ASM_EMIT("vmovss          0x00(%[a], %[off]), %%xmm2")
+                __ASM_EMIT("vmovss          0x00(%[b], %[off]), %%xmm4")
+                __ASM_EMIT("vandps          %%xmm2, %%xmm6, %%xmm2")
+                __ASM_EMIT("vandps          %%xmm4, %%xmm7, %%xmm4")
+                __ASM_EMIT("vfmadd231ss     %%xmm4, %%xmm2, %%xmm0")                /* scalar form: same lane-0 result as the packed one */
+                __ASM_EMIT("add             $0x04, %[off]")
+                __ASM_EMIT("dec             %[count]")
+                __ASM_EMIT("jge             9b")
+                __ASM_EMIT("10:")
+                /* end: xmm0 is the output operand itself, so it is not listed among the clobbers */
+                : [count] "+r" (count), [off] "=&r" (off),
+                  [res] "=Yz" (result)
+                : [a] "r" (a), [b] "r" (b),
+                  [CC] "m" (h_abs_dotp_const)
+                : "cc", "memory",
+                  "%xmm1", "%xmm2", "%xmm3",
+                  "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+            );
+
+            return result;
+        }
+    } /* namespace avx512 */
+} /* namespace lsp */
+
+
+
+#endif /* PRIVATE_DSP_ARCH_X86_AVX512_HMATH_HDOTP_H_ */
diff --git a/src/main/x86/avx512.cpp b/src/main/x86/avx512.cpp
index c33431ae..88deddd6 100644
--- a/src/main/x86/avx512.cpp
+++ b/src/main/x86/avx512.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
  * Created on: 24 мая 2023 г.
@@ -326,6 +326,10 @@
                 CEXPORT1(vl, h_sum);
                 CEXPORT1(vl, h_sqr_sum);
                 CEXPORT1(vl, h_abs_sum);
+
+                CEXPORT1(vl, h_dotp);
+                CEXPORT1(vl, h_sqr_dotp);
+                CEXPORT1(vl, h_abs_dotp);
             }
         } /* namespace avx2 */
     } /* namespace lsp */
diff --git a/src/test/ptest/hmath/hdotp.cpp b/src/test/ptest/hmath/h_abs_dotp.cpp
similarity index 65%
rename from src/test/ptest/hmath/hdotp.cpp
rename to src/test/ptest/hmath/h_abs_dotp.cpp
index 9394c8a8..481d6f4d 100644
--- a/src/test/ptest/hmath/hdotp.cpp
+++ b/src/test/ptest/hmath/h_abs_dotp.cpp
@@ -1,9 +1,9 @@
 /*
- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
- * Created on: 31 мар. 2020 г.
+ * Created on: 11 дек. 2024 г.
  *
  * lsp-dsp-lib is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,30 +25,29 @@
 #include <lsp-plug.in/test-fw/helpers.h>
 #include <lsp-plug.in/test-fw/ptest.h>
 
-#define MIN_RANK 8
+#define MIN_RANK 5
 #define MAX_RANK 16
 
 namespace lsp
 {
     namespace generic
     {
-        float h_dotp(const float *a, const float *b, size_t count);
-        float h_sqr_dotp(const float *a, const float *b, size_t count);
         float h_abs_dotp(const float *a, const float *b, size_t count);
     }
 
     IF_ARCH_X86(
         namespace sse
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
 
         namespace avx
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
+            float h_abs_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx512
+        {
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
     )
@@ -56,8 +55,6 @@ namespace lsp
     IF_ARCH_ARM(
         namespace neon_d32
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
     )
@@ -65,8 +62,6 @@ namespace lsp
     IF_ARCH_AARCH64(
         namespace asimd
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
     )
@@ -74,7 +69,7 @@ namespace lsp
     typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
 }
 
-PTEST_BEGIN("dsp.hmath", hdotp, 5, 10000)
+PTEST_BEGIN("dsp.hmath", h_abs_dotp, 5, 5000)
 
     void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
     {
@@ -106,26 +101,13 @@ PTEST_BEGIN("dsp.hmath", hdotp, 5, 10000)
         {
             size_t count = 1 << i;
 
-            CALL(generic::h_dotp);
-            IF_ARCH_X86(CALL(sse::h_dotp));
-            IF_ARCH_X86(CALL(avx::h_dotp));
-            IF_ARCH_ARM(CALL(neon_d32::h_dotp));
-            IF_ARCH_AARCH64(CALL(asimd::h_dotp));
-            PTEST_SEPARATOR;
-
-            CALL(generic::h_sqr_dotp);
-            IF_ARCH_X86(CALL(sse::h_sqr_dotp));
-            IF_ARCH_X86(CALL(avx::h_sqr_dotp));
-            IF_ARCH_ARM(CALL(neon_d32::h_sqr_dotp));
-            IF_ARCH_AARCH64(CALL(asimd::h_sqr_dotp));
-            PTEST_SEPARATOR;
-
             CALL(generic::h_abs_dotp);
             IF_ARCH_X86(CALL(sse::h_abs_dotp));
             IF_ARCH_X86(CALL(avx::h_abs_dotp));
+            IF_ARCH_X86(CALL(avx512::h_abs_dotp));
             IF_ARCH_ARM(CALL(neon_d32::h_abs_dotp));
             IF_ARCH_AARCH64(CALL(asimd::h_abs_dotp));
-            PTEST_SEPARATOR2;
+            PTEST_SEPARATOR;
         }
 
         free_aligned(data);
diff --git a/src/test/ptest/hmath/h_abs_sum.cpp b/src/test/ptest/hmath/h_abs_sum.cpp
index 4b26b16b..f00d4620 100644
--- a/src/test/ptest/hmath/h_abs_sum.cpp
+++ b/src/test/ptest/hmath/h_abs_sum.cpp
@@ -24,7 +24,7 @@
 #include <lsp-plug.in/test-fw/helpers.h>
 #include <lsp-plug.in/common/alloc.h>
 
-#define MIN_RANK 8
+#define MIN_RANK 5
 #define MAX_RANK 16
 
 namespace lsp
@@ -68,7 +68,7 @@ namespace lsp
     typedef float (* h_sum_t)(const float *src, size_t count);
 }
 
-PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 10000)
+PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 5000)
 
     void call(const char *label, float *src, size_t count, h_sum_t func)
     {
diff --git a/src/test/ptest/hmath/h_dotp.cpp b/src/test/ptest/hmath/h_dotp.cpp
new file mode 100644
index 00000000..9073c835
--- /dev/null
+++ b/src/test/ptest/hmath/h_dotp.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 31 мар. 2020 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/common/alloc.h>
+#include <lsp-plug.in/common/types.h>
+#include <lsp-plug.in/dsp/dsp.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+#include <lsp-plug.in/test-fw/ptest.h>
+
+#define MIN_RANK 5
+#define MAX_RANK 16
+
+namespace lsp /* forward declarations of the h_dotp variants called by the test below; none of them is defined in this file */
+{
+    namespace generic
+    {
+        float h_dotp(const float *a, const float *b, size_t count);
+    }
+
+    IF_ARCH_X86( /* presumably emits its argument only on x86 builds - verify against the macro definition */
+        namespace sse
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx512
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM( /* presumably emits its argument only on 32-bit ARM builds - verify */
+        namespace neon_d32
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64( /* presumably emits its argument only on AArch64 builds - verify */
+        namespace asimd
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    typedef float (* h_dotp_t)(const float *a, const float *b, size_t count); /* signature shared by every variant declared above */
+}
+
+PTEST_BEGIN("dsp.hmath", h_dotp, 5, 5000)
+
+    /* Runs func(a, b, count) inside PTEST_LOOP under the key "<label> x <count>"; returns at once if PTEST_SUPPORTED(func) is false. */
+    void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
+    {
+        if (!PTEST_SUPPORTED(func))
+            return;
+
+        char buf[80];
+        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
+        printf("Testing %s numbers...\n", buf);
+
+        PTEST_LOOP(buf, func(a, b, count); );
+    }
+
+    PTEST_MAIN
+    {
+        /* One allocation backs both operands: the first half is 'a', the second half is 'b'. */
+        const size_t n_items = 1 << MAX_RANK;
+        uint8_t *raw = NULL;
+        float *a = alloc_aligned<float>(raw, n_items * 2, 64);
+        float *b = a + n_items;
+
+        randomize_sign(a, n_items * 2);
+
+        #define CALL(impl) call(#impl, a, b, count, impl)
+
+        /* Tested lengths are the powers of two from 2^MIN_RANK up to 2^MAX_RANK. */
+        for (size_t rank = MIN_RANK; rank <= MAX_RANK; ++rank)
+        {
+            const size_t count = 1 << rank;
+
+            CALL(generic::h_dotp);
+            IF_ARCH_X86(CALL(sse::h_dotp));
+            IF_ARCH_X86(CALL(avx::h_dotp));
+            IF_ARCH_X86(CALL(avx512::h_dotp));
+            IF_ARCH_ARM(CALL(neon_d32::h_dotp));
+            IF_ARCH_AARCH64(CALL(asimd::h_dotp));
+            PTEST_SEPARATOR;
+        }
+
+        free_aligned(raw);
+    }
+
+PTEST_END
+
+
+
diff --git a/src/test/ptest/hmath/h_sqr_dotp.cpp b/src/test/ptest/hmath/h_sqr_dotp.cpp
new file mode 100644
index 00000000..90cedd06
--- /dev/null
+++ b/src/test/ptest/hmath/h_sqr_dotp.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 дек. 2024 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/common/alloc.h>
+#include <lsp-plug.in/common/types.h>
+#include <lsp-plug.in/dsp/dsp.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+#include <lsp-plug.in/test-fw/ptest.h>
+
+#define MIN_RANK 5
+#define MAX_RANK 16
+
+namespace lsp /* forward declarations of the h_sqr_dotp variants called by the test below; none of them is defined in this file */
+{
+    namespace generic
+    {
+        float h_sqr_dotp(const float *a, const float *b, size_t count);
+    }
+
+    IF_ARCH_X86( /* presumably emits its argument only on x86 builds - verify against the macro definition */
+        namespace sse
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx512
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM( /* presumably emits its argument only on 32-bit ARM builds - verify */
+        namespace neon_d32
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64( /* presumably emits its argument only on AArch64 builds - verify */
+        namespace asimd
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    typedef float (* h_dotp_t)(const float *a, const float *b, size_t count); /* signature shared by every variant declared above */
+}
+
+PTEST_BEGIN("dsp.hmath", h_sqr_dotp, 5, 5000)
+
+    /* Runs func(a, b, count) inside PTEST_LOOP under the key "<label> x <count>"; returns at once if PTEST_SUPPORTED(func) is false. */
+    void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
+    {
+        if (!PTEST_SUPPORTED(func))
+            return;
+
+        char buf[80];
+        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
+        printf("Testing %s numbers...\n", buf);
+
+        PTEST_LOOP(buf, func(a, b, count); );
+    }
+
+    PTEST_MAIN
+    {
+        /* One allocation backs both operands: the first half is 'a', the second half is 'b'. */
+        const size_t n_items = 1 << MAX_RANK;
+        uint8_t *raw = NULL;
+        float *a = alloc_aligned<float>(raw, n_items * 2, 64);
+        float *b = a + n_items;
+
+        randomize_sign(a, n_items * 2);
+
+        #define CALL(impl) call(#impl, a, b, count, impl)
+
+        /* Tested lengths are the powers of two from 2^MIN_RANK up to 2^MAX_RANK. */
+        for (size_t rank = MIN_RANK; rank <= MAX_RANK; ++rank)
+        {
+            const size_t count = 1 << rank;
+
+            CALL(generic::h_sqr_dotp);
+            IF_ARCH_X86(CALL(sse::h_sqr_dotp));
+            IF_ARCH_X86(CALL(avx::h_sqr_dotp));
+            IF_ARCH_X86(CALL(avx512::h_sqr_dotp));
+            IF_ARCH_ARM(CALL(neon_d32::h_sqr_dotp));
+            IF_ARCH_AARCH64(CALL(asimd::h_sqr_dotp));
+            PTEST_SEPARATOR;
+        }
+
+        free_aligned(raw);
+    }
+
+PTEST_END
+
+
+
diff --git a/src/test/ptest/hmath/h_sqr_sum.cpp b/src/test/ptest/hmath/h_sqr_sum.cpp
index 11517a20..d6e9d1e8 100644
--- a/src/test/ptest/hmath/h_sqr_sum.cpp
+++ b/src/test/ptest/hmath/h_sqr_sum.cpp
@@ -24,7 +24,7 @@
 #include <lsp-plug.in/test-fw/helpers.h>
 #include <lsp-plug.in/common/alloc.h>
 
-#define MIN_RANK 8
+#define MIN_RANK 5
 #define MAX_RANK 16
 
 namespace lsp
@@ -69,7 +69,7 @@ namespace lsp
     typedef float (* h_sum_t)(const float *src, size_t count);
 }
 
-PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 10000)
+PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 5000)
 
     void call(const char *label, float *src, size_t count, h_sum_t func)
     {
diff --git a/src/test/ptest/hmath/h_sum.cpp b/src/test/ptest/hmath/h_sum.cpp
index 1ced0f0f..020cf79d 100644
--- a/src/test/ptest/hmath/h_sum.cpp
+++ b/src/test/ptest/hmath/h_sum.cpp
@@ -24,7 +24,7 @@
 #include <lsp-plug.in/test-fw/helpers.h>
 #include <lsp-plug.in/common/alloc.h>
 
-#define MIN_RANK 8
+#define MIN_RANK 5
 #define MAX_RANK 16
 
 namespace lsp
@@ -68,7 +68,7 @@ namespace lsp
     typedef float (* h_sum_t)(const float *src, size_t count);
 }
 
-PTEST_BEGIN("dsp.hmath", h_sum, 5, 10000)
+PTEST_BEGIN("dsp.hmath", h_sum, 5, 5000)
 
     void call(const char *label, float *src, size_t count, h_sum_t func)
     {
diff --git a/src/test/utest/hmath/hdotp.cpp b/src/test/utest/hmath/h_abs_dotp.cpp
similarity index 70%
rename from src/test/utest/hmath/hdotp.cpp
rename to src/test/utest/hmath/h_abs_dotp.cpp
index 276538ba..25926c23 100644
--- a/src/test/utest/hmath/hdotp.cpp
+++ b/src/test/utest/hmath/h_abs_dotp.cpp
@@ -1,9 +1,9 @@
 /*
- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
- * Created on: 31 мар. 2020 г.
+ * Created on: 11 дек. 2024 г.
  *
  * lsp-dsp-lib is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,23 +32,22 @@ namespace lsp
 {
     namespace generic
     {
-        float h_dotp(const float *a, const float *b, size_t count);
-        float h_sqr_dotp(const float *a, const float *b, size_t count);
         float h_abs_dotp(const float *a, const float *b, size_t count);
     }
 
     IF_ARCH_X86(
         namespace sse
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
 
         namespace avx
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
+            float h_abs_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx512
+        {
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
     )
@@ -56,8 +55,6 @@ namespace lsp
     IF_ARCH_ARM(
         namespace neon_d32
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
     )
@@ -65,8 +62,6 @@ namespace lsp
     IF_ARCH_AARCH64(
         namespace asimd
         {
-            float h_dotp(const float *a, const float *b, size_t count);
-            float h_sqr_dotp(const float *a, const float *b, size_t count);
             float h_abs_dotp(const float *a, const float *b, size_t count);
         }
     )
@@ -74,7 +69,7 @@ namespace lsp
     typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
 }
 
-UTEST_BEGIN("dsp.hmath", hdotp)
+UTEST_BEGIN("dsp.hmath", h_abs_dotp)
 
     void call(const char *label, size_t align, h_dotp_t func1, h_dotp_t func2)
     {
@@ -119,20 +114,10 @@ UTEST_BEGIN("dsp.hmath", hdotp)
         #define CALL(generic, func, align) \
             call(#func, align, generic, func);
 
-        IF_ARCH_X86(CALL(generic::h_dotp, sse::h_dotp, 16));
-        IF_ARCH_X86(CALL(generic::h_sqr_dotp, sse::h_sqr_dotp, 16));
         IF_ARCH_X86(CALL(generic::h_abs_dotp, sse::h_abs_dotp, 16));
-
-        IF_ARCH_X86(CALL(generic::h_dotp, avx::h_dotp, 32));
-        IF_ARCH_X86(CALL(generic::h_sqr_dotp, avx::h_sqr_dotp, 32));
         IF_ARCH_X86(CALL(generic::h_abs_dotp, avx::h_abs_dotp, 32));
-
-        IF_ARCH_ARM(CALL(generic::h_dotp, neon_d32::h_dotp, 16));
-        IF_ARCH_ARM(CALL(generic::h_sqr_dotp, neon_d32::h_sqr_dotp, 16));
+        IF_ARCH_X86(CALL(generic::h_abs_dotp, avx512::h_abs_dotp, 64));
         IF_ARCH_ARM(CALL(generic::h_abs_dotp, neon_d32::h_abs_dotp, 16));
-
-        IF_ARCH_AARCH64(CALL(generic::h_dotp, asimd::h_dotp, 16));
-        IF_ARCH_AARCH64(CALL(generic::h_sqr_dotp, asimd::h_sqr_dotp, 16));
         IF_ARCH_AARCH64(CALL(generic::h_abs_dotp, asimd::h_abs_dotp, 16));
     }
 UTEST_END
diff --git a/src/test/utest/hmath/h_dotp.cpp b/src/test/utest/hmath/h_dotp.cpp
new file mode 100644
index 00000000..11ae2b9e
--- /dev/null
+++ b/src/test/utest/hmath/h_dotp.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 31 мар. 2020 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/common/types.h>
+#include <lsp-plug.in/test-fw/utest.h>
+#include <lsp-plug.in/test-fw/FloatBuffer.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+
+#ifndef TOLERANCE
+    #define TOLERANCE 1e-4
+#endif
+
+namespace lsp // Forward declarations of the h_dotp variants that this unit test compares
+{
+    namespace generic
+    {
+        float h_dotp(const float *a, const float *b, size_t count); // Passed as func1 (the comparison baseline) in every CALL of this test
+    }
+
+    IF_ARCH_X86( // IF_ARCH_* wrappers presumably keep their contents only when building for that architecture
+        namespace sse
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx512
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM(
+        namespace neon_d32
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64(
+        namespace asimd
+        {
+            float h_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    typedef float (* h_dotp_t)(const float *a, const float *b, size_t count); // Signature shared by all variants above; lets them be passed to call()
+}
+
+UTEST_BEGIN("dsp.hmath", h_dotp) // Unit test: every optimized h_dotp must agree with generic::h_dotp within TOLERANCE
+
+    void call(const char *label, size_t align, h_dotp_t func1, h_dotp_t func2) // Runs func1 and func2 on identical inputs and compares results; align is forwarded to FloatBuffer
+    {
+        if (!UTEST_SUPPORTED(func1)) // Return without testing if either function is rejected by UTEST_SUPPORTED
+            return;
+        if (!UTEST_SUPPORTED(func2))
+            return;
+
+        UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // Sizes: empty input, every length 0..15, then a mix of larger lengths
+                32, 64, 65, 100, 768, 999, 0x1fff)
+        {
+            for (size_t mask=0; mask <= 0x03; ++mask) // All four combinations of the two per-buffer flags
+            {
+                printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", label, int(count), int(mask));
+
+                FloatBuffer a(count, align, mask & 0x01); // Bit 0 of mask is the third ctor argument for a (presumably aligned vs. unaligned data - confirm)
+                FloatBuffer b(count, align, mask & 0x02); // Bit 1 of mask plays the same role for b
+
+                a.randomize_sign();
+                b.randomize_sign();
+
+                // Call functions
+                float xa = func1(a, b, count); // Both functions receive the very same buffers
+                float xb = func2(a, b, count);
+
+                UTEST_ASSERT_MSG(a.valid(), "Source buffer A corrupted"); // valid() presumably detects writes outside the buffer - confirm
+                UTEST_ASSERT_MSG(b.valid(), "Source buffer B corrupted");
+
+                // Compare the two scalar results
+                if (!float_equals_adaptive(xa, xb, TOLERANCE)) // TOLERANCE is 1e-4 unless it was defined before this file
+                {
+                    a.dump("A"); // Dump both inputs ahead of the failure message
+                    b.dump("B");
+                    UTEST_FAIL_MSG("%s: Result of function 1 (%f) differs result of function 2 (%f)", label, xa, xb);
+                }
+            }
+        }
+    }
+
+    UTEST_MAIN
+    {
+        #define CALL(generic, func, align) \
+            call(#func, align, generic, func);
+
+        IF_ARCH_X86(CALL(generic::h_dotp, sse::h_dotp, 16)); // Last argument becomes `align` in call(); #func makes the optimized name the label
+        IF_ARCH_X86(CALL(generic::h_dotp, avx::h_dotp, 32));
+        IF_ARCH_X86(CALL(generic::h_dotp, avx512::h_dotp, 64));
+        IF_ARCH_ARM(CALL(generic::h_dotp, neon_d32::h_dotp, 16));
+        IF_ARCH_AARCH64(CALL(generic::h_dotp, asimd::h_dotp, 16));
+    }
+UTEST_END
diff --git a/src/test/utest/hmath/h_sqr_dotp.cpp b/src/test/utest/hmath/h_sqr_dotp.cpp
new file mode 100644
index 00000000..d430a175
--- /dev/null
+++ b/src/test/utest/hmath/h_sqr_dotp.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 дек. 2024 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/common/types.h>
+#include <lsp-plug.in/test-fw/utest.h>
+#include <lsp-plug.in/test-fw/FloatBuffer.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+
+#ifndef TOLERANCE
+    #define TOLERANCE 1e-4
+#endif
+
+namespace lsp // Forward declarations of the h_sqr_dotp variants that this unit test compares
+{
+    namespace generic
+    {
+        float h_sqr_dotp(const float *a, const float *b, size_t count); // Passed as func1 (the comparison baseline) in every CALL of this test
+    }
+
+    IF_ARCH_X86( // IF_ARCH_* wrappers presumably keep their contents only when building for that architecture
+        namespace sse
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+
+        namespace avx512
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM(
+        namespace neon_d32
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64(
+        namespace asimd
+        {
+            float h_sqr_dotp(const float *a, const float *b, size_t count);
+        }
+    )
+
+    typedef float (* h_dotp_t)(const float *a, const float *b, size_t count); // Signature shared by all variants above; lets them be passed to call()
+}
+
+UTEST_BEGIN("dsp.hmath", h_sqr_dotp) // Unit test: every optimized h_sqr_dotp must agree with generic::h_sqr_dotp within TOLERANCE
+
+    void call(const char *label, size_t align, h_dotp_t func1, h_dotp_t func2) // Runs func1 and func2 on identical inputs and compares results; align is forwarded to FloatBuffer
+    {
+        if (!UTEST_SUPPORTED(func1)) // Return without testing if either function is rejected by UTEST_SUPPORTED
+            return;
+        if (!UTEST_SUPPORTED(func2))
+            return;
+
+        UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // Sizes: empty input, every length 0..15, then a mix of larger lengths
+                32, 64, 65, 100, 768, 999, 0x1fff)
+        {
+            for (size_t mask=0; mask <= 0x03; ++mask) // All four combinations of the two per-buffer flags
+            {
+                printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", label, int(count), int(mask));
+
+                FloatBuffer a(count, align, mask & 0x01); // Bit 0 of mask is the third ctor argument for a (presumably aligned vs. unaligned data - confirm)
+                FloatBuffer b(count, align, mask & 0x02); // Bit 1 of mask plays the same role for b
+
+                a.randomize_sign();
+                b.randomize_sign();
+
+                // Call functions
+                float xa = func1(a, b, count); // Both functions receive the very same buffers
+                float xb = func2(a, b, count);
+
+                UTEST_ASSERT_MSG(a.valid(), "Source buffer A corrupted"); // valid() presumably detects writes outside the buffer - confirm
+                UTEST_ASSERT_MSG(b.valid(), "Source buffer B corrupted");
+
+                // Compare the two scalar results
+                if (!float_equals_adaptive(xa, xb, TOLERANCE)) // TOLERANCE is 1e-4 unless it was defined before this file
+                {
+                    a.dump("A"); // Dump both inputs ahead of the failure message
+                    b.dump("B");
+                    UTEST_FAIL_MSG("%s: Result of function 1 (%f) differs result of function 2 (%f)", label, xa, xb);
+                }
+            }
+        }
+    }
+
+    UTEST_MAIN
+    {
+        #define CALL(generic, func, align) \
+            call(#func, align, generic, func);
+
+        IF_ARCH_X86(CALL(generic::h_sqr_dotp, sse::h_sqr_dotp, 16)); // Last argument becomes `align` in call(); #func makes the optimized name the label
+        IF_ARCH_X86(CALL(generic::h_sqr_dotp, avx::h_sqr_dotp, 32));
+        IF_ARCH_X86(CALL(generic::h_sqr_dotp, avx512::h_sqr_dotp, 64));
+        IF_ARCH_ARM(CALL(generic::h_sqr_dotp, neon_d32::h_sqr_dotp, 16));
+        IF_ARCH_AARCH64(CALL(generic::h_sqr_dotp, asimd::h_sqr_dotp, 16));
+    }
+UTEST_END