From 9b85f424474a4811c72448102c347c8f0253775f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 6 Nov 2023 13:35:38 +0100 Subject: [PATCH] fix OpenBLAS on newer Intel CPUs --- .../o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.2.0.eb | 3 + .../o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.3.0.eb | 13 ++-- .../OpenBLAS-0.3.20-NVHPC-22.7-CUDA-11.7.0.eb | 3 + ...xASUM-microkernels-on-new-intel-cpus.patch | 65 +++++++++++++++++++ .../o/OpenBLAS/OpenBLAS-0.3.21-GCC-12.2.0.eb | 3 + .../o/OpenBLAS/OpenBLAS-0.3.23-GCC-12.3.0.eb | 3 + .../o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb | 3 + 7 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.2.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.2.0.eb index 03bd16ce48f..48991f92ef5 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.2.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.2.0.eb @@ -17,6 +17,7 @@ patches = [ ('timing.tgz', '.'), 'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch', 'OpenBLAS-0.3.20_fix-x86-cpuid.patch', + 'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch', 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch', 'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch', @@ -28,6 +29,8 @@ checksums = [ {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch': 'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'}, {'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'}, + {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch': + '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'}, {'OpenBLAS-0.3.21_fix-order-vectorization.patch': '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch': diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.3.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.3.0.eb index 5b104219e11..96a3218f027 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.3.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.3.0.eb @@ -16,11 +16,12 @@ patches = [ ('large.tgz', '.'), ('timing.tgz', '.'), 'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch', + 'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch', + 'OpenBLAS-0.3.20_fix-x86-cpuid.patch', + 'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch', 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch', 'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch', - 'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch', - 'OpenBLAS-0.3.20_fix-x86-cpuid.patch', ] checksums = [ {'v0.3.20.tar.gz': '8495c9affc536253648e942908e88e097f2ec7753ede55aca52e5dead3029e3c'}, @@ -28,14 +29,16 @@ checksums = [ {'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'}, {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch': 'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'}, + {'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch': + '1b495465f8dd1e151d74cf5aa4288120361d29164d6a377228a8d51c255b8a9e'}, + {'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'}, + {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch': + '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'}, {'OpenBLAS-0.3.21_fix-order-vectorization.patch': '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch': 'bd6836206a883208dc8bc997946f97e4c97d91d8e101fc54db414aaa56902fc3'}, {'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch': '3dac2c1ec896df574f1b37cde81a16f24550b7f1eb81fbfacb0c4449b0dc7894'}, - {'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch': - '1b495465f8dd1e151d74cf5aa4288120361d29164d6a377228a8d51c255b8a9e'}, - {'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'}, ] builddependencies = [ diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-NVHPC-22.7-CUDA-11.7.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-NVHPC-22.7-CUDA-11.7.0.eb index cbf11c2a295..d7442a5c7d8 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-NVHPC-22.7-CUDA-11.7.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-NVHPC-22.7-CUDA-11.7.0.eb @@ -17,6 +17,7 @@ patches = [ ('timing.tgz', '.'), 'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch', 'OpenBLAS-0.3.20_fix-x86-cpuid.patch', + 'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch', 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch', 'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch', @@ -28,6 +29,8 @@ checksums = [ {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch': 'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'}, {'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'}, + {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch': + '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'}, {'OpenBLAS-0.3.21_fix-order-vectorization.patch': '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch': diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch new file mode 100644 index 00000000000..a342b04b79f --- /dev/null +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch @@ -0,0 +1,65 @@ +From 9019bc494514a74c2042152cdca0a36adea7b42f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Sat, 4 Nov 2023 22:10:06 +0100 +Subject: [PATCH] Use SkylakeX ?ASUM microkernel for Cooperlake/Sapphirerapids + as well + +--- + kernel/x86_64/casum.c | 2 +- + kernel/x86_64/dasum.c | 2 +- + kernel/x86_64/sasum.c | 2 +- + kernel/x86_64/zasum.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/casum.c b/kernel/x86_64/casum.c +index 60feec0ceb..e4d0543114 100644 +--- a/kernel/x86_64/casum.c ++++ b/kernel/x86_64/casum.c +@@ -4,7 +4,7 @@ + #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) + #endif + +-#if defined(SKYLAKEX) ++#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) + #include "casum_microk_skylakex-2.c" + #endif + +diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c +index a9c40f38f0..0147c6978a 100644 +--- a/kernel/x86_64/dasum.c ++++ b/kernel/x86_64/dasum.c +@@ -4,7 +4,7 @@ + #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) + #endif + +-#if defined(SKYLAKEX) ++#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) + #include "dasum_microk_skylakex-2.c" + #elif defined(HASWELL) || defined(ZEN) + #include "dasum_microk_haswell-2.c" +diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c +index 37a92468ff..3f22cb97a1 100644 +--- a/kernel/x86_64/sasum.c ++++ b/kernel/x86_64/sasum.c +@@ -9,7 +9,7 @@ + + #endif + +-#if defined(SKYLAKEX) ++#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) + #include "sasum_microk_skylakex-2.c" + #elif defined(HASWELL) || defined(ZEN) + #include "sasum_microk_haswell-2.c" +diff --git a/kernel/x86_64/zasum.c b/kernel/x86_64/zasum.c +index 80e95a2c89..3f17ab1cfa 100644 +--- a/kernel/x86_64/zasum.c ++++ b/kernel/x86_64/zasum.c +@@ -4,7 +4,7 @@ + #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) + #endif + +-#if defined(SKYLAKEX) ++#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) + #include "zasum_microk_skylakex-2.c" + #endif + diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.21-GCC-12.2.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.21-GCC-12.2.0.eb index 66b40a6a8a1..7e7d8c4b75f 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.21-GCC-12.2.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.21-GCC-12.2.0.eb @@ -17,6 +17,7 @@ patches = [ ('timing.tgz', '.'), 'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch', 'OpenBLAS-0.3.20_fix-x86-cpuid.patch', + 'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch', 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.21-GCC-12.2.0_disable-fma-in-cscal-zscal.patch', 'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch', @@ -28,6 +29,8 @@ checksums = [ {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch': 'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'}, {'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'}, + {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch': + '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'}, {'OpenBLAS-0.3.21_fix-order-vectorization.patch': '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.21-GCC-12.2.0_disable-fma-in-cscal-zscal.patch': diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.23-GCC-12.3.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.23-GCC-12.3.0.eb index 8051da228f0..490f623ac15 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.23-GCC-12.3.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.23-GCC-12.3.0.eb @@ -17,6 +17,7 @@ patches = [ ('timing.tgz', '.'), 'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch', 'OpenBLAS-0.3.20_fix-x86-cpuid.patch', + 'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch', 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.23_fix-parallel-build.patch', 'OpenBLAS-0.3.23_fix-lapack-test.patch', @@ -30,6 +31,8 @@ checksums = [ {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch': 'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'}, {'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'}, + {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch': + '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'}, {'OpenBLAS-0.3.21_fix-order-vectorization.patch': '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.23_fix-parallel-build.patch': 'abe10ba3b0ca54772dbf235596e35325a5159018f6a60cfc88824c2c220d99d9'}, diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb index f3358f87771..eb7737a76aa 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb @@ -16,6 +16,7 @@ patches = [ ('large.tgz', '.'), ('timing.tgz', '.'), 'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch', + 'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch', 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.23_fix-lapack-test.patch', 'OpenBLAS-0.3.23_disable-DDRGES3-LAPACK-test.patch', @@ -26,6 +27,8 @@ checksums = [ {'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'}, {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch': 'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'}, + {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch': + '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'}, {'OpenBLAS-0.3.21_fix-order-vectorization.patch': '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.23_fix-lapack-test.patch': 'f6b3d81061f136e34aaf5359bb80fb9d2bba28825cc1dd26179b8dd01a9a0054'},