Skip to content

Commit

Permalink
Merge pull request easybuilders#19159 from Flamefire/20231106133537_new_pr_OpenBLAS0320
Browse files Browse the repository at this point in the history

fix OpenBLAS on newer Intel CPUs
  • Loading branch information
Micket authored Nov 7, 2023
2 parents e8383fa + 9b85f42 commit c757d75
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ patches = [
('timing.tgz', '.'),
'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
'OpenBLAS-0.3.20_fix-x86-cpuid.patch',
'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch',
'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch',
Expand All @@ -28,6 +29,8 @@ checksums = [
{'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
{'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'},
{'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
'1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
{'OpenBLAS-0.3.21_fix-order-vectorization.patch':
'08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
{'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch':
Expand Down
13 changes: 8 additions & 5 deletions easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.20-GCC-11.3.0.eb
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,29 @@ patches = [
('large.tgz', '.'),
('timing.tgz', '.'),
'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch',
'OpenBLAS-0.3.20_fix-x86-cpuid.patch',
'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch',
'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch',
'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch',
'OpenBLAS-0.3.20_fix-x86-cpuid.patch',
]
checksums = [
{'v0.3.20.tar.gz': '8495c9affc536253648e942908e88e097f2ec7753ede55aca52e5dead3029e3c'},
{'large.tgz': 'f328d88b7fa97722f271d7d0cfea1c220e0f8e5ed5ff01d8ef1eb51d6f4243a1'},
{'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'},
{'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
{'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch':
'1b495465f8dd1e151d74cf5aa4288120361d29164d6a377228a8d51c255b8a9e'},
{'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'},
{'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
'1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
{'OpenBLAS-0.3.21_fix-order-vectorization.patch':
'08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
{'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch':
'bd6836206a883208dc8bc997946f97e4c97d91d8e101fc54db414aaa56902fc3'},
{'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch': '3dac2c1ec896df574f1b37cde81a16f24550b7f1eb81fbfacb0c4449b0dc7894'},
{'OpenBLAS-0.3.20_fix-cpuid-neoverse-v1-n2.patch':
'1b495465f8dd1e151d74cf5aa4288120361d29164d6a377228a8d51c255b8a9e'},
{'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'},
]

builddependencies = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ patches = [
('timing.tgz', '.'),
'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
'OpenBLAS-0.3.20_fix-x86-cpuid.patch',
'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch',
'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch',
Expand All @@ -28,6 +29,8 @@ checksums = [
{'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
{'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'},
{'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
'1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
{'OpenBLAS-0.3.21_fix-order-vectorization.patch':
'08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
{'OpenBLAS-0.3.21_disable-fma-in-cscal-zscal.patch':
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
From 9019bc494514a74c2042152cdca0a36adea7b42f Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 4 Nov 2023 22:10:06 +0100
Subject: [PATCH] Use SkylakeX ?ASUM microkernel for Cooperlake/Sapphirerapids
as well

---
kernel/x86_64/casum.c | 2 +-
kernel/x86_64/dasum.c | 2 +-
kernel/x86_64/sasum.c | 2 +-
kernel/x86_64/zasum.c | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/x86_64/casum.c b/kernel/x86_64/casum.c
index 60feec0ceb..e4d0543114 100644
--- a/kernel/x86_64/casum.c
+++ b/kernel/x86_64/casum.c
@@ -4,7 +4,7 @@
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
#endif

-#if defined(SKYLAKEX)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "casum_microk_skylakex-2.c"
#endif

diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c
index a9c40f38f0..0147c6978a 100644
--- a/kernel/x86_64/dasum.c
+++ b/kernel/x86_64/dasum.c
@@ -4,7 +4,7 @@
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
#endif

-#if defined(SKYLAKEX)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "dasum_microk_skylakex-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "dasum_microk_haswell-2.c"
diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c
index 37a92468ff..3f22cb97a1 100644
--- a/kernel/x86_64/sasum.c
+++ b/kernel/x86_64/sasum.c
@@ -9,7 +9,7 @@

#endif

-#if defined(SKYLAKEX)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "sasum_microk_skylakex-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "sasum_microk_haswell-2.c"
diff --git a/kernel/x86_64/zasum.c b/kernel/x86_64/zasum.c
index 80e95a2c89..3f17ab1cfa 100644
--- a/kernel/x86_64/zasum.c
+++ b/kernel/x86_64/zasum.c
@@ -4,7 +4,7 @@
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
#endif

-#if defined(SKYLAKEX)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "zasum_microk_skylakex-2.c"
#endif

Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ patches = [
('timing.tgz', '.'),
'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
'OpenBLAS-0.3.20_fix-x86-cpuid.patch',
'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.21-GCC-12.2.0_disable-fma-in-cscal-zscal.patch',
'OpenBLAS-0.3.21_avoid-crash-in-zdot.patch',
Expand All @@ -28,6 +29,8 @@ checksums = [
{'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
{'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'},
{'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
'1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
{'OpenBLAS-0.3.21_fix-order-vectorization.patch':
'08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
{'OpenBLAS-0.3.21-GCC-12.2.0_disable-fma-in-cscal-zscal.patch':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ patches = [
('timing.tgz', '.'),
'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
'OpenBLAS-0.3.20_fix-x86-cpuid.patch',
'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.23_fix-parallel-build.patch',
'OpenBLAS-0.3.23_fix-lapack-test.patch',
Expand All @@ -30,6 +31,8 @@ checksums = [
{'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
{'OpenBLAS-0.3.20_fix-x86-cpuid.patch': '57e8384404e136b9f0dafc26573adeb7dc69e60d84a7e189643b91d6299888fc'},
{'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
'1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
{'OpenBLAS-0.3.21_fix-order-vectorization.patch':
'08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
{'OpenBLAS-0.3.23_fix-parallel-build.patch': 'abe10ba3b0ca54772dbf235596e35325a5159018f6a60cfc88824c2c220d99d9'},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ patches = [
('large.tgz', '.'),
('timing.tgz', '.'),
'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.23_fix-lapack-test.patch',
'OpenBLAS-0.3.23_disable-DDRGES3-LAPACK-test.patch',
Expand All @@ -26,6 +27,8 @@ checksums = [
{'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'},
{'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
{'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
'1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
{'OpenBLAS-0.3.21_fix-order-vectorization.patch':
'08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
{'OpenBLAS-0.3.23_fix-lapack-test.patch': 'f6b3d81061f136e34aaf5359bb80fb9d2bba28825cc1dd26179b8dd01a9a0054'},
Expand Down

0 comments on commit c757d75

Please sign in to comment.