From 0425cc6a0eb3637758a6eaf3de5385e46357d932 Mon Sep 17 00:00:00 2001
From: Junchao Zhang <jczhang@anl.gov>
Date: Thu, 15 Feb 2024 16:33:47 -0600
Subject: [PATCH] Add a workaround for compilation errors with cuda-12.2.0 +
 gcc-12.3 (#2108)

On Perlmutter@NERSC, I encountered this compilation error when building with cuda-12.2.0 and gcc-12.3:

/usr/lib64/gcc/x86_64-suse-linux/12/include/avx512fp16intrin.h(38): error: vector_size attribute requires an arithmetic or enum type
   typedef __half __v8hf __attribute__ ((__vector_size__ (16)));

The workaround is described at https://forums.developer.nvidia.com/t/including-cub-header-breakes-compilation-with-gcc-12-and-sse2-or-better/255018
---
 batched/dense/src/KokkosBatched_Vector_SIMD.hpp | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/batched/dense/src/KokkosBatched_Vector_SIMD.hpp b/batched/dense/src/KokkosBatched_Vector_SIMD.hpp
index e27419e7c2..753904dbb9 100644
--- a/batched/dense/src/KokkosBatched_Vector_SIMD.hpp
+++ b/batched/dense/src/KokkosBatched_Vector_SIMD.hpp
@@ -513,6 +513,11 @@ class Vector<SIMD<double>, 4> {
 
 #if defined(__KOKKOSBATCHED_ENABLE_AVX__)
 #if defined(__AVX__) || defined(__AVX2__)
+
+#if CUDA_VERSION < 12022
+#undef _Float16
+#endif
+
 #include <immintrin.h>
 
 namespace KokkosBatched {
@@ -668,6 +673,9 @@ class Vector<SIMD<Kokkos::complex<double> >, 2> {
 #endif /* #if defined(__AVX__) || defined(__AVX2__) */
 
 #if defined(__AVX512F__)
+#if CUDA_VERSION < 12022
+#undef _Float16
+#endif
 #include <immintrin.h>
 
 namespace KokkosBatched {