Skip to content

Commit

Permalink
src/batched/dense: Barrier after broadcast
Browse files Browse the repository at this point in the history
  - A timing bug was hidden by the lengthier epilogue generated by
  gcc 7.2.0. This commit adds a memory barrier for GNU compilers after
  avx512 broadcast intrinsics to ensure the broadcast writes land before
  the memory locations are read from. Fixes kokkos#1512.
  • Loading branch information
e10harvey committed Sep 12, 2022
1 parent 26f8704 commit 0f5c8cc
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/KokkosKernels_Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@
#else
#define KOKKOSKERNELS_UNUSED_ATTRIBUTE
#endif // __GNUC__

// KOKKOSKERNELS_GNU_COMPILER_FENCE: statement-like macro placed after code
// whose writes must be visible before subsequent reads.
//  - When KOKKOS_COMPILER_GNU is defined it expands to `__sync_synchronize();`
//    (a GCC builtin that issues a full memory barrier).
//  - Otherwise it expands to nothing.
// The expansion already carries its own trailing semicolon, so call sites
// write the macro name on a line by itself without adding one.
// NOTE(review): despite "COMPILER_FENCE" in the name, __sync_synchronize() is
// documented by GCC as a full memory barrier, not only a compiler-level
// reordering barrier — confirm this stronger fence is what is intended.
#if defined(KOKKOS_COMPILER_GNU)
#define KOKKOSKERNELS_GNU_COMPILER_FENCE __sync_synchronize();
#else
#define KOKKOSKERNELS_GNU_COMPILER_FENCE
#endif // KOKKOS_COMPILER_GNU
/******* END other helper macros *******/

#endif // KOKKOSKERNELS_MACROS_HPP_
4 changes: 4 additions & 0 deletions src/batched/dense/KokkosBatched_Vector_SIMD.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,10 +591,12 @@ class Vector<SIMD<Kokkos::complex<double> >, 2> {
// Default constructor: sets every element of the 256-bit register _data to 0.0.
inline Vector() { _data = _mm256_setzero_pd(); }
// Broadcast constructor: fills _data with copies of `val`.
// _mm256_broadcast_pd loads the 128 bits found at &val and replicates them
// into both 128-bit halves of the 256-bit register.
// NOTE(review): assumes value_type occupies exactly two contiguous doubles
// (real, imag) so the __m128d reinterpretation is valid — confirm against the
// class typedefs.
inline Vector(const value_type &val) {
_data = _mm256_broadcast_pd((const __m128d *)&val);
// Memory barrier on GNU compilers (no-op elsewhere). It must stay directly
// after the broadcast: it was added so the broadcast result is written before
// the memory is read back (timing bug seen with gcc 7.2.0).
KOKKOSKERNELS_GNU_COMPILER_FENCE
}
// Broadcast constructor from a magnitude: converts `val` to a value_type and
// fills _data with copies of that converted value.
// NOTE(review): presumably value_type(val) yields (val, 0) — i.e. zero
// imaginary part; verify against value_type's converting constructor.
inline Vector(const mag_type &val) {
// Local temporary whose 128 bits are the source of the broadcast below.
const value_type a(val);
_data = _mm256_broadcast_pd((__m128d const *)&a);
// Memory barrier on GNU compilers (no-op elsewhere). It must stay directly
// after the broadcast: it was added so the broadcast result is written before
// the memory is read back (timing bug seen with gcc 7.2.0).
KOKKOSKERNELS_GNU_COMPILER_FENCE
}
// Copy constructor: copies the register held by `b` into this object's _data.
inline Vector(const type &b) { _data = b._data; }
// Wraps an existing 256-bit register value: _data becomes a copy of `val`.
inline Vector(const __m256d &val) { _data = val; }
Expand Down Expand Up @@ -744,10 +746,12 @@ class Vector<SIMD<Kokkos::complex<double> >, 4> {
// Broadcast constructor: fills the 512-bit _data with copies of `val` as
// interleaved (real, imag) pairs.
// The source operand is a register of all val.imag(); the masked broadcast
// then overwrites the elements selected by mask 0x55 (binary 01010101, i.e.
// elements 0, 2, 4, 6) with val.real(). Unselected (odd) elements keep the
// imaginary value.
inline Vector(const value_type &val) {
_data = _mm512_mask_broadcast_f64x4(_mm512_set1_pd(val.imag()), 0x55,
_mm256_set1_pd(val.real()));
// Memory barrier on GNU compilers (no-op elsewhere). It must stay directly
// after the broadcast: it was added so the broadcast result is written before
// the memory is read back (timing bug seen with gcc 7.2.0).
KOKKOSKERNELS_GNU_COMPILER_FENCE
}
// Broadcast constructor from a magnitude: every (real, imag) pair in the
// 512-bit _data becomes (val, 0).
// The source operand is an all-zero register; mask 0x55 (binary 01010101)
// selects elements 0, 2, 4, 6 to receive `val`, leaving the odd elements 0.0.
inline Vector(const mag_type &val) {
_data = _mm512_mask_broadcast_f64x4(_mm512_setzero_pd(), 0x55,
_mm256_set1_pd(val));
// Memory barrier on GNU compilers (no-op elsewhere). It must stay directly
// after the broadcast: it was added so the broadcast result is written before
// the memory is read back (timing bug seen with gcc 7.2.0).
KOKKOSKERNELS_GNU_COMPILER_FENCE
}
// Copy constructor: copies the register held by `b` into this object's _data.
inline Vector(const type &b) { _data = b._data; }
// Wraps an existing 512-bit register value: _data becomes a copy of `val`.
inline Vector(const __m512d &val) { _data = val; }
Expand Down

0 comments on commit 0f5c8cc

Please sign in to comment.