diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp index 904dcfd8d7a45..8d039fa5b20bd 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp @@ -9263,8 +9263,37 @@ __ESIMD_API std::enable_if_t< detail::is_simd_view_type_v, simd> atomic_update(T *p, OffsetSimdViewT byte_offset, PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset.read(), mask, props); + return atomic_update(p, byte_offset.read(), props); +} + +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// props = {}); +/// +/// A variation of \c atomic_update API with \c offsets represented as +/// \c simd_view object without mask operand and allows the use without +/// specifying \c T and \c N template parameters. +/// +/// @tparam Op The atomic operation - can be \c atomic_op::inc, +/// \c atomic_op::dec, or \c atomic_op::load. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update. 
+template < + atomic_op Op, typename OffsetSimdViewT, typename T, + int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 0 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, PropertyListT props = {}) { + return atomic_update(p, byte_offset.read(), props); } /// A variation of \c atomic_update API with \c offset represented as @@ -9400,16 +9429,61 @@ atomic_update(T *p, simd byte_offset, simd src0, /// simd /// atomic_update(T *ptr, simd byte_offset, -/// simd src0, props = {}); // (usm-au1-2) - -/// A variation of \c atomic_update API without mask operand. - +/// SrcSimdViewT src0, simd_mask mask, props = {}); +/// +/// Atomically updates \c N memory locations represented by a USM pointer and +/// a vector of offsets relative to the pointer, and returns a vector of old +/// values found at the memory locations before update. The update operation +/// has 1 additional argument. +/// A variation of \c atomic_update API with \c src0 represented as +/// \c simd_view object and allows the use without specifying \c T and \c N +/// template parameters. +/// /// @tparam Op The atomic operation - can be one of the following: /// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c atomic_op::max, /// \c atomic_op::xchg, \c atomic_op::bit_and, \c atomic_op::bit_or, /// \c atomic_op::bit_xor, \c atomic_op::minsint, \c atomic_op::maxsint, /// \c atomic_op::fmax, \c atomic_op::fmin, \c atomic_op::fadd, \c /// atomic_op::fsub, \c atomic_op::store. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The additional argument. +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. 
+/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. Other properties are +/// ignored. +/// @return A vector of the old values at the memory locations before the +/// update. +/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 1 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, SrcSimdViewT src0, + simd_mask mask, PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src0 parameter must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0.read(), mask, props); +} + +/// simd +/// atomic_update(T *ptr, simd byte_offset, +/// simd src0, props = {}); // (usm-au1-2) + +/// A variation of \c atomic_update API without mask operand. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c +/// atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c +/// atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c +/// atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c +/// atomic_op::fadd, \c atomic_op::fsub, \c atomic_op::store. /// @tparam T The vector element type. /// @tparam N The number of memory locations to update. /// @param p The USM pointer. 
@@ -9434,6 +9508,47 @@ atomic_update(T *p, simd byte_offset, simd src0, return atomic_update(p, byte_offset, src0, mask, props); } +/// simd +/// atomic_update(T *ptr, simd byte_offset, +/// SrcSimdViewT src0, props = {}); + +/// A variation of \c atomic_update API with \c src0 represented as +/// \c simd_view object and no mask operand and allows the use without +/// specifying \c T and \c N template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c +/// atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c +/// atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c +/// atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c +/// atomic_op::fadd, \c atomic_op::fsub, \c atomic_op::store. +/// @tparam T The vector element type. +/// @tparam N The number of memory locations to update. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The additional argument. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. Other properties are +/// ignored. +/// @return A vector of the old values at the memory locations before the +/// update. 
+/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 1 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, SrcSimdViewT src0, + PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src0 parameter must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0.read(), props); +} + /// simd /// atomic_update(T *p, OffsetSimdViewT byte_offset, /// simd src0, @@ -9474,6 +9589,54 @@ atomic_update(T *p, OffsetSimdViewT offsets, simd src0, simd_mask mask, return atomic_update(p, offsets.read(), src0, mask, props); } +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// SrcSimdViewT src0, +/// simd_mask mask, props = {}); +/// +/// A variation of \c atomic_update API with \c byte_offset and \c src0 +/// represented as \c simd_view object and allows the use without specifying \c +/// T and \c N template parameters. +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c +/// atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c +/// atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c +/// atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c +/// atomic_op::fadd, \c atomic_op::fsub, \c atomic_op::store. +/// @tparam T The vector element type. +/// @tparam N The number of memory locations to update. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The additional argument. +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. 
+/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. Other properties are +/// ignored. +/// @return A vector of the old values at the memory locations before the +/// update. +/// +template < + atomic_op Op, typename OffsetSimdViewT, typename SrcSimdViewT, typename T, + int N, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 1 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT offsets, SrcSimdViewT src0, + simd_mask mask, PropertyListT props = {}) { + static_assert( + N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() && + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src0 and offsets parameters must correspond to the size of " + "mask parameter."); + return atomic_update(p, offsets.read(), src0.read(), mask, props); +} + /// simd /// atomic_update(T *p, OffsetSimdViewT byte_offset, /// simd src0, @@ -9513,6 +9676,48 @@ atomic_update(T *p, OffsetSimdViewT offsets, simd src0, return atomic_update(p, offsets.read(), src0, mask, props); } +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// SrcSimdViewT src0, +/// props = {}); +/// +/// A variation of \c atomic_update API with \c byte_offset and \c src0 +/// represented as \c simd_view objects and no mask operand and allows the use +/// without specifying \c T and \c N template parameters. +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c +/// atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c +/// atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c +/// atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c +/// atomic_op::fadd, \c atomic_op::fsub, \c atomic_op::store.
+/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The additional argument. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. Other properties are +/// ignored. +/// @return A vector of the old values at the memory locations before the +/// update. +/// +template < + atomic_op Op, typename OffsetSimdViewT, typename SrcSimdViewT, typename T, + int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 1 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT offsets, SrcSimdViewT src0, + PropertyListT props = {}) { + static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + "Size of src0 parameter must correspond to the size of " + "offsets parameter."); + return atomic_update(p, offsets.read(), src0.read(), props); +} + /// A variation of \c atomic_update API with \c offset represented as /// scalar object. /// @@ -9642,6 +9847,127 @@ atomic_update(T *p, simd byte_offset, simd src0, } } +/// simd +/// atomic_update(T *p, simd byte_offset, +/// SrcSimdViewT src0, simd src1, +/// simd_mask mask, props = {}); +/// +/// A variation of \c atomic_update API with \c src0 represented as +/// \c simd_view object and allows the use without specifying \c T and \c N +/// template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). 
+/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update. +/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, SrcSimdViewT src0, + simd src1, simd_mask mask, PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src0 parameter must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0.read(), src1, mask, + props); +} + +/// simd +/// atomic_update(T *p, simd byte_offset, +/// simd src0, SrcSimdViewT src1, +/// simd_mask mask, props = {}); +/// +/// A variation of \c atomic_update API with \c src1 represented as +/// \c simd_view object and allows the use without specifying \c T and \c N +/// template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored.
+/// @return A vector of the old values at the memory locations before the +/// update. +/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, simd src0, + SrcSimdViewT src1, simd_mask mask, PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src1 parameter must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0, src1.read(), mask, + props); +} + +/// simd +/// atomic_update(T *p, simd byte_offset, +/// SrcSimdViewT src0, SrcSimdViewT src1, +/// simd_mask mask, props = {}); +/// +/// A variation of \c atomic_update API with \c src0 and \c src1 represented as +/// \c simd_view object and allows the use without specifying \c T and \c N +/// template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, SrcSimdViewT src0, + SrcSimdViewT src1, simd_mask mask, PropertyListT props = {}) { + static_assert( + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src1 and src0 parameters must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0.read(), src1.read(), mask, + props); +} + /// simd /// atomic_update(T *p, simd byte_offset, /// simd src0, simd src1, @@ -9674,6 +10000,119 @@ atomic_update(T *p, simd byte_offset, simd src0, return atomic_update(p, byte_offset, src0, src1, mask, props); } +/// simd +/// atomic_update(T *p, simd byte_offset, +/// SrcSimdViewT src0, simd src1, +/// props = {}); +/// +/// A variation of \c atomic_update API with \c src0 represented as +/// \c simd_view object without \c mask operand and allows the use without +/// specifying \c T and \c N template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, SrcSimdViewT src0, + simd src1, PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src0 parameter must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0.read(), src1, props); +} + +/// simd +/// atomic_update(T *p, simd byte_offset, +/// simd src0, SrcSimdViewT src1, +/// props = {}); +/// +/// A variation of \c atomic_update API with \c src1 represented as +/// \c simd_view object without \c mask operand and allows the use without +/// specifying \c T and \c N template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, simd src0, + SrcSimdViewT src1, PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src1 parameter must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0, src1.read(), props); +} + +/// simd +/// atomic_update(T *p, simd byte_offset, +/// SrcSimdViewT src0, SrcSimdViewT src1, +/// props = {}); +/// +/// A variation of \c atomic_update API with \c src0 and \c src1 represented as +/// \c simd_view object without \c mask operand and allows the use without +/// specifying \c T and \c N template parameters. + +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+/// +template < + atomic_op Op, typename SrcSimdViewT, typename T, int N, typename Toffset, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + detail::is_simd_view_type_v && + ext::oneapi::experimental::is_property_list_v, + simd> +atomic_update(T *p, simd byte_offset, SrcSimdViewT src0, + SrcSimdViewT src1, PropertyListT props = {}) { + static_assert( + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + "Size of src1 and src0 parameters must correspond to the size of " + "byte_offset parameter."); + return atomic_update(p, byte_offset, src0.read(), src1.read(), + props); +} + /// simd /// atomic_update(T *p, OffsetSimdViewT byte_offset, /// simd src0, simd src1, @@ -9708,6 +10147,124 @@ atomic_update(T *p, OffsetSimdViewT byte_offset, simd src0, props); } +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// SrcSimdViewT src0, simd src1, +/// simd_mask mask, props = {}) +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+template < + atomic_op Op, typename SrcSimdViewT, typename OffsetSimdViewT, typename T, + int N, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, SrcSimdViewT src0, + simd src1, simd_mask mask, PropertyListT props = {}) { + static_assert( + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() && + N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + "Size of src0 and byte_offset parameters must correspond to the size of " + "mask parameter."); + return atomic_update(p, byte_offset.read(), src0.read(), src1, mask, + props); +} + +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// simd src0, SrcSimdViewT src1, +/// simd_mask mask, props = {}) +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+template < + atomic_op Op, typename SrcSimdViewT, typename OffsetSimdViewT, typename T, + int N, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, simd src0, + SrcSimdViewT src1, simd_mask mask, PropertyListT props = {}) { + static_assert( + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() && + N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + "Size of src1 and byte_offset parameters must correspond to the size of " + "mask parameter."); + return atomic_update(p, byte_offset.read(), src0, src1.read(), mask, + props); +} + +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// SrcSimdViewT src0, SrcSimdViewT src1, +/// simd_mask mask, props = {}) +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param mask Operation mask, only locations with non-zero in the +/// corresponding mask element are updated. +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+template < + atomic_op Op, typename SrcSimdViewT, typename OffsetSimdViewT, typename T, + int N, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, SrcSimdViewT src0, + SrcSimdViewT src1, simd_mask mask, PropertyListT props = {}) { + static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() && + N == OffsetSimdViewT::getSizeX() * + OffsetSimdViewT::getSizeY(), + "Size of src0, src1 and byte_offset parameters must correspond " + "to the size of " + "mask parameter."); + return atomic_update(p, byte_offset.read(), src0.read(), + src1.read(), mask, props); +} + /// simd /// atomic_update(T *p, OffsetSimdViewT byte_offset, /// simd src0, simd src1, @@ -9741,6 +10298,114 @@ atomic_update(T *p, OffsetSimdViewT byte_offset, simd src0, props); } +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// SrcSimdViewT src0, simd src1, +/// props = {}) +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+template < + atomic_op Op, typename SrcSimdViewT, typename OffsetSimdViewT, typename T, + int N, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, SrcSimdViewT src0, + simd src1, PropertyListT props = {}) { + static_assert( + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() && + N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + "Size of src0 and byte_offset parameters must correspond to the size of " + "src1 parameter."); + return atomic_update(p, byte_offset.read(), src0.read(), src1, + props); +} + +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// simd src0, SrcSimdViewT src1, +/// props = {}) +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+template < + atomic_op Op, typename SrcSimdViewT, typename OffsetSimdViewT, typename T, + int N, + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, simd src0, + SrcSimdViewT src1, PropertyListT props = {}) { + static_assert( + N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() && + N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + "Size of src1 and byte_offset parameters must correspond to the size of " + "src0 parameter."); + return atomic_update(p, byte_offset.read(), src0, src1.read(), + props); +} + +/// simd +/// atomic_update(T *p, OffsetSimdViewT byte_offset, +/// SrcSimdViewT src0, SrcSimdViewT src1, +/// props = {}) +/// +/// @tparam Op The atomic operation - can be one of the following: +/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. +/// @param p The USM pointer. +/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. +/// @param src0 The first additional argument (new value). +/// @param src1 The second additional argument (expected value). +/// @param props The parameter 'props' specifies the optional compile-time +/// properties list. Only L1/L2 properties are used. +/// Other properties are ignored. +/// @return A vector of the old values at the memory locations before the +/// update.
+template < + atomic_op Op, typename SrcSimdViewT, typename OffsetSimdViewT, typename T, + int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), + typename PropertyListT = ext::oneapi::experimental::empty_properties_t> +__ESIMD_API std::enable_if_t< + __ESIMD_DNS::get_num_args() == 2 && + ext::oneapi::experimental::is_property_list_v && + detail::is_simd_view_type_v && + detail::is_simd_view_type_v, + simd> +atomic_update(T *p, OffsetSimdViewT byte_offset, SrcSimdViewT src0, + SrcSimdViewT src1, PropertyListT props = {}) { + static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), + "Size of src0, src1 and byte_offset parameters must be equal."); + return atomic_update(p, byte_offset.read(), src0.read(), + src1.read(), props); +} + /// A variation of \c atomic_update API with \c byte_offset represented as /// scalar. /// diff --git a/sycl/test/esimd/memory_properties_atomic_update.cpp b/sycl/test/esimd/memory_properties_atomic_update.cpp index a71d64ef984d5..3189d98e2e36d 100644 --- a/sycl/test/esimd/memory_properties_atomic_update.cpp +++ b/sycl/test/esimd/memory_properties_atomic_update.cpp @@ -87,17 +87,22 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf, auto res_atomic_2 = atomic_update(ptr, offsets, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}} i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}} i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_3 = atomic_update(ptr, offsets_view, pred, props_a); + res_atomic_3 = + atomic_update(ptr, offsets_view, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}} i8 8, 
i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}} i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_4 = atomic_update(ptr, offsets_view, props_a); + res_atomic_4 = atomic_update(ptr, offsets_view, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}} i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}} i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_5 = atomic_update( ptr, offsets_view.select(), props_a); + res_atomic_5 = atomic_update( + ptr, offsets_view.select(), props_a); // atomic_upate without cache hints: // CHECK: call <4 x i32> @llvm.genx.svm.atomic.inc.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, <4 x i64> {{[^)]+}}, <4 x i32> undef) @@ -212,41 +217,59 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf, auto res_atomic_1 = atomic_update(ptr, offsets, add, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_2 = atomic_update( ptr, offsets, add_view, pred, props_a); + res_atomic_2 
= + atomic_update(ptr, offsets, add_view, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_3 = atomic_update(ptr, offsets, add_view, props_a); + res_atomic_3 = + atomic_update(ptr, offsets, add_view, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) res_atomic_3 = atomic_update( ptr, offsets, add_view.select(), props_a); + res_atomic_3 = atomic_update( + ptr, offsets, add_view.select(), props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_4 = atomic_update( ptr, offsets_view, add, pred, props_a); + res_atomic_4 = + atomic_update(ptr, offsets_view, add, pred, props_a); - // CHECK: call <4 x i32> 
@llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_5 = atomic_update(ptr, offsets_view, add, props_a); + res_atomic_5 = + atomic_update(ptr, offsets_view, add, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) res_atomic_5 = atomic_update( ptr, offsets_view.select(), add, props_a); + res_atomic_5 = atomic_update( + ptr, offsets_view.select(), add, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_6 = atomic_update( ptr, offsets_view, add_view, pred, props_a); + res_atomic_6 = atomic_update(ptr, offsets_view, add_view, + pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, 
i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) auto res_atomic_7 = atomic_update( ptr, offsets_view, add_view, props_a); + res_atomic_7 = + atomic_update(ptr, offsets_view, add_view, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef) res_atomic_7 = atomic_update( ptr, offsets_view.select(), add_view.select(), props_a); + res_atomic_7 = atomic_update( + ptr, offsets_view.select(), add_view.select(), props_a); // atomic_update without cache hints: // CHECK: call <4 x i32> @llvm.genx.svm.atomic.add.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, <4 x i64> {{[^)]+}}, <4 x i32> undef) @@ -345,67 +368,97 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf, auto res_atomic_2 = atomic_update( ptr, offsets, swap, compare, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_3 = 
atomic_update( ptr, offsets, swap, compare_view, pred, props_a); + res_atomic_3 = atomic_update( + ptr, offsets, swap, compare_view, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) res_atomic_3 = atomic_update( ptr, offsets, swap, compare_view.select(), pred, props_a); + res_atomic_3 = atomic_update( + ptr, offsets, swap, compare_view.select(), pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_4 = atomic_update( ptr, offsets, swap, compare_view, props_a); + res_atomic_4 = atomic_update(ptr, offsets, swap, + compare_view, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_5 = atomic_update( ptr, 
offsets, swap_view, compare, pred, props_a); + res_atomic_5 = atomic_update(ptr, offsets, swap_view, + compare, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_6 = atomic_update( ptr, offsets, swap_view, compare, props_a); + res_atomic_6 = atomic_update(ptr, offsets, swap_view, + compare, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_7 = atomic_update( ptr, offsets, swap_view, compare_view, pred, props_a); + res_atomic_7 = atomic_update( + ptr, offsets, swap_view, compare_view, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_8 = atomic_update( ptr, offsets, swap_view, 
compare_view, props_a); + res_atomic_8 = atomic_update(ptr, offsets, swap_view, + compare_view, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_9 = atomic_update( ptr, offsets_view, swap, compare, pred, props_a); + res_atomic_9 = atomic_update(ptr, offsets_view, swap, + compare, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_10 = atomic_update( ptr, offsets_view, swap, compare, props_a); + res_atomic_10 = atomic_update(ptr, offsets_view, swap, + compare, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_11 = atomic_update( ptr, offsets_view, swap, compare_view, pred, props_a); + 
res_atomic_11 = atomic_update( + ptr, offsets_view, swap, compare_view, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_12 = atomic_update( ptr, offsets_view, swap, compare_view, props_a); + res_atomic_12 = atomic_update(ptr, offsets_view, swap, + compare_view, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_13 = atomic_update( ptr, offsets_view, swap_view, compare, pred, props_a); + res_atomic_13 = atomic_update( + ptr, offsets_view, swap_view, compare, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_14 = atomic_update( ptr, offsets_view, swap_view, compare, props_a); + 
res_atomic_14 = atomic_update( + ptr, offsets_view, swap_view, compare, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_15 = atomic_update( ptr, offsets_view, swap_view, compare_view, pred, props_a); + res_atomic_15 = atomic_update( + ptr, offsets_view, swap_view, compare_view, pred, props_a); - // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-2: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) auto res_atomic_16 = atomic_update( ptr, offsets_view, swap_view, compare_view, props_a); + res_atomic_16 = atomic_update( + ptr, offsets_view, swap_view, compare_view, props_a); - // CHECK-COUNT-13: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) + // CHECK-COUNT-26: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef) res_atomic_4 = atomic_update( ptr, offsets, swap, 
compare_view.select(), props_a); @@ -453,6 +506,53 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf, ptr, offsets_view.select(), swap_view.select(), compare_view.select(), props_a); + res_atomic_4 = atomic_update( + ptr, offsets, swap, compare_view.select(), props_a); + + res_atomic_5 = atomic_update( + ptr, offsets, swap_view.select(), compare, pred, props_a); + + res_atomic_6 = atomic_update( + ptr, offsets, swap_view.select(), compare, props_a); + + res_atomic_7 = atomic_update( + ptr, offsets, swap_view.select(), compare_view.select(), + pred, props_a); + + res_atomic_8 = atomic_update( + ptr, offsets, swap_view.select(), compare_view.select(), + props_a); + + res_atomic_9 = atomic_update( + ptr, offsets_view.select(), swap, compare, pred, props_a); + + res_atomic_10 = atomic_update( + ptr, offsets_view.select(), swap, compare, props_a); + + res_atomic_11 = atomic_update( + ptr, offsets_view.select(), swap, compare_view.select(), + pred, props_a); + + res_atomic_12 = atomic_update( + ptr, offsets_view.select(), swap, compare_view.select(), + props_a); + + res_atomic_13 = atomic_update( + ptr, offsets_view.select(), swap_view.select(), compare, + pred, props_a); + + res_atomic_14 = atomic_update( + ptr, offsets_view.select(), swap_view.select(), compare, + props_a); + + res_atomic_15 = atomic_update( + ptr, offsets_view.select(), swap_view.select(), + compare_view.select(), pred, props_a); + + res_atomic_16 = atomic_update( + ptr, offsets_view.select(), swap_view.select(), + compare_view.select(), props_a); + { constexpr int VL = 8; simd offsets = simd(1) * sizeof(int);