Merge branch 'sycl' into review/yang/dsan_nullpointer

intel · Aug 23, 2024 · 1308e45 · 1308e45
2 parents 3ad24d7 + c603a7f
commit 1308e45
Show file tree

Hide file tree

Showing 19 changed files with 138 additions and 186 deletions.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -299,10 +299,6 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
   if (Name == "memset" || Name == "memcpy")
     return false;
 
-  // Prevent from asan interception in kernel
-  if (Name == "__asan_set_shadow_local_memory")
-    return false;
-
   // Determine if code size heuristics need to be applied.
   ApplyCodeSizeHeuristics =
       L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;

diff --git a/sycl-jit/test/materializer/debug_output.ll b/sycl-jit/test/materializer/debug_output.ll
diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp
@@ -58,6 +58,7 @@
 #include <sycl/exception.hpp>                  // for errc
 
 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
+#include <sycl/vector.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <cfenv> // for fesetround, fegetround
@@ -153,8 +154,6 @@ __imf_ushort_as_bfloat16(unsigned short x);
 
 namespace sycl {
 
-enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };
-
 inline namespace _V1 {
 #ifndef __SYCL_DEVICE_ONLY__
 // TODO: Refactor includes so we can just "#include".
@@ -870,6 +869,98 @@ auto ConvertImpl(std::byte val) {
 }
 #endif
 
+// We interpret bool as int8_t, std::byte as uint8_t for conversion to other
+// types.
+template <typename T>
+using ConvertBoolAndByteT =
+    typename detail::map_type<T,
+#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
+                              std::byte, /*->*/ std::uint8_t, //
+#endif
+                              bool, /*->*/ std::uint8_t, //
+                              T, /*->*/ T                //
+                              >::type;
 } // namespace detail
+
+template <typename DataT, int NumElements>
+template <typename convertT, rounding_mode roundingMode>
+vec<convertT, NumElements> vec<DataT, NumElements>::convert() const {
+  using T = detail::ConvertBoolAndByteT<DataT>;
+  using R = detail::ConvertBoolAndByteT<convertT>;
+  using bfloat16 = sycl::ext::oneapi::bfloat16;
+  static_assert(std::is_integral_v<R> || detail::is_floating_point<R>::value ||
+                    std::is_same_v<R, bfloat16>,
+                "Unsupported convertT");
+
+  using OpenCLT = detail::ConvertToOpenCLType_t<T>;
+  using OpenCLR = detail::ConvertToOpenCLType_t<R>;
+  vec<convertT, NumElements> Result;
+
+  // convertImpl can't be called with the same From and To types and therefore
+  // we need some special processing in a few cases.
+  if constexpr (std::is_same_v<DataT, convertT>) {
+    return *this;
+  } else if constexpr (std::is_same_v<OpenCLT, OpenCLR> ||
+                       std::is_same_v<T, R>) {
+    for (size_t I = 0; I < NumElements; ++I)
+      Result[I] = static_cast<convertT>(getValue(I));
+    return Result;
+  } else {
+
+#ifdef __SYCL_DEVICE_ONLY__
+    using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
+    using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));
+
+    auto NativeVector = sycl::bit_cast<vector_t>(*this);
+    using ConvertTVecType = typename vec<convertT, NumElements>::vector_t;
+
+    // Whole vector conversion can only be done, if:
+    constexpr bool canUseNativeVectorConvert =
+#ifdef __NVPTX__
+        //  TODO: Likely unnecessary as
+        //  https://github.com/intel/llvm/issues/11840 has been closed
+        //  already.
+        false &&
+#endif
+        NumElements > 1 &&
+        // - vec storage has an equivalent OpenCL native vector it is
+        //   implicitly convertible to. There are some corner cases where it
+        //   is not the case with char, long and long long types.
+        std::is_convertible_v<vector_t, OpenCLVecT> &&
+        std::is_convertible_v<ConvertTVecType, OpenCLVecR> &&
+        // - it is not a signed to unsigned (or vice versa) conversion
+        //   see comments within 'convertImpl' for more details;
+        !detail::is_sint_to_from_uint<T, R>::value &&
+        // - destination type is not bool. bool is stored as integer under the
+        //   hood and therefore conversion to bool looks like conversion
+        //   between two integer types. Since bit pattern for true and false
+        //   is not defined, there is no guarantee that integer conversion
+        //   yields right results here;
+        !std::is_same_v<convertT, bool>;
+
+    if constexpr (canUseNativeVectorConvert) {
+      auto val = detail::convertImpl<T, R, roundingMode, NumElements,
+                                     OpenCLVecT, OpenCLVecR>(NativeVector);
+      Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
+    } else
+#endif // __SYCL_DEVICE_ONLY__
+    {
+      // Otherwise, we fallback to per-element conversion:
+      for (size_t I = 0; I < NumElements; ++I) {
+        auto val = detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
+            getValue(I));
+#ifdef __SYCL_DEVICE_ONLY__
+        // On device, we interpret BF16 as uint16.
+        if constexpr (std::is_same_v<convertT, bfloat16>)
+          Result[I] = sycl::bit_cast<convertT>(val);
+        else
+#endif
+          Result[I] = static_cast<convertT>(val);
+      }
+    }
+  }
+  return Result;
+}
+
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/include/sycl/types.hpp b/sycl/include/sycl/types.hpp
@@ -21,6 +21,9 @@
 #include <sycl/half_type.hpp>                  // for StorageT, half, Vec16...
 #include <sycl/marray.hpp>                     // for __SYCL_BINOP, __SYCL_...
 #include <sycl/multi_ptr.hpp>                  // for multi_ptr
-#include <sycl/vector.hpp>                     // for sycl::vec and swizzles
+
+#include <sycl/vector.hpp>
+
+#include <sycl/detail/vector_convert.hpp>
 
 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp
@@ -36,7 +36,6 @@
 #include <sycl/detail/type_list.hpp>           // for is_contained
 #include <sycl/detail/type_traits.hpp>         // for is_floating_point
 #include <sycl/detail/vector_arith.hpp>
-#include <sycl/detail/vector_convert.hpp>      // for convertImpl
 #include <sycl/half_type.hpp>                  // for StorageT, half, Vec16...
 
 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
@@ -53,6 +52,10 @@
 #include <utility>     // for index_sequence, make_...
 
 namespace sycl {
+
+// TODO: Fix in the next ABI breaking windows.
+enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };
+
 inline namespace _V1 {
 
 struct elem {
@@ -406,18 +409,6 @@ class __SYCL_EBO vec
   static constexpr size_t byte_size() noexcept { return sizeof(m_Data); }
 
 private:
-  // We interpret bool as int8_t, std::byte as uint8_t for conversion to other
-  // types.
-  template <typename T>
-  using ConvertBoolAndByteT =
-      typename detail::map_type<T,
-#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
-                                std::byte, /*->*/ std::uint8_t, //
-#endif
-                                bool, /*->*/ std::uint8_t, //
-                                T, /*->*/ T                //
-                                >::type;
-
   // getValue should be able to operate on different underlying
   // types: enum cl_float#N , builtin vector float#N, builtin type float.
   constexpr auto getValue(int Index) const {
@@ -439,88 +430,10 @@ class __SYCL_EBO vec
   }
 
 public:
+  // Out-of-class definition is in `sycl/detail/vector_convert.hpp`
   template <typename convertT,
             rounding_mode roundingMode = rounding_mode::automatic>
-  vec<convertT, NumElements> convert() const {
-
-    using T = ConvertBoolAndByteT<DataT>;
-    using R = ConvertBoolAndByteT<convertT>;
-    using bfloat16 = sycl::ext::oneapi::bfloat16;
-    static_assert(std::is_integral_v<R> ||
-                      detail::is_floating_point<R>::value ||
-                      std::is_same_v<R, bfloat16>,
-                  "Unsupported convertT");
-
-    using OpenCLT = detail::ConvertToOpenCLType_t<T>;
-    using OpenCLR = detail::ConvertToOpenCLType_t<R>;
-    vec<convertT, NumElements> Result;
-
-    // convertImpl can't be called with the same From and To types and therefore
-    // we need some special processing in a few cases.
-    if constexpr (std::is_same_v<DataT, convertT>) {
-      return *this;
-    } else if constexpr (std::is_same_v<OpenCLT, OpenCLR> ||
-                         std::is_same_v<T, R>) {
-      for (size_t I = 0; I < NumElements; ++I)
-        Result[I] = static_cast<convertT>(getValue(I));
-      return Result;
-    } else {
-
-#ifdef __SYCL_DEVICE_ONLY__
-      using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
-      using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));
-
-      auto NativeVector = sycl::bit_cast<vector_t>(*this);
-      using ConvertTVecType = typename vec<convertT, NumElements>::vector_t;
-
-      // Whole vector conversion can only be done, if:
-      constexpr bool canUseNativeVectorConvert =
-#ifdef __NVPTX__
-          //  TODO: Likely unnecessary as
-          //  https://github.com/intel/llvm/issues/11840 has been closed
-          //  already.
-          false &&
-#endif
-          NumElements > 1 &&
-          // - vec storage has an equivalent OpenCL native vector it is
-          //   implicitly convertible to. There are some corner cases where it
-          //   is not the case with char, long and long long types.
-          std::is_convertible_v<vector_t, OpenCLVecT> &&
-          std::is_convertible_v<ConvertTVecType, OpenCLVecR> &&
-          // - it is not a signed to unsigned (or vice versa) conversion
-          //   see comments within 'convertImpl' for more details;
-          !detail::is_sint_to_from_uint<T, R>::value &&
-          // - destination type is not bool. bool is stored as integer under the
-          //   hood and therefore conversion to bool looks like conversion
-          //   between two integer types. Since bit pattern for true and false
-          //   is not defined, there is no guarantee that integer conversion
-          //   yields right results here;
-          !std::is_same_v<convertT, bool>;
-
-      if constexpr (canUseNativeVectorConvert) {
-        auto val = detail::convertImpl<T, R, roundingMode, NumElements, OpenCLVecT,
-                                OpenCLVecR>(NativeVector);
-        Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
-      } else
-#endif // __SYCL_DEVICE_ONLY__
-      {
-        // Otherwise, we fallback to per-element conversion:
-        for (size_t I = 0; I < NumElements; ++I) {
-          auto val =
-              detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
-                  getValue(I));
-#ifdef __SYCL_DEVICE_ONLY__
-          // On device, we interpret BF16 as uint16.
-          if constexpr (std::is_same_v<convertT, bfloat16>)
-            Result[I] = sycl::bit_cast<convertT>(val);
-          else
-#endif
-            Result[I] = static_cast<convertT>(val);
-        }
-      }
-    }
-    return Result;
-  }
+  vec<convertT, NumElements> convert() const;
 
   template <typename asT> asT as() const { return sycl::bit_cast<asT>(*this); }
 

diff --git a/sycl/test-e2e/AmdNvidiaJIT/kernel_and_bundle.cpp b/sycl/test-e2e/AmdNvidiaJIT/kernel_and_bundle.cpp
@@ -1,11 +1,18 @@
+// UNSUPPORTED: windows
 // REQUIRES: cuda || hip
 
-// https://github.com/intel/llvm/issues/14989
-// UNSUPPORTED: hip_amd
+// This test relies on debug output from a pass, make sure that the compiler
+// can generate it.
+// REQUIRES: has_ndebug
 
 // RUN: %{build} -fsycl-embed-ir -o %t.out
 // RUN: env SYCL_JIT_AMDGCN_PTX_KERNELS=1 env SYCL_JIT_COMPILER_DEBUG="sycl-spec-const-materializer" %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt
 
+// Test the JIT compilation in an e2e fashion, the only way to make sure that
+// the JIT pipeline has been executed and that the original binary has been
+// replaced with the JIT-ed one is to inspect the output of one of its passes,
+// that otherwise does not get run.
+
 #include <sycl/detail/core.hpp>
 #include <sycl/specialization_id.hpp>
 

diff --git a/sycl/test-e2e/Basic/buffer/buffer_release.cpp b/sycl/test-e2e/Basic/buffer/buffer_release.cpp
@@ -1,6 +1,4 @@
 // REQUIRES: cpu
-// UNSUPPORTED: windows
-//   DeferredMemory Destruction not presently supported on Windows.
 
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out

diff --git a/sycl/test-e2e/Basic/buffer/subbuffer.cpp b/sycl/test-e2e/Basic/buffer/subbuffer.cpp
@@ -1,6 +1,7 @@
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
 
+// See https://github.com/intel/llvm/issues/15151
 // UNSUPPORTED: (opencl && gpu)
 
 //

diff --git a/sycl/test-e2e/Basic/build_log.cpp b/sycl/test-e2e/Basic/build_log.cpp
@@ -1,5 +1,5 @@
 // REQUIRES: opencl || level_zero, gpu, ocloc
-// UNSUPPORTED: gpu-intel-dg1 || windows
+// UNSUPPORTED: gpu-intel-dg1
 //
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device dg1" %s -o %t.out
 // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s

diff --git a/sycl/test-e2e/Basic/enqueue_barrier.cpp b/sycl/test-e2e/Basic/enqueue_barrier.cpp
@@ -1,10 +1,6 @@
 // RUN: %{build} -o %t.out
 // RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s
 
-// The test is failing sporadically on Windows OpenCL RTs
-// Disabling on windows until fixed
-// UNSUPPORTED: windows
-
 #include <sycl/ext/intel/fpga_device_selector.hpp>
 #include <sycl/detail/core.hpp>
 

diff --git a/sycl/test-e2e/Basic/event_profiling_info.cpp b/sycl/test-e2e/Basic/event_profiling_info.cpp
@@ -10,9 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Fails there.
-// UNSUPPORTED: opencl && arch-intel_gpu_pvc
-
 #include <cassert>
 #include <iostream>
 #include <sycl/detail/core.hpp>

diff --git a/sycl/test-e2e/Basic/image/image_accessor_readsampler.cpp b/sycl/test-e2e/Basic/image/image_accessor_readsampler.cpp
@@ -1,6 +1,5 @@
 // REQUIRES: aspect-ext_intel_legacy_image
-// UNSUPPORTED: cuda || hip || (windows && level_zero)
-// unsupported on windows (level-zero) due to fail of Jenkins/pre-ci-windows
+// UNSUPPORTED: cuda || hip
 // CUDA cannot support SYCL 1.2.1 images.
 //
 // RUN: %{build} -o %t.out