Skip to content

Commit

Permalink
Merge branch 'sycl' into review/yang/dsan_nullpointer
Browse files Browse the repository at this point in the history
  • Loading branch information
AllanZyne committed Aug 23, 2024
2 parents 3ad24d7 + c603a7f commit 1308e45
Show file tree
Hide file tree
Showing 19 changed files with 138 additions and 186 deletions.
4 changes: 0 additions & 4 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,6 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
if (Name == "memset" || Name == "memcpy")
return false;

// Prevent from asan interception in kernel
if (Name == "__asan_set_shadow_local_memory")
return false;

// Determine if code size heuristics need to be applied.
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
Expand Down
60 changes: 0 additions & 60 deletions sycl-jit/test/materializer/debug_output.ll

This file was deleted.

95 changes: 93 additions & 2 deletions sycl/include/sycl/detail/vector_convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include <sycl/exception.hpp> // for errc

#include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
#include <sycl/vector.hpp>

#ifndef __SYCL_DEVICE_ONLY__
#include <cfenv> // for fesetround, fegetround
Expand Down Expand Up @@ -153,8 +154,6 @@ __imf_ushort_as_bfloat16(unsigned short x);

namespace sycl {

enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };

inline namespace _V1 {
#ifndef __SYCL_DEVICE_ONLY__
// TODO: Refactor includes so we can just "#include".
Expand Down Expand Up @@ -870,6 +869,98 @@ auto ConvertImpl(std::byte val) {
}
#endif

// We interpret bool as int8_t, std::byte as uint8_t for conversion to other
// types.
template <typename T>
using ConvertBoolAndByteT =
typename detail::map_type<T,
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
std::byte, /*->*/ std::uint8_t, //
#endif
bool, /*->*/ std::uint8_t, //
T, /*->*/ T //
>::type;
} // namespace detail

template <typename DataT, int NumElements>
template <typename convertT, rounding_mode roundingMode>
vec<convertT, NumElements> vec<DataT, NumElements>::convert() const {
using T = detail::ConvertBoolAndByteT<DataT>;
using R = detail::ConvertBoolAndByteT<convertT>;
using bfloat16 = sycl::ext::oneapi::bfloat16;
static_assert(std::is_integral_v<R> || detail::is_floating_point<R>::value ||
std::is_same_v<R, bfloat16>,
"Unsupported convertT");

using OpenCLT = detail::ConvertToOpenCLType_t<T>;
using OpenCLR = detail::ConvertToOpenCLType_t<R>;
vec<convertT, NumElements> Result;

// convertImpl can't be called with the same From and To types and therefore
// we need some special processing in a few cases.
if constexpr (std::is_same_v<DataT, convertT>) {
return *this;
} else if constexpr (std::is_same_v<OpenCLT, OpenCLR> ||
std::is_same_v<T, R>) {
for (size_t I = 0; I < NumElements; ++I)
Result[I] = static_cast<convertT>(getValue(I));
return Result;
} else {

#ifdef __SYCL_DEVICE_ONLY__
using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));

auto NativeVector = sycl::bit_cast<vector_t>(*this);
using ConvertTVecType = typename vec<convertT, NumElements>::vector_t;

// Whole vector conversion can only be done, if:
constexpr bool canUseNativeVectorConvert =
#ifdef __NVPTX__
// TODO: Likely unnecessary as
// https://github.com/intel/llvm/issues/11840 has been closed
// already.
false &&
#endif
NumElements > 1 &&
// - vec storage has an equivalent OpenCL native vector it is
// implicitly convertible to. There are some corner cases where it
// is not the case with char, long and long long types.
std::is_convertible_v<vector_t, OpenCLVecT> &&
std::is_convertible_v<ConvertTVecType, OpenCLVecR> &&
// - it is not a signed to unsigned (or vice versa) conversion
// see comments within 'convertImpl' for more details;
!detail::is_sint_to_from_uint<T, R>::value &&
// - destination type is not bool. bool is stored as integer under the
// hood and therefore conversion to bool looks like conversion
// between two integer types. Since bit pattern for true and false
// is not defined, there is no guarantee that integer conversion
// yields right results here;
!std::is_same_v<convertT, bool>;

if constexpr (canUseNativeVectorConvert) {
auto val = detail::convertImpl<T, R, roundingMode, NumElements,
OpenCLVecT, OpenCLVecR>(NativeVector);
Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
} else
#endif // __SYCL_DEVICE_ONLY__
{
// Otherwise, we fallback to per-element conversion:
for (size_t I = 0; I < NumElements; ++I) {
auto val = detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
getValue(I));
#ifdef __SYCL_DEVICE_ONLY__
// On device, we interpret BF16 as uint16.
if constexpr (std::is_same_v<convertT, bfloat16>)
Result[I] = sycl::bit_cast<convertT>(val);
else
#endif
Result[I] = static_cast<convertT>(val);
}
}
}
return Result;
}

} // namespace _V1
} // namespace sycl
5 changes: 4 additions & 1 deletion sycl/include/sycl/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#include <sycl/half_type.hpp> // for StorageT, half, Vec16...
#include <sycl/marray.hpp> // for __SYCL_BINOP, __SYCL_...
#include <sycl/multi_ptr.hpp> // for multi_ptr
#include <sycl/vector.hpp> // for sycl::vec and swizzles

#include <sycl/vector.hpp>

#include <sycl/detail/vector_convert.hpp>

#include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
99 changes: 6 additions & 93 deletions sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
#include <sycl/detail/type_list.hpp> // for is_contained
#include <sycl/detail/type_traits.hpp> // for is_floating_point
#include <sycl/detail/vector_arith.hpp>
#include <sycl/detail/vector_convert.hpp> // for convertImpl
#include <sycl/half_type.hpp> // for StorageT, half, Vec16...

#include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
Expand All @@ -53,6 +52,10 @@
#include <utility> // for index_sequence, make_...

namespace sycl {

// TODO: Fix in the next ABI breaking windows.
enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };

inline namespace _V1 {

struct elem {
Expand Down Expand Up @@ -406,18 +409,6 @@ class __SYCL_EBO vec
static constexpr size_t byte_size() noexcept { return sizeof(m_Data); }

private:
// We interpret bool as int8_t, std::byte as uint8_t for conversion to other
// types.
template <typename T>
using ConvertBoolAndByteT =
typename detail::map_type<T,
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
std::byte, /*->*/ std::uint8_t, //
#endif
bool, /*->*/ std::uint8_t, //
T, /*->*/ T //
>::type;

// getValue should be able to operate on different underlying
// types: enum cl_float#N , builtin vector float#N, builtin type float.
constexpr auto getValue(int Index) const {
Expand All @@ -439,88 +430,10 @@ class __SYCL_EBO vec
}

public:
// Out-of-class definition is in `sycl/detail/vector_convert.hpp`
template <typename convertT,
rounding_mode roundingMode = rounding_mode::automatic>
vec<convertT, NumElements> convert() const {

using T = ConvertBoolAndByteT<DataT>;
using R = ConvertBoolAndByteT<convertT>;
using bfloat16 = sycl::ext::oneapi::bfloat16;
static_assert(std::is_integral_v<R> ||
detail::is_floating_point<R>::value ||
std::is_same_v<R, bfloat16>,
"Unsupported convertT");

using OpenCLT = detail::ConvertToOpenCLType_t<T>;
using OpenCLR = detail::ConvertToOpenCLType_t<R>;
vec<convertT, NumElements> Result;

// convertImpl can't be called with the same From and To types and therefore
// we need some special processing in a few cases.
if constexpr (std::is_same_v<DataT, convertT>) {
return *this;
} else if constexpr (std::is_same_v<OpenCLT, OpenCLR> ||
std::is_same_v<T, R>) {
for (size_t I = 0; I < NumElements; ++I)
Result[I] = static_cast<convertT>(getValue(I));
return Result;
} else {

#ifdef __SYCL_DEVICE_ONLY__
using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));

auto NativeVector = sycl::bit_cast<vector_t>(*this);
using ConvertTVecType = typename vec<convertT, NumElements>::vector_t;

// Whole vector conversion can only be done, if:
constexpr bool canUseNativeVectorConvert =
#ifdef __NVPTX__
// TODO: Likely unnecessary as
// https://github.com/intel/llvm/issues/11840 has been closed
// already.
false &&
#endif
NumElements > 1 &&
// - vec storage has an equivalent OpenCL native vector it is
// implicitly convertible to. There are some corner cases where it
// is not the case with char, long and long long types.
std::is_convertible_v<vector_t, OpenCLVecT> &&
std::is_convertible_v<ConvertTVecType, OpenCLVecR> &&
// - it is not a signed to unsigned (or vice versa) conversion
// see comments within 'convertImpl' for more details;
!detail::is_sint_to_from_uint<T, R>::value &&
// - destination type is not bool. bool is stored as integer under the
// hood and therefore conversion to bool looks like conversion
// between two integer types. Since bit pattern for true and false
// is not defined, there is no guarantee that integer conversion
// yields right results here;
!std::is_same_v<convertT, bool>;

if constexpr (canUseNativeVectorConvert) {
auto val = detail::convertImpl<T, R, roundingMode, NumElements, OpenCLVecT,
OpenCLVecR>(NativeVector);
Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
} else
#endif // __SYCL_DEVICE_ONLY__
{
// Otherwise, we fallback to per-element conversion:
for (size_t I = 0; I < NumElements; ++I) {
auto val =
detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
getValue(I));
#ifdef __SYCL_DEVICE_ONLY__
// On device, we interpret BF16 as uint16.
if constexpr (std::is_same_v<convertT, bfloat16>)
Result[I] = sycl::bit_cast<convertT>(val);
else
#endif
Result[I] = static_cast<convertT>(val);
}
}
}
return Result;
}
vec<convertT, NumElements> convert() const;

template <typename asT> asT as() const { return sycl::bit_cast<asT>(*this); }

Expand Down
11 changes: 9 additions & 2 deletions sycl/test-e2e/AmdNvidiaJIT/kernel_and_bundle.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
// UNSUPPORTED: windows
// REQUIRES: cuda || hip

// https://github.com/intel/llvm/issues/14989
// UNSUPPORTED: hip_amd
// This test relies on debug output from a pass, make sure that the compiler
// can generate it.
// REQUIRES: has_ndebug

// RUN: %{build} -fsycl-embed-ir -o %t.out
// RUN: env SYCL_JIT_AMDGCN_PTX_KERNELS=1 env SYCL_JIT_COMPILER_DEBUG="sycl-spec-const-materializer" %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt

// Test the JIT compilation in an e2e fashion, the only way to make sure that
// the JIT pipeline has been executed and that the original binary has been
// replaced with the JIT-ed one is to inspect the output of one of its passes,
// that otherwise does not get run.

#include <sycl/detail/core.hpp>
#include <sycl/specialization_id.hpp>

Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Basic/buffer/buffer_release.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// REQUIRES: cpu
// UNSUPPORTED: windows
// DeferredMemory Destruction not presently supported on Windows.

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
Expand Down
1 change: 1 addition & 0 deletions sycl/test-e2e/Basic/buffer/subbuffer.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// See https://github.com/intel/llvm/issues/15151
// UNSUPPORTED: (opencl && gpu)

//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/build_log.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// REQUIRES: opencl || level_zero, gpu, ocloc
// UNSUPPORTED: gpu-intel-dg1 || windows
// UNSUPPORTED: gpu-intel-dg1
//
// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device dg1" %s -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s
Expand Down
4 changes: 0 additions & 4 deletions sycl/test-e2e/Basic/enqueue_barrier.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
// RUN: %{build} -o %t.out
// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s

// The test is failing sporadically on Windows OpenCL RTs
// Disabling on windows until fixed
// UNSUPPORTED: windows

#include <sycl/ext/intel/fpga_device_selector.hpp>
#include <sycl/detail/core.hpp>

Expand Down
3 changes: 0 additions & 3 deletions sycl/test-e2e/Basic/event_profiling_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
//
//===----------------------------------------------------------------------===//

// Fails there.
// UNSUPPORTED: opencl && arch-intel_gpu_pvc

#include <cassert>
#include <iostream>
#include <sycl/detail/core.hpp>
Expand Down
3 changes: 1 addition & 2 deletions sycl/test-e2e/Basic/image/image_accessor_readsampler.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// REQUIRES: aspect-ext_intel_legacy_image
// UNSUPPORTED: cuda || hip || (windows && level_zero)
// unsupported on windows (level-zero) due to fail of Jenkins/pre-ci-windows
// UNSUPPORTED: cuda || hip
// CUDA cannot support SYCL 1.2.1 images.
//
// RUN: %{build} -o %t.out
Expand Down
Loading

0 comments on commit 1308e45

Please sign in to comment.