From 8a231d192ccd52c1d63077f91ef19603af1455dd Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 5 Dec 2024 07:09:06 -0800 Subject: [PATCH 01/22] Add XFAIL trackers for several tests --- sycl/test-e2e/InvokeSimd/Spec/tuple.cpp | 1 + sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp | 1 + sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp index 6c9d869b3684a..cf4f1e4d5ec75 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp @@ -1,5 +1,6 @@ // TODO: enable when Jira ticket resolved // XFAIL: * +// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 // // Check that full compilation works: // RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp index bdc5fe9a7e19b..8c2910a6eb5f3 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp @@ -1,5 +1,6 @@ // TODO: enable when Jira ticket resolved // XFAIL: * +// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 // // Check that full compilation works: // RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp index 38f654ea769b3..6a6b1fc16f383 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp @@ -1,5 +1,6 @@ // TODO: enable when Jira ticket resolved // XFAIL: * +// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 // // Check that full compilation works: // RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out From abd8fd3f12a03d4de94550e33fb221d83e139840 Mon Sep 17 00:00:00 2001 From: Lorenc 
Bushi Date: Thu, 5 Dec 2024 07:10:52 -0800 Subject: [PATCH 02/22] Add XFAIL trackers for several tests --- sycl/test-e2e/Basic/buffer/reinterpret.cpp | 1 + sycl/test-e2e/Basic/queue/queue.cpp | 2 ++ sycl/test-e2e/Basic/queue/release.cpp | 1 + sycl/test-e2e/Basic/span.cpp | 1 + sycl/test-e2e/Basic/stream/auto_flush.cpp | 2 ++ sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp | 2 ++ sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp | 1 + sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp | 1 + sycl/test-e2e/GroupAlgorithm/root_group.cpp | 2 ++ sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp | 1 + sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp | 1 + .../InvokeSimd/Feature/ImplicitSubgroup/invoke_simd_struct.cpp | 1 + sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp | 1 + sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp | 1 + sycl/test-e2e/Reduction/reduction_nd_conditional.cpp | 3 ++- sycl/test-e2e/Reduction/reduction_nd_dw.cpp | 1 + sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp | 3 ++- sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp | 1 + sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp | 1 + sycl/test-e2e/Reduction/reduction_nd_rw.cpp | 1 + sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp | 1 + sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp | 1 + sycl/test-e2e/Reduction/reduction_span_pack.cpp | 1 + sycl/test-e2e/Reduction/reduction_usm.cpp | 1 + sycl/test-e2e/Reduction/reduction_usm_dw.cpp | 1 + 25 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Basic/buffer/reinterpret.cpp b/sycl/test-e2e/Basic/buffer/reinterpret.cpp index 691af19ff34f8..7efa1f61030ea 100644 --- a/sycl/test-e2e/Basic/buffer/reinterpret.cpp +++ b/sycl/test-e2e/Basic/buffer/reinterpret.cpp @@ -2,6 +2,7 @@ // RUN: %{run} %t.out // // XFAIL: level_zero&&gpu +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14430 //==---------- reinterpret.cpp --- SYCL buffer reinterpret basic test ------==// // diff --git 
a/sycl/test-e2e/Basic/queue/queue.cpp b/sycl/test-e2e/Basic/queue/queue.cpp index bde85310cc06c..94b628197a3d3 100644 --- a/sycl/test-e2e/Basic/queue/queue.cpp +++ b/sycl/test-e2e/Basic/queue/queue.cpp @@ -2,6 +2,8 @@ // RUN: %{run} %t.out // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16197 + //==--------------- queue.cpp - SYCL queue test ----------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/sycl/test-e2e/Basic/queue/release.cpp b/sycl/test-e2e/Basic/queue/release.cpp index d241b742a0216..b041871fb31a3 100644 --- a/sycl/test-e2e/Basic/queue/release.cpp +++ b/sycl/test-e2e/Basic/queue/release.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s %if !windows %{--check-prefixes=CHECK-RELEASE%} // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16197 #include int main() { diff --git a/sycl/test-e2e/Basic/span.cpp b/sycl/test-e2e/Basic/span.cpp index 17c84359e8140..e258997a10fb8 100644 --- a/sycl/test-e2e/Basic/span.cpp +++ b/sycl/test-e2e/Basic/span.cpp @@ -3,6 +3,7 @@ // // Fails to release USM pointer on HIP for NVIDIA // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14404 // REQUIRES: aspect-usm_shared_allocations #include diff --git a/sycl/test-e2e/Basic/stream/auto_flush.cpp b/sycl/test-e2e/Basic/stream/auto_flush.cpp index d25cf943a6f3a..ddc5b8e19e0f6 100644 --- a/sycl/test-e2e/Basic/stream/auto_flush.cpp +++ b/sycl/test-e2e/Basic/stream/auto_flush.cpp @@ -2,6 +2,8 @@ // RUN: %{run} %t.out %if !gpu || linux %{ | FileCheck %s %} // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16198 + //==-------------- copy.cpp - SYCL stream obect auto flushing test ---------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
diff --git a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp index 98f36dd106bae..ee93969e8a547 100644 --- a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp @@ -4,6 +4,8 @@ // hip_nvidia has problems constructing queues due to `No device of requested // type available`. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16199 + //==-------- queue_old_interop.cpp - SYCL queue OpenCL interop test --------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp index 8b32a880a6470..b1999b7bbf4e2 100644 --- a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp @@ -3,6 +3,7 @@ // RUN: %{run} %t.out // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16201 #include #include diff --git a/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp b/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp index 4f9063c18e784..50f8db4a060b9 100644 --- a/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp @@ -3,6 +3,7 @@ // RUN: %{run} %t.out // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16201 #include "Inputs/split-per-source.h" diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index fd467f14800e8..fc76171e36fb7 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -1,5 +1,7 @@ // Fails with opencl non-cpu, enable when fixed. // XFAIL: (opencl && !cpu && !accelerator) +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641 + // RUN: %{build} -I . 
-o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp b/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp index 7356f94a69ff8..5fcd1de9b2ca5 100644 --- a/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp +++ b/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp @@ -2,6 +2,7 @@ // RUN: %{run} %t.out // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16204 #include diff --git a/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp b/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp index 1091cf90b6dca..2f9822138a2fe 100644 --- a/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp +++ b/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp @@ -2,6 +2,7 @@ // RUN: %{run} %t.out // // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16204 // The test checks that multiple calls to the same template instantiation of a // group local memory function result in separate allocations, even with device diff --git a/sycl/test-e2e/InvokeSimd/Feature/ImplicitSubgroup/invoke_simd_struct.cpp b/sycl/test-e2e/InvokeSimd/Feature/ImplicitSubgroup/invoke_simd_struct.cpp index 74b82dfc85ddc..fafd570f4bab2 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/ImplicitSubgroup/invoke_simd_struct.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/ImplicitSubgroup/invoke_simd_struct.cpp @@ -1,6 +1,7 @@ // TODO: Passing/returning structures via invoke_simd() API is not implemented // in GPU driver yet. Enable the test when GPU RT supports it. 
// XFAIL: gpu && run-mode +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14543 // // RUN: %{build} -DIMPL_SUBGROUP -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out diff --git a/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp b/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp index 014027460301c..961a91b83b4f7 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp @@ -1,6 +1,7 @@ // TODO: Passing/returning structures via invoke_simd() API is not implemented // in GPU driver yet. Enable the test when GPU RT supports it. // XFAIL: gpu, run-mode +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14543 // // RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out diff --git a/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp index 5c2a9edc4682c..b22b380465a9b 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp @@ -3,6 +3,7 @@ // Group algorithms are not supported on NVidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // This test only checks that the method queue::parallel_for() accepting // reduction, can be properly translated into queue::submit + parallel_for(). diff --git a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp index b8ab5f0952e13..76bbccb358176 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp @@ -5,7 +5,8 @@ // parallel_for with reduction requires work group size not bigger than 1` on // Nvidia. 
// XFAIL: hip_nvidia - +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 + // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_dw.cpp b/sycl/test-e2e/Reduction/reduction_nd_dw.cpp index 7bfab8d98cdf1..093c97d050c41 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_dw.cpp @@ -3,6 +3,7 @@ // // Group algorithms are not supported on Nvidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp index a4ebe61e84ed0..746ea97148cee 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp @@ -6,7 +6,8 @@ // work group size not bigger than 1` on Nvidia. // XFAIL: hip_nvidia - +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 + // This test performs basic checks of parallel_for(nd_range, reduction, func) // used with 'double' type. diff --git a/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp b/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp index 03b9ebe7ca423..cc82325754984 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp @@ -6,6 +6,7 @@ // `The implementation handling parallel_for with reduction requires // work group size not bigger than 1`. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp index 68f1ee3397576..bbefd1dbe51fb 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp @@ -3,6 +3,7 @@ // Group algorithms are not supported on NVidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_rw.cpp b/sycl/test-e2e/Reduction/reduction_nd_rw.cpp index 901188866a41a..2b26b135582d9 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_rw.cpp @@ -3,6 +3,7 @@ // // `Group algorithms are not supported on host device.` on Nvidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp index f06aab2f517a1..81d6026a2f9e0 100644 --- a/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp @@ -3,6 +3,7 @@ // Group algorithms are not supported on NVidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp index 2d1cae039550a..4a6ee054f4539 100644 --- a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp @@ -4,6 +4,7 @@ // Error message `Group algorithms are not // supported on host device.` on Nvidia. 
// XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_span_pack.cpp b/sycl/test-e2e/Reduction/reduction_span_pack.cpp index e6e5252c57577..ec55e26f781a8 100644 --- a/sycl/test-e2e/Reduction/reduction_span_pack.cpp +++ b/sycl/test-e2e/Reduction/reduction_span_pack.cpp @@ -3,6 +3,7 @@ // // `Group algorithms are not supported on host device.` on Nvidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_usm.cpp b/sycl/test-e2e/Reduction/reduction_usm.cpp index 9a27956982117..353de97907cf0 100644 --- a/sycl/test-e2e/Reduction/reduction_usm.cpp +++ b/sycl/test-e2e/Reduction/reduction_usm.cpp @@ -3,6 +3,7 @@ // // `Group algorithms are not supported on host device.` on Nvidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_usm_dw.cpp b/sycl/test-e2e/Reduction/reduction_usm_dw.cpp index 5b36fcba18e56..56e07083587cd 100644 --- a/sycl/test-e2e/Reduction/reduction_usm_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_usm_dw.cpp @@ -3,6 +3,7 @@ // `Group algorithms are not supported on host device` on Nvidia. // XFAIL: hip_nvidia +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows From 5edd0ad19797c0a0a3ae3b2e9319341460499c8a Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 5 Dec 2024 07:26:50 -0800 Subject: [PATCH 03/22] Add XFAIL trackers for several more tests --- sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp | 1 + .../test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp | 1 + sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp | 1 + sycl/test-e2e/Reduction/reduction_nd_conditional.cpp | 3 +-- sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp | 3 +-- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp index 34fe12ebb70e9..daada8ecf343d 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp @@ -1,5 +1,6 @@ // TODO: enable when Jira ticket resolved // XFAIL: * +// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 // // Check that full compilation works: // RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple.cpp -o %t.out diff --git a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp index 90c3468b4151f..270c109150a2c 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp @@ -1,5 +1,6 @@ // TODO: enable when Jira ticket resolved // XFAIL: * +// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 // // Check that full compilation works: // RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_return.cpp -o %t.out diff --git a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp index 
7d2bed1426cd8..89828d6a76000 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp @@ -1,5 +1,6 @@ // TODO: enable when Jira ticket resolved // XFAIL: * +// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 // // Check that full compilation works: // RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_vadd.cpp -o %t.out diff --git a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp index 76bbccb358176..0611367572373 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp @@ -5,8 +5,7 @@ // parallel_for with reduction requires work group size not bigger than 1` on // Nvidia. // XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 - +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp index 746ea97148cee..74b1ffe425419 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp @@ -6,8 +6,7 @@ // work group size not bigger than 1` on Nvidia. // XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 - +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // This test performs basic checks of parallel_for(nd_range, reduction, func) // used with 'double' type. 
From dda50ca91e1e64c1bd90f70a5a35b6b2f78a2fdc Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 5 Dec 2024 07:35:17 -0800 Subject: [PATCH 04/22] Add XFAIL trackers for several more tests --- sycl/test-e2e/Reduction/reduction_nd_conditional.cpp | 2 +- sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp index 0611367572373..c5fcbac3ff29b 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp @@ -5,7 +5,7 @@ // parallel_for with reduction requires work group size not bigger than 1` on // Nvidia. // XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp index 74b1ffe425419..18c9e7bc88db4 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp @@ -6,7 +6,7 @@ // work group size not bigger than 1` on Nvidia. // XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // This test performs basic checks of parallel_for(nd_range, reduction, func) // used with 'double' type. 
From 87856a72381b1caea69e7286b21dad7a2cbedfd0 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 5 Dec 2024 07:43:57 -0800 Subject: [PATCH 05/22] Reduce number of tests without XFAIL tracker --- sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp index c867ce1f4f420..c42505b01f3b5 100644 --- a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp +++ b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp @@ -51,7 +51,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 77 +// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 46 // // List of improperly XFAIL-ed tests. // Remove the CHECK once the test has been properly XFAIL-ed. From 8d1047365fcb007e2471e684b277dd18feaab3ca Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 5 Dec 2024 08:16:36 -0800 Subject: [PATCH 06/22] Fix wrong tracker in span.cpp test --- sycl/test-e2e/Basic/span.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Basic/span.cpp b/sycl/test-e2e/Basic/span.cpp index e258997a10fb8..735b705f18fc1 100644 --- a/sycl/test-e2e/Basic/span.cpp +++ b/sycl/test-e2e/Basic/span.cpp @@ -3,7 +3,7 @@ // // Fails to release USM pointer on HIP for NVIDIA // XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14404 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14432 // REQUIRES: aspect-usm_shared_allocations #include From 0f2daf2ac2576a25e188c03eaf281d7e9cc7aacb Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 5 Dec 2024 08:21:14 -0800 Subject: [PATCH 07/22] Delete tests failing because of GSD-4509 issue --- .../Spec/ImplicitSubgroup/tuple.cpp | 16 -- .../Spec/ImplicitSubgroup/tuple_return.cpp | 16 -- 
.../Spec/ImplicitSubgroup/tuple_vadd.cpp | 16 -- sycl/test-e2e/InvokeSimd/Spec/tuple.cpp | 148 ----------------- .../test-e2e/InvokeSimd/Spec/tuple_return.cpp | 150 ----------------- sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp | 152 ------------------ 6 files changed, 498 deletions(-) delete mode 100644 sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp delete mode 100644 sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp delete mode 100644 sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp delete mode 100644 sycl/test-e2e/InvokeSimd/Spec/tuple.cpp delete mode 100644 sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp delete mode 100644 sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp diff --git a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp deleted file mode 100644 index daada8ecf343d..0000000000000 --- a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// TODO: enable when Jira ticket resolved -// XFAIL: * -// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 -// -// Check that full compilation works: -// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple.cpp -o %t.out -// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out -// -// VISALTO enable run -// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out - -/* - * This tests is the same as InvokeSimd/spec/tuple.cpp, but compiles without - * optional subgroup attribute specified and intended to check that compiler is - * able to choose subgroup size correctly. 
- */ diff --git a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp deleted file mode 100644 index 270c109150a2c..0000000000000 --- a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// TODO: enable when Jira ticket resolved -// XFAIL: * -// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 -// -// Check that full compilation works: -// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_return.cpp -o %t.out -// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out -// -// VISALTO enable run -// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out - -/* - * This tests is the same as InvokeSimd/spec/tuple_return.cpp, but compiles - * without optional subgroup attribute specified and intended to check that - * compiler is able to choose subgroup size correctly. 
- */ diff --git a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp b/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp deleted file mode 100644 index 89828d6a76000..0000000000000 --- a/sycl/test-e2e/InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// TODO: enable when Jira ticket resolved -// XFAIL: * -// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 -// -// Check that full compilation works: -// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_vadd.cpp -o %t.out -// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out -// -// VISALTO enable run -// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out - -/* - * This tests is the same as InvokeSimd/spec/tuple_vadd.cpp, but compiles - * without optional subgroup attribute specified and intended to check that - * compiler is able to choose subgroup size correctly. - */ diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp deleted file mode 100644 index cf4f1e4d5ec75..0000000000000 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp +++ /dev/null @@ -1,148 +0,0 @@ -// TODO: enable when Jira ticket resolved -// XFAIL: * -// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 -// -// Check that full compilation works: -// RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out -// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out -// -// VISALTO enable run -// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out - -// Tests invoke_simd support in the compiler/headers -/* Test case description: - * ---------------------- - * This is a minimal test case to test invoke_simd support for tuples, - * as defined in the invoke_simd spec. 
- * - * This test case simply creates a scalar tuple per work-item - * which gets implicitly vectorized into a - * tuple, simd>. Then, inside the ESIMD function, - * we simply get the first tuple element (simd) and return it. - * - * This test also runs with all types of VISA link time optimizations enabled. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -/* Subgroup size attribute is optional - * In case it is absent compiler decides what subgroup size to use - */ -#ifdef IMPL_SUBGROUP -#define SUBGROUP_ATTR -#else -#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] -#endif - -using namespace sycl::ext::oneapi::experimental; -namespace esimd = sycl::ext::intel::esimd; -constexpr int VL = 16; - -__attribute__((always_inline)) esimd::simd -ESIMD_CALLEE(std::tuple, esimd::simd> tup, - esimd::simd a) SYCL_ESIMD_FUNCTION { - esimd::simd float_vector = std::get<0>(tup); - esimd::simd int_vector = std::get<1>(tup); - return float_vector; -} - -[[intel::device_indirectly_callable]] SYCL_EXTERNAL - simd __regcall SIMD_CALLEE( - std::tuple, simd> tup, - simd a) SYCL_ESIMD_FUNCTION; - -using namespace sycl; - -int main(void) { - constexpr unsigned Size = 1024; - constexpr unsigned GroupSize = 4 * VL; - - auto q = queue{gpu_selector_v}; - auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() - << "\n"; - auto ctxt = q.get_context(); - - float *A = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - float *C = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - - int *D = static_cast(malloc_shared(Size * sizeof(int), dev, ctxt)); - - for (unsigned i = 0; i < Size; ++i) { - A[i] = i; - C[i] = -1; - D[i] = 1; - } - - sycl::range<1> GlobalRange{Size}; - // Number of workitems in each workgroup. 
- sycl::range<1> LocalRange{GroupSize}; - - sycl::nd_range<1> Range(GlobalRange, LocalRange); - - try { - auto e = q.submit([&](handler &cgh) { - cgh.parallel_for(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR { - sub_group sg = ndi.get_sub_group(); - group<1> g = ndi.get_group(); - uint32_t i = - sg.get_group_linear_id() * VL + g.get_group_linear_id() * GroupSize; - uint32_t wi_id = i + sg.get_local_id(); - - std::tuple tup(A[wi_id], D[wi_id]); - float res = invoke_simd(sg, SIMD_CALLEE, tup, A[wi_id]); - C[wi_id] = res; - }); - }); - e.wait(); - } catch (sycl::exception const &e) { - sycl::free(A, q); - sycl::free(C, q); - sycl::free(D, q); - - std::cout << "SYCL exception caught: " << e.what() << '\n'; - return e.code().value(); - } - - int err_cnt = 0; - - for (unsigned i = 0; i < Size; ++i) { - if (A[i] != C[i]) { - if (++err_cnt < 10) { - std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i] - << "\n"; - } - } - } - if (err_cnt > 0) { - std::cout << " pass rate: " - << ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% (" - << (Size - err_cnt) << "/" << Size << ")\n"; - } - - sycl::free(A, q); - sycl::free(C, q); - sycl::free(D, q); - - std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n"); - return err_cnt > 0 ? 
1 : 0; -} - -[[intel::device_indirectly_callable]] SYCL_EXTERNAL - simd __regcall SIMD_CALLEE( - std::tuple, simd> tup, - simd a) SYCL_ESIMD_FUNCTION { - esimd::simd res = ESIMD_CALLEE(tup, a); - return res; -} diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp deleted file mode 100644 index 8c2910a6eb5f3..0000000000000 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// TODO: enable when Jira ticket resolved -// XFAIL: * -// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 -// -// Check that full compilation works: -// RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out -// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out -// -// VISALTO enable run -// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out - -// Tests invoke_simd support in the compiler/headers -/* Test case purpose: - * ---------------------- - * To test returning a tuple from invoke_simd. - * - * Test case description: - * ---------------------- - * This test case performs a vector add of A and B by passing A[wi_id] and - * B[wi_id] to an invoke_simd callee which simply combines these into a - * tuple and returns it. Then, the indivual values a and b are gotten - * back out of the tuple, added together, and stored in C[wi_id]. - * - * This test also runs with all types of VISA link time optimizations enabled. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -/* Subgroup size attribute is optional - * In case it is absent compiler decides what subgroup size to use - */ -#ifdef IMPL_SUBGROUP -#define SUBGROUP_ATTR -#else -#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] -#endif - -using namespace sycl::ext::oneapi::experimental; -namespace esimd = sycl::ext::intel::esimd; -constexpr int VL = 16; - -__attribute__((always_inline)) -std::tuple, esimd::simd> -ESIMD_CALLEE(esimd::simd va, - esimd::simd vb) SYCL_ESIMD_FUNCTION { - std::tuple, esimd::simd> tup(va, vb); - return tup; -} - -[[intel::device_indirectly_callable]] SYCL_EXTERNAL - std::tuple, simd> __regcall SIMD_CALLEE( - simd va, simd vb) SYCL_ESIMD_FUNCTION; - -using namespace sycl; - -int main(void) { - constexpr unsigned Size = 1024; - constexpr unsigned GroupSize = 4 * VL; - - auto q = queue{gpu_selector_v}; - auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() - << "\n"; - auto ctxt = q.get_context(); - - float *A = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - float *B = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - float *C = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - - for (unsigned i = 0; i < Size; ++i) { - A[i] = B[i] = i; - C[i] = -1; - } - - sycl::range<1> GlobalRange{Size}; - // Number of workitems in each workgroup. 
- sycl::range<1> LocalRange{GroupSize}; - - sycl::nd_range<1> Range(GlobalRange, LocalRange); - - try { - auto e = q.submit([&](handler &cgh) { - cgh.parallel_for(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR { - sub_group sg = ndi.get_sub_group(); - group<1> g = ndi.get_group(); - uint32_t i = - sg.get_group_linear_id() * VL + g.get_group_linear_id() * GroupSize; - uint32_t wi_id = i + sg.get_local_id(); - - std::tuple tup = - invoke_simd(sg, SIMD_CALLEE, A[wi_id], B[wi_id]); - float a = std::get<0>(tup); - float b = std::get<1>(tup); - float res = a + b; - C[wi_id] = res; - }); - }); - e.wait(); - } catch (sycl::exception const &e) { - sycl::free(A, q); - sycl::free(B, q); - sycl::free(C, q); - - std::cout << "SYCL exception caught: " << e.what() << '\n'; - return e.code().value(); - } - - int err_cnt = 0; - - for (unsigned i = 0; i < Size; ++i) { - if (A[i] + B[i] != C[i]) { - if (++err_cnt < 10) { - std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i] - << " + " << B[i] << "\n"; - } - } - } - if (err_cnt > 0) { - std::cout << " pass rate: " - << ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% (" - << (Size - err_cnt) << "/" << Size << ")\n"; - } - - sycl::free(A, q); - sycl::free(B, q); - sycl::free(C, q); - - std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n"); - return err_cnt > 0 ? 
1 : 0; -} - -[[intel::device_indirectly_callable]] SYCL_EXTERNAL - std::tuple, simd> __regcall SIMD_CALLEE( - simd va, simd vb) SYCL_ESIMD_FUNCTION { - std::tuple, esimd::simd> res = - ESIMD_CALLEE(va, vb); - return res; -} diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp deleted file mode 100644 index 6a6b1fc16f383..0000000000000 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// TODO: enable when Jira ticket resolved -// XFAIL: * -// XFAIL-TRACKER: https://jira.devtools.intel.com/browse/GSD-4509 -// -// Check that full compilation works: -// RUN: %{build} -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.out -// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out -// -// VISALTO enable run -// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %{run} %t.out - -// Tests invoke_simd support in the compiler/headers -/* Test case purpose: - * ------------------ - * To test invoke_simd support for tuples, as defined in the invoke_simd spec. - * - * Test case description: - * ---------------------- - * This test case performs a vector addition of 2 vectors of float, a and b. - * Instead of passing in 2 separate simd to the SIMD and ESIMD - * functions, we pass in a single tuple, simd> and - * then get and add these tuple elements, - * - * Implementation notes: - * -------------------- - * I've included an equivalent set of regular (non-tuple) vadd functions to - * verify that the overall logic of the program is correct. Switch the - * invoke_simd() call to see that the regular vadd works correctly. - * - * This test also runs with all types of VISA link time optimizations enabled. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -/* Subgroup size attribute is optional - * In case it is absent compiler decides what subgroup size to use - */ -#ifdef IMPL_SUBGROUP -#define SUBGROUP_ATTR -#else -#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] -#endif - -using namespace sycl::ext::oneapi::experimental; -namespace esimd = sycl::ext::intel::esimd; -constexpr int VL = 16; - -__attribute__((always_inline)) esimd::simd ESIMD_CALLEE_TUPLE_VADD( - std::tuple, esimd::simd> tup) - SYCL_ESIMD_FUNCTION { - esimd::simd va = std::get<0>(tup); - esimd::simd vb = std::get<1>(tup); - return va + vb; -} - -[[intel::device_indirectly_callable]] SYCL_EXTERNAL - simd __regcall SIMD_CALLEE_TUPLE_VADD( - std::tuple, simd> tup) SYCL_ESIMD_FUNCTION; - -using namespace sycl; - -int main(void) { - constexpr unsigned Size = 1024; - constexpr unsigned GroupSize = 4 * VL; - - auto q = queue{gpu_selector_v}; - auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() - << "\n"; - auto ctxt = q.get_context(); - - float *A = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - float *B = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - float *C = - static_cast(malloc_shared(Size * sizeof(float), dev, ctxt)); - - for (unsigned i = 0; i < Size; ++i) { - A[i] = B[i] = i; - C[i] = -1; - } - - sycl::range<1> GlobalRange{Size}; - // Number of workitems in each workgroup. 
- sycl::range<1> LocalRange{GroupSize}; - - sycl::nd_range<1> Range(GlobalRange, LocalRange); - - try { - auto e = q.submit([&](handler &cgh) { - cgh.parallel_for(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR { - sub_group sg = ndi.get_sub_group(); - group<1> g = ndi.get_group(); - uint32_t i = - sg.get_group_linear_id() * VL + g.get_group_linear_id() * GroupSize; - uint32_t wi_id = i + sg.get_local_id(); - - std::tuple tup(A[wi_id], B[wi_id]); - float res = invoke_simd(sg, SIMD_CALLEE_TUPLE_VADD, tup); - C[wi_id] = res; - }); - }); - e.wait(); - } catch (sycl::exception const &e) { - sycl::free(A, q); - sycl::free(B, q); - sycl::free(C, q); - - std::cout << "SYCL exception caught: " << e.what() << '\n'; - return e.code().value(); - } - - int err_cnt = 0; - - for (unsigned i = 0; i < Size; ++i) { - if (A[i] + B[i] != C[i]) { - if (++err_cnt < 10) { - std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i] - << " + " << B[i] << "\n"; - } - } - } - if (err_cnt > 0) { - std::cout << " pass rate: " - << ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% (" - << (Size - err_cnt) << "/" << Size << ")\n"; - } - - sycl::free(A, q); - sycl::free(B, q); - sycl::free(C, q); - - std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n"); - return err_cnt > 0 ? 
1 : 0; -} - -[[intel::device_indirectly_callable]] SYCL_EXTERNAL - simd __regcall SIMD_CALLEE_TUPLE_VADD( - std::tuple, simd> tup) SYCL_ESIMD_FUNCTION { - esimd::simd res = ESIMD_CALLEE_TUPLE_VADD(tup); - return res; -} From 7bd59220529aec218877f154bf5b88ba6011553f Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 6 Dec 2024 07:44:11 -0800 Subject: [PATCH 08/22] Remove XFAILs for hip nvidia plaform --- sycl/test-e2e/Basic/queue/queue.cpp | 3 --- sycl/test-e2e/Basic/span.cpp | 3 --- sycl/test-e2e/Basic/stream/auto_flush.cpp | 3 --- sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp | 5 ----- sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp | 3 --- sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp | 3 --- sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp | 3 --- sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp | 3 --- sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp | 3 --- sycl/test-e2e/Reduction/reduction_nd_conditional.cpp | 5 ----- sycl/test-e2e/Reduction/reduction_nd_dw.cpp | 4 ---- sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp | 6 ------ sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp | 6 ------ sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp | 3 --- sycl/test-e2e/Reduction/reduction_nd_rw.cpp | 4 ---- sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp | 4 ---- sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp | 5 ----- sycl/test-e2e/Reduction/reduction_span_pack.cpp | 4 ---- sycl/test-e2e/Reduction/reduction_usm.cpp | 4 ---- sycl/test-e2e/Reduction/reduction_usm_dw.cpp | 4 ---- sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp | 2 -- sycl/test-e2e/Scheduler/MemObjRemapping.cpp | 3 +-- sycl/test-e2e/Scheduler/MultipleDevices.cpp | 2 -- sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp | 2 -- sycl/test-e2e/Tracing/buffer_printers.cpp | 2 -- 25 files changed, 1 insertion(+), 88 deletions(-) diff --git a/sycl/test-e2e/Basic/queue/queue.cpp b/sycl/test-e2e/Basic/queue/queue.cpp index 94b628197a3d3..20697acfbee89 
100644 --- a/sycl/test-e2e/Basic/queue/queue.cpp +++ b/sycl/test-e2e/Basic/queue/queue.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16197 //==--------------- queue.cpp - SYCL queue test ----------------------------==// // diff --git a/sycl/test-e2e/Basic/span.cpp b/sycl/test-e2e/Basic/span.cpp index 735b705f18fc1..2a112ba4740d5 100644 --- a/sycl/test-e2e/Basic/span.cpp +++ b/sycl/test-e2e/Basic/span.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// Fails to release USM pointer on HIP for NVIDIA -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14432 // REQUIRES: aspect-usm_shared_allocations #include diff --git a/sycl/test-e2e/Basic/stream/auto_flush.cpp b/sycl/test-e2e/Basic/stream/auto_flush.cpp index ddc5b8e19e0f6..f346eca5b72cf 100644 --- a/sycl/test-e2e/Basic/stream/auto_flush.cpp +++ b/sycl/test-e2e/Basic/stream/auto_flush.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out %if !gpu || linux %{ | FileCheck %s %} -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16198 //==-------------- copy.cpp - SYCL stream obect auto flushing test ---------==// // diff --git a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp index ee93969e8a547..ab9059ce98976 100644 --- a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp @@ -1,10 +1,5 @@ // RUN: %{build} -D__SYCL_INTERNAL_API -o %t.out // RUN: %{run-unfiltered-devices} %t.out -// -// hip_nvidia has problems constructing queues due to `No device of requested -// type available`. 
-// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16199 //==-------- queue_old_interop.cpp - SYCL queue OpenCL interop test --------==// // diff --git a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp index b1999b7bbf4e2..15677a64ea5e5 100644 --- a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -Wno-error=unused-command-line-argument -fsycl-device-code-split=per_kernel -o %t.out \ // RUN: -fsycl-dead-args-optimization // RUN: %{run} %t.out -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16201 #include #include diff --git a/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp b/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp index 50f8db4a060b9..29c6102f71284 100644 --- a/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/split-per-source-main.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -Wno-error=unused-command-line-argument -fsycl-device-code-split=per_source -I %S/Inputs -o %t.out %S/Inputs/split-per-source-second-file.cpp \ // RUN: -fsycl-dead-args-optimization // RUN: %{run} %t.out -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16201 #include "Inputs/split-per-source.h" diff --git a/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp b/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp index 5fcd1de9b2ca5..da0f3881ffc59 100644 --- a/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp +++ b/sycl/test-e2e/GroupLocalMemory/group_local_memory.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -Wno-error=deprecated-declarations -o %t.out // RUN: %{run} %t.out -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16204 #include diff --git a/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp b/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp index 
2f9822138a2fe..b0b3a0e8a8309 100644 --- a/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp +++ b/sycl/test-e2e/GroupLocalMemory/no_early_opt.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -Wno-error=deprecated-declarations -fno-sycl-early-optimizations -o %t.out // RUN: %{run} %t.out -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16204 // The test checks that multiple calls to the same template instantiation of a // group local memory function result in separate allocations, even with device diff --git a/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp index b22b380465a9b..f9ff2f64140c7 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// Group algorithms are not supported on NVidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // This test only checks that the method queue::parallel_for() accepting // reduction, can be properly translated into queue::submit + parallel_for(). diff --git a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp index c5fcbac3ff29b..6e71cc363ad98 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_conditional.cpp @@ -1,11 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// Error message `The implementation handling -// parallel_for with reduction requires work group size not bigger than 1` on -// Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_dw.cpp b/sycl/test-e2e/Reduction/reduction_nd_dw.cpp index 093c97d050c41..dd45a974b0950 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_dw.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Group algorithms are not supported on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp index 18c9e7bc88db4..7e7b9fac72ecf 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_ext_double.cpp @@ -1,12 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Error -// message `The implementation handling parallel_for with reduction requires -// work group size not bigger than 1` on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // This test performs basic checks of parallel_for(nd_range, reduction, func) // used with 'double' type. diff --git a/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp b/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp index cc82325754984..8277360d39059 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp @@ -1,12 +1,6 @@ // REQUIRES: aspect-fp16 // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Error message on Nvidia: -// `The implementation handling parallel_for with reduction requires -// work group size not bigger than 1`. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp index bbefd1dbe51fb..6a2c062ba9ffa 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// Group algorithms are not supported on NVidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_rw.cpp b/sycl/test-e2e/Reduction/reduction_nd_rw.cpp index 2b26b135582d9..d329033446191 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_rw.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// `Group algorithms are not supported on host device.` on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp index 81d6026a2f9e0..1d94e6faaacef 100644 --- a/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_queue_shortcut.cpp @@ -1,10 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// Group algorithms are not supported on NVidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 - // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp index 4a6ee054f4539..9c9f366f6a588 100644 --- a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp @@ -1,10 +1,5 @@ // RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out -// -// Error message `Group algorithms are not -// supported on host device.` on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_span_pack.cpp b/sycl/test-e2e/Reduction/reduction_span_pack.cpp index ec55e26f781a8..023e78fe5e85d 100644 --- a/sycl/test-e2e/Reduction/reduction_span_pack.cpp +++ b/sycl/test-e2e/Reduction/reduction_span_pack.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// `Group algorithms are not supported on host device.` on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_usm.cpp b/sycl/test-e2e/Reduction/reduction_usm.cpp index 353de97907cf0..24dd84f66236f 100644 --- a/sycl/test-e2e/Reduction/reduction_usm.cpp +++ b/sycl/test-e2e/Reduction/reduction_usm.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// `Group algorithms are not supported on host device.` on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_usm_dw.cpp b/sycl/test-e2e/Reduction/reduction_usm_dw.cpp index 56e07083587cd..6d00451a0701e 100644 --- a/sycl/test-e2e/Reduction/reduction_usm_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_usm_dw.cpp @@ -1,10 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// `Group algorithms are not supported on host device` on Nvidia. -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14973 - // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp b/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp index e1309ee0edb1a..0c0e1750805f2 100644 --- a/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp +++ b/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp @@ -1,7 +1,5 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s -// -// XFAIL: hip_nvidia // The tested functionality is disabled with Level Zero until it is supported by // the adapter. 
diff --git a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp index f4de8a56217cd..55181dfbcc565 100644 --- a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp +++ b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp @@ -1,7 +1,6 @@ // RUN: %{build} -Wno-error=deprecated-declarations -o %t.out // RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s -// -// XFAIL: hip_nvidia + #include #include #include diff --git a/sycl/test-e2e/Scheduler/MultipleDevices.cpp b/sycl/test-e2e/Scheduler/MultipleDevices.cpp index 3641e5d58b5ad..3976512e2d6e7 100644 --- a/sycl/test-e2e/Scheduler/MultipleDevices.cpp +++ b/sycl/test-e2e/Scheduler/MultipleDevices.cpp @@ -1,7 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out -// -// XFAIL: hip_nvidia //===- MultipleDevices.cpp - Test checking multi-device execution --------===// // diff --git a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp index 880ec728f8951..fb634c832114d 100644 --- a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp +++ b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp @@ -1,7 +1,5 @@ // RUN: %{build} -Wno-error=unused-command-line-argument -fsycl-dead-args-optimization -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s %if !windows %{--check-prefix=CHECK-RELEASE%} -// -// XFAIL: hip_nvidia //==------------------- ReleaseResourcesTests.cpp --------------------------==// // diff --git a/sycl/test-e2e/Tracing/buffer_printers.cpp b/sycl/test-e2e/Tracing/buffer_printers.cpp index 4e29cbb02ff6c..d712b0009c1ae 100644 --- a/sycl/test-e2e/Tracing/buffer_printers.cpp +++ b/sycl/test-e2e/Tracing/buffer_printers.cpp @@ -1,7 +1,5 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s -// -// XFAIL: hip_nvidia #include #include From b5471f8b3c03168975efdadaab1ce5bc6eb165db Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 6 Dec 2024 
07:55:28 -0800 Subject: [PATCH 09/22] Fix formatting --- sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp | 1 - sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp | 1 - sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp index f9ff2f64140c7..eb27a5b76c9d9 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_N_queue_shortcut.cpp @@ -1,7 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out - // This test only checks that the method queue::parallel_for() accepting // reduction, can be properly translated into queue::submit + parallel_for(). diff --git a/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp b/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp index 6a2c062ba9ffa..474c67aaf9b47 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_queue_shortcut.cpp @@ -1,7 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out - // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp index c42505b01f3b5..a4e0a388fe9c0 100644 --- a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp +++ b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp @@ -51,7 +51,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 46 +// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 41 // // List of improperly XFAIL-ed tests. // Remove the CHECK once the test has been properly XFAIL-ed. 
From 6df2d88c46e913b749fcb911e57702cc7e5b41dc Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 6 Dec 2024 08:19:04 -0800 Subject: [PATCH 10/22] Fix typos and remove the tests from the list of improperly XFAIL-ed tests --- "\\" | 113 ++++++++++++++++++ sycl/test-e2e/Basic/queue/release.cpp | 3 - .../no-xfail-without-tracker.cpp | 36 ------ 3 files changed, 113 insertions(+), 39 deletions(-) create mode 100644 "\\" diff --git "a/\\" "b/\\" new file mode 100644 index 0000000000000..2a112ba4740d5 --- /dev/null +++ "b/\\" @@ -0,0 +1,113 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// +// REQUIRES: aspect-usm_shared_allocations +#include + +#include + +#include +#include +#include + +using namespace sycl; + +void testSpanCapture() { + // This test creates spans that are backed by USM. + // ensures they can be captured by device lambda + // and that read and write operations function correctly + // across capture. + queue Q; + + constexpr long numReadTests = 2; + const range<1> NumberOfReadTestsRange(numReadTests); + buffer SpanRead(NumberOfReadTestsRange); + + // span from a vector + // We will create a vector, backed by a USM allocator. And a span from that. + using vec_alloc = usm_allocator; + // Create allocator for device associated with q + vec_alloc myAlloc(Q); + // Create std vector with the allocator + std::vector vecUSM(4, myAlloc); + std::iota(vecUSM.begin(), vecUSM.end(), 1); + sycl::span vecUSM_span{vecUSM}; + vecUSM_span[0] += 100; // 101 modify first value using span affordance. + + // span from USM memory + auto *usm_data = malloc_shared(4, Q); + sycl::span usm_span(usm_data, 4); + std::iota(usm_span.begin(), usm_span.end(), 1); + usm_span[0] += 100; // 101 modify first value using span affordance. + + event E = Q.submit([&](handler &cgh) { + auto can_read_from_span_acc = SpanRead.get_access(cgh); + cgh.single_task([=] { + // read from the spans. 
+ can_read_from_span_acc[0] = vecUSM_span[0]; + can_read_from_span_acc[1] = usm_span[0]; + + // write to the spans + vecUSM_span[1] += 1000; + usm_span[1] += 1000; + }); + }); + E.wait(); + + // check out the read operations, should have gotten 101 from each + host_accessor can_read_from_span_acc(SpanRead, read_only); + for (int i = 0; i < numReadTests; i++) { + assert(can_read_from_span_acc[i] == 101 && + "read check should have gotten 100"); + } + + // were the spans successfully modified via write? + assert(vecUSM_span[1] == 1002 && + "vecUSM_span write check should have gotten 1001"); + assert(usm_span[1] == 1002 && "usm_span write check should have gotten 1001"); + + free(usm_data, Q); +} + +void set_all_span_values(sycl::span container, int v) { + for (auto &e : container) + e = v; +} + +void testSpanOnDevice() { + // this test creates a simple span on device, + // passes it to a function that operates on it + // and ensures it worked correctly + queue Q; + constexpr long numReadTests = 4; + const range<1> NumberOfReadTestsRange(numReadTests); + buffer SpanRead(NumberOfReadTestsRange); + + event E = Q.submit([&](handler &cgh) { + auto can_read_from_span_acc = SpanRead.get_access(cgh); + cgh.single_task([=] { + // create a span on device, pass it to function that modifies it + // read values back out. 
+ int a[]{1, 2, 3, 4}; + sycl::span a_span{a}; + set_all_span_values(a_span, 10); + for (int i = 0; i < numReadTests; i++) + can_read_from_span_acc[i] = a_span[i]; + }); + }); + E.wait(); + + // check out the read operations, should have gotten 10 from each + host_accessor can_read_from_span_acc(SpanRead, read_only); + for (int i = 0; i < numReadTests; i++) { + assert(can_read_from_span_acc[i] == 10 && + "read check should have gotten 10"); + } +} + +int main() { + testSpanCapture(); + testSpanOnDevice(); + + return 0; +} diff --git a/sycl/test-e2e/Basic/queue/release.cpp b/sycl/test-e2e/Basic/queue/release.cpp index b041871fb31a3..13ee5d6ee22bf 100644 --- a/sycl/test-e2e/Basic/queue/release.cpp +++ b/sycl/test-e2e/Basic/queue/release.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s %if !windows %{--check-prefixes=CHECK-RELEASE%} -// -// XFAIL: hip_nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16197 #include int main() { diff --git a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp index a4e0a388fe9c0..66257a935610e 100644 --- a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp +++ b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp @@ -58,33 +58,14 @@ // // CHECK: AddressSanitizer/nullpointer/private_nullptr.cpp // CHECK-NEXT: Basic/aspects.cpp -// CHECK-NEXT: Basic/buffer/reinterpret.cpp // CHECK-NEXT: Basic/device_event.cpp // CHECK-NEXT: Basic/diagnostics/handler.cpp // CHECK-NEXT: Basic/max_linear_work_group_size_props.cpp // CHECK-NEXT: Basic/max_work_group_size_props.cpp // CHECK-NEXT: Basic/partition_supported.cpp -// CHECK-NEXT: Basic/queue/queue.cpp -// CHECK-NEXT: Basic/queue/release.cpp -// CHECK-NEXT: Basic/span.cpp -// CHECK-NEXT: Basic/stream/auto_flush.cpp -// CHECK-NEXT: DeprecatedFeatures/queue_old_interop.cpp -// CHECK-NEXT: DeviceCodeSplit/split-per-kernel.cpp -// CHECK-NEXT: 
DeviceCodeSplit/split-per-source-main.cpp // CHECK-NEXT: DeviceLib/assert-windows.cpp // CHECK-NEXT: ESIMD/hardware_dispatch.cpp -// CHECK-NEXT: GroupAlgorithm/root_group.cpp -// CHECK-NEXT: GroupLocalMemory/group_local_memory.cpp -// CHECK-NEXT: GroupLocalMemory/no_early_opt.cpp // CHECK-NEXT: InlineAsm/asm_multiple_instructions.cpp -// CHECK-NEXT: InvokeSimd/Feature/ImplicitSubgroup/invoke_simd_struct.cpp -// CHECK-NEXT: InvokeSimd/Feature/invoke_simd_struct.cpp -// CHECK-NEXT: InvokeSimd/Spec/ImplicitSubgroup/tuple.cpp -// CHECK-NEXT: InvokeSimd/Spec/ImplicitSubgroup/tuple_return.cpp -// CHECK-NEXT: InvokeSimd/Spec/ImplicitSubgroup/tuple_vadd.cpp -// CHECK-NEXT: InvokeSimd/Spec/tuple.cpp -// CHECK-NEXT: InvokeSimd/Spec/tuple_return.cpp -// CHECK-NEXT: InvokeSimd/Spec/tuple_vadd.cpp // CHECK-NEXT: KernelAndProgram/kernel-bundle-merge-options.cpp // CHECK-NEXT: Matrix/SG32/joint_matrix_annotated_ptr.cpp // CHECK-NEXT: Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp @@ -108,28 +89,11 @@ // CHECK-NEXT: Printf/mixed-address-space.cpp // CHECK-NEXT: Printf/percent-symbol.cpp // CHECK-NEXT: Reduction/reduction_big_data.cpp -// CHECK-NEXT: Reduction/reduction_nd_N_queue_shortcut.cpp -// CHECK-NEXT: Reduction/reduction_nd_conditional.cpp -// CHECK-NEXT: Reduction/reduction_nd_dw.cpp -// CHECK-NEXT: Reduction/reduction_nd_ext_double.cpp -// CHECK-NEXT: Reduction/reduction_nd_ext_half.cpp -// CHECK-NEXT: Reduction/reduction_nd_queue_shortcut.cpp // CHECK-NEXT: Reduction/reduction_nd_reducer_skip.cpp -// CHECK-NEXT: Reduction/reduction_nd_rw.cpp -// CHECK-NEXT: Reduction/reduction_range_queue_shortcut.cpp -// CHECK-NEXT: Reduction/reduction_range_usm_dw.cpp // CHECK-NEXT: Reduction/reduction_reducer_op_eq.cpp -// CHECK-NEXT: Reduction/reduction_span_pack.cpp -// CHECK-NEXT: Reduction/reduction_usm.cpp -// CHECK-NEXT: Reduction/reduction_usm_dw.cpp // CHECK-NEXT: Regression/build_log.cpp // CHECK-NEXT: Regression/complex_global_object.cpp // CHECK-NEXT: 
Regression/context_is_destroyed_after_exception.cpp // CHECK-NEXT: Regression/kernel_bundle_ignore_sycl_external.cpp // CHECK-NEXT: Regression/multiple-targets.cpp // CHECK-NEXT: Regression/reduction_resource_leak_dw.cpp -// CHECK-NEXT: Scheduler/InOrderQueueDeps.cpp -// CHECK-NEXT: Scheduler/MemObjRemapping.cpp -// CHECK-NEXT: Scheduler/MultipleDevices.cpp -// CHECK-NEXT: Scheduler/ReleaseResourcesTest.cpp -// CHECK-NEXT: Tracing/buffer_printers.cpp From 357539d89045e77baa6e49aa8bb973dc33ffd22f Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 6 Dec 2024 11:19:55 -0500 Subject: [PATCH 11/22] Delete rogue file --- "\\" | 113 ----------------------------------------------------------- 1 file changed, 113 deletions(-) delete mode 100644 "\\" diff --git "a/\\" "b/\\" deleted file mode 100644 index 2a112ba4740d5..0000000000000 --- "a/\\" +++ /dev/null @@ -1,113 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// -// REQUIRES: aspect-usm_shared_allocations -#include - -#include - -#include -#include -#include - -using namespace sycl; - -void testSpanCapture() { - // This test creates spans that are backed by USM. - // ensures they can be captured by device lambda - // and that read and write operations function correctly - // across capture. - queue Q; - - constexpr long numReadTests = 2; - const range<1> NumberOfReadTestsRange(numReadTests); - buffer SpanRead(NumberOfReadTestsRange); - - // span from a vector - // We will create a vector, backed by a USM allocator. And a span from that. - using vec_alloc = usm_allocator; - // Create allocator for device associated with q - vec_alloc myAlloc(Q); - // Create std vector with the allocator - std::vector vecUSM(4, myAlloc); - std::iota(vecUSM.begin(), vecUSM.end(), 1); - sycl::span vecUSM_span{vecUSM}; - vecUSM_span[0] += 100; // 101 modify first value using span affordance. 
- - // span from USM memory - auto *usm_data = malloc_shared(4, Q); - sycl::span usm_span(usm_data, 4); - std::iota(usm_span.begin(), usm_span.end(), 1); - usm_span[0] += 100; // 101 modify first value using span affordance. - - event E = Q.submit([&](handler &cgh) { - auto can_read_from_span_acc = SpanRead.get_access(cgh); - cgh.single_task([=] { - // read from the spans. - can_read_from_span_acc[0] = vecUSM_span[0]; - can_read_from_span_acc[1] = usm_span[0]; - - // write to the spans - vecUSM_span[1] += 1000; - usm_span[1] += 1000; - }); - }); - E.wait(); - - // check out the read operations, should have gotten 101 from each - host_accessor can_read_from_span_acc(SpanRead, read_only); - for (int i = 0; i < numReadTests; i++) { - assert(can_read_from_span_acc[i] == 101 && - "read check should have gotten 100"); - } - - // were the spans successfully modified via write? - assert(vecUSM_span[1] == 1002 && - "vecUSM_span write check should have gotten 1001"); - assert(usm_span[1] == 1002 && "usm_span write check should have gotten 1001"); - - free(usm_data, Q); -} - -void set_all_span_values(sycl::span container, int v) { - for (auto &e : container) - e = v; -} - -void testSpanOnDevice() { - // this test creates a simple span on device, - // passes it to a function that operates on it - // and ensures it worked correctly - queue Q; - constexpr long numReadTests = 4; - const range<1> NumberOfReadTestsRange(numReadTests); - buffer SpanRead(NumberOfReadTestsRange); - - event E = Q.submit([&](handler &cgh) { - auto can_read_from_span_acc = SpanRead.get_access(cgh); - cgh.single_task([=] { - // create a span on device, pass it to function that modifies it - // read values back out. 
- int a[]{1, 2, 3, 4}; - sycl::span a_span{a}; - set_all_span_values(a_span, 10); - for (int i = 0; i < numReadTests; i++) - can_read_from_span_acc[i] = a_span[i]; - }); - }); - E.wait(); - - // check out the read operations, should have gotten 10 from each - host_accessor can_read_from_span_acc(SpanRead, read_only); - for (int i = 0; i < numReadTests; i++) { - assert(can_read_from_span_acc[i] == 10 && - "read check should have gotten 10"); - } -} - -int main() { - testSpanCapture(); - testSpanOnDevice(); - - return 0; -} From 999006d5fb7ba068296cad671abd8e2f32d2d86e Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 9 Dec 2024 08:49:16 -0800 Subject: [PATCH 12/22] Add tracking infor for XFAIL tests and remove hip_nvidia xfails --- sycl/test-e2e/Basic/aspects.cpp | 3 --- sycl/test-e2e/Basic/device_event.cpp | 4 ---- sycl/test-e2e/Basic/diagnostics/handler.cpp | 5 +---- sycl/test-e2e/Basic/partition_supported.cpp | 3 --- .../KernelAndProgram/kernel-bundle-merge-options.cpp | 1 - sycl/test-e2e/Printf/int.cpp | 2 -- sycl/test-e2e/Printf/mixed-address-space.cpp | 2 +- sycl/test-e2e/Printf/percent-symbol.cpp | 2 +- .../Regression/context_is_destroyed_after_exception.cpp | 2 -- sycl/test-e2e/Regression/multiple-targets.cpp | 3 --- 10 files changed, 3 insertions(+), 24 deletions(-) diff --git a/sycl/test-e2e/Basic/aspects.cpp b/sycl/test-e2e/Basic/aspects.cpp index 114f069eee40b..ea1bbec27762d 100644 --- a/sycl/test-e2e/Basic/aspects.cpp +++ b/sycl/test-e2e/Basic/aspects.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -o %t.out -DSYCL_DISABLE_IMAGE_ASPECT_WARNING // RUN: %{run-unfiltered-devices} %t.out -// -// Hip is missing some of the parameters tested here so it fails with NVIDIA -// XFAIL: hip_nvidia //==--------------- aspects.cpp - SYCL device test ------------------------==// // diff --git a/sycl/test-e2e/Basic/device_event.cpp b/sycl/test-e2e/Basic/device_event.cpp index 25631fac20843..c306fa69c4538 100644 --- a/sycl/test-e2e/Basic/device_event.cpp +++ 
b/sycl/test-e2e/Basic/device_event.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.run // RUN: %{run} %t.run -// -// Returns error "Barrier is not supported on the host device -// yet." with Nvidia. -// XFAIL: hip_nvidia //==--------device_event.cpp - SYCL class device_event test ----------------==// // diff --git a/sycl/test-e2e/Basic/diagnostics/handler.cpp b/sycl/test-e2e/Basic/diagnostics/handler.cpp index 70b5dbba1472c..a36a2c197f0a2 100644 --- a/sycl/test-e2e/Basic/diagnostics/handler.cpp +++ b/sycl/test-e2e/Basic/diagnostics/handler.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s -// -// Appears to fail on HIP Nvidia because 'no device of requested type available' -// when constructing a queue with an exception_list. -// XFAIL: hip_nvidia + //==------------------- handler.cpp ----------------------------------------==// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/test-e2e/Basic/partition_supported.cpp b/sycl/test-e2e/Basic/partition_supported.cpp index 1d659e726c8ef..ef0cf813d216d 100644 --- a/sycl/test-e2e/Basic/partition_supported.cpp +++ b/sycl/test-e2e/Basic/partition_supported.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Nvidia should not allow sub_devices but does not throw corresponding error. 
-// XFAIL: hip_nvidia /* Check that: 1) if partition_equally is supported, then we check that the correct invalid errc is returned if more than max_compute_units are requested diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp index f8f84be97f6cb..2b16b5d8663e9 100644 --- a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp @@ -5,7 +5,6 @@ // Debug option -g is not passed to device code compiler when CL-style driver // is used and /DEBUG options is passed. -// XFAIL: cl_options #include "kernel-bundle-merge-options.hpp" diff --git a/sycl/test-e2e/Printf/int.cpp b/sycl/test-e2e/Printf/int.cpp index fccb679cb0297..479434b0ab234 100644 --- a/sycl/test-e2e/Printf/int.cpp +++ b/sycl/test-e2e/Printf/int.cpp @@ -6,8 +6,6 @@ // // UNSUPPORTED: hip_amd // FIXME: The 'short' type gets overflown with sporadic values on CUDA. -// XFAIL: cuda -// // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s // FIXME: Remove dedicated constant address space testing once generic AS diff --git a/sycl/test-e2e/Printf/mixed-address-space.cpp b/sycl/test-e2e/Printf/mixed-address-space.cpp index 9f089847307cd..d79013007ca03 100644 --- a/sycl/test-e2e/Printf/mixed-address-space.cpp +++ b/sycl/test-e2e/Printf/mixed-address-space.cpp @@ -3,7 +3,7 @@ // // UNSUPPORTED: hip_amd // XFAIL: cuda && windows -// +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14733 // FIXME: Drop the test once generic AS support is considered stable and the // dedicated constant AS overload of printf is removed from the library. 
// diff --git a/sycl/test-e2e/Printf/percent-symbol.cpp b/sycl/test-e2e/Printf/percent-symbol.cpp index 30d65698a0a40..f08cd3e085d0d 100644 --- a/sycl/test-e2e/Printf/percent-symbol.cpp +++ b/sycl/test-e2e/Printf/percent-symbol.cpp @@ -6,7 +6,7 @@ // // UNSUPPORTED: hip_amd // XFAIL: cuda && windows -// +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14733 // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s // FIXME: Remove dedicated constant address space testing once generic AS diff --git a/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp b/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp index 57e48df0c664a..813b3be226626 100644 --- a/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp +++ b/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp @@ -2,8 +2,6 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out %if !windows %{2>&1 | FileCheck %s %} -// -// XFAIL: hip_nvidia #include diff --git a/sycl/test-e2e/Regression/multiple-targets.cpp b/sycl/test-e2e/Regression/multiple-targets.cpp index 8a8c893f45ff2..a2498c3301b99 100644 --- a/sycl/test-e2e/Regression/multiple-targets.cpp +++ b/sycl/test-e2e/Regression/multiple-targets.cpp @@ -15,9 +15,6 @@ // // RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -fsycl-device-code-split=per_kernel -o %t4.out %s // RUN: %{run} %t4.out -// -// XFAIL: hip_nvidia -// #include From e9df0a4d0cd9ac7dead4471217334eff3e5888f2 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 9 Dec 2024 13:14:13 -0500 Subject: [PATCH 13/22] Uncomment check commands to get failures --- .../KernelAndProgram/kernel-bundle-merge-options.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp index 2b16b5d8663e9..a10a0ec140e87 100644 --- 
a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp @@ -13,7 +13,7 @@ // TODO: Uncomment when build options are properly passed to compile and link // commands for kernel_bundle -// xCHECK: <--- urProgramCompile( -// xCHECK-SAME: -g -// xCHECK: <--- urProgramLink( -// xCHECK-SAME: -g +// CHECK: <--- urProgramCompile( +// CHECK-SAME: -g +// CHECK: <--- urProgramLink( +// CHECK-SAME: -g From 3be9b214da3e3f3254cb2ce1c6adfb40aab689ba Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 9 Dec 2024 14:16:02 -0500 Subject: [PATCH 14/22] Add XFAIL tracker issue to kernel-bundle-merge-options.cpp --- .../KernelAndProgram/kernel-bundle-merge-options.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp index a10a0ec140e87..ba09fa1ea16ea 100644 --- a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp @@ -5,6 +5,8 @@ // Debug option -g is not passed to device code compiler when CL-style driver // is used and /DEBUG options is passed. 
+// XFAIL: cl_options +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16309 #include "kernel-bundle-merge-options.hpp" @@ -13,7 +15,7 @@ // TODO: Uncomment when build options are properly passed to compile and link // commands for kernel_bundle -// CHECK: <--- urProgramCompile( -// CHECK-SAME: -g -// CHECK: <--- urProgramLink( -// CHECK-SAME: -g +// xCHECK: <--- urProgramCompile( +// xCHECK-SAME: -g +// xCHECK: <--- urProgramLink( +// xCHECK-SAME: -g From 58a3767a87377be658e4dd5da0aba7200e880770 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 9 Dec 2024 14:20:27 -0500 Subject: [PATCH 15/22] Update number of XFAILs without tracker --- sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp index 66257a935610e..3f90f8243a3fc 100644 --- a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp +++ b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp @@ -51,7 +51,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 41 +// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 31 // // List of improperly XFAIL-ed tests. // Remove the CHECK once the test has been properly XFAIL-ed. 
From 48ed23ed1c310c0e2308ca9b09492592f46ebb2f Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 9 Dec 2024 14:45:03 -0500 Subject: [PATCH 16/22] Update no-xfail-without-tracker.cpp --- .../e2e_test_requirements/no-xfail-without-tracker.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp index 3f90f8243a3fc..c0355385350d5 100644 --- a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp +++ b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp @@ -57,16 +57,11 @@ // Remove the CHECK once the test has been properly XFAIL-ed. // // CHECK: AddressSanitizer/nullpointer/private_nullptr.cpp -// CHECK-NEXT: Basic/aspects.cpp -// CHECK-NEXT: Basic/device_event.cpp -// CHECK-NEXT: Basic/diagnostics/handler.cpp // CHECK-NEXT: Basic/max_linear_work_group_size_props.cpp // CHECK-NEXT: Basic/max_work_group_size_props.cpp -// CHECK-NEXT: Basic/partition_supported.cpp // CHECK-NEXT: DeviceLib/assert-windows.cpp // CHECK-NEXT: ESIMD/hardware_dispatch.cpp // CHECK-NEXT: InlineAsm/asm_multiple_instructions.cpp -// CHECK-NEXT: KernelAndProgram/kernel-bundle-merge-options.cpp // CHECK-NEXT: Matrix/SG32/joint_matrix_annotated_ptr.cpp // CHECK-NEXT: Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp // CHECK-NEXT: Matrix/SG32/joint_matrix_bfloat16_packedB.cpp @@ -85,15 +80,10 @@ // CHECK-NEXT: NewOffloadDriver/sycl-external-with-optional-features.cpp // CHECK-NEXT: OptionalKernelFeatures/throw-exception-for-out-of-registers-on-kernel-launch.cpp // CHECK-NEXT: PerformanceTests/Reduction/reduce_over_sub_group.cpp -// CHECK-NEXT: Printf/int.cpp -// CHECK-NEXT: Printf/mixed-address-space.cpp -// CHECK-NEXT: Printf/percent-symbol.cpp // CHECK-NEXT: Reduction/reduction_big_data.cpp // CHECK-NEXT: Reduction/reduction_nd_reducer_skip.cpp // CHECK-NEXT: Reduction/reduction_reducer_op_eq.cpp // CHECK-NEXT: Regression/build_log.cpp // 
CHECK-NEXT: Regression/complex_global_object.cpp -// CHECK-NEXT: Regression/context_is_destroyed_after_exception.cpp // CHECK-NEXT: Regression/kernel_bundle_ignore_sycl_external.cpp -// CHECK-NEXT: Regression/multiple-targets.cpp // CHECK-NEXT: Regression/reduction_resource_leak_dw.cpp From 78eefa4b80406f890f98828735e5359810994839 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 9 Dec 2024 16:38:28 -0500 Subject: [PATCH 17/22] Update int.cpp --- sycl/test-e2e/Printf/int.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test-e2e/Printf/int.cpp b/sycl/test-e2e/Printf/int.cpp index 479434b0ab234..17b3e212c5988 100644 --- a/sycl/test-e2e/Printf/int.cpp +++ b/sycl/test-e2e/Printf/int.cpp @@ -6,6 +6,9 @@ // // UNSUPPORTED: hip_amd // FIXME: The 'short' type gets overflown with sporadic values on CUDA. +// XFAIL: cuda +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14734 + // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s // FIXME: Remove dedicated constant address space testing once generic AS From a50cc22052a74344b3e8720863872c02978093d4 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 12 Dec 2024 07:01:07 -0800 Subject: [PATCH 18/22] Add XFAIL trackers for joint matrix tests --- sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp | 3 ++- .../Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp | 2 +- sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_packedB.cpp | 1 + sycl/test-e2e/Matrix/SG32/joint_matrix_colA_rowB_colC.cpp | 1 + .../Matrix/SG32/joint_matrix_int8_colmajorA_colmajorB.cpp | 1 + .../Matrix/SG32/joint_matrix_int8_rowmajorA_rowmajorB.cpp | 1 + sycl/test-e2e/Matrix/SG32/joint_matrix_out_bounds.cpp | 1 + sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp | 1 + sycl/test-e2e/Matrix/SG32/joint_matrix_rowmajorA_rowmajorB.cpp | 1 + sycl/test-e2e/Matrix/SG32/joint_matrix_unaligned_k.cpp | 1 + 10 files changed, 11 insertions(+), 2 deletions(-) diff --git 
a/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp index 21f101e000b52..96a6cb00dd062 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp @@ -13,7 +13,8 @@ // Currently row major B fails when annotated_ptr is used // XFAIL: gpu - +// XFAIL-TRACKER: GSD-4181 + #include "../common.hpp" #define SG_SZ 32 diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp index aba19833ee581..57a41d55f8fee 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp @@ -15,7 +15,7 @@ // then VNNI transform. This is currently only available on AMX // XFAIL: gpu - +// XFAIL-TRACKER: GSD-5768 #include "../common.hpp" #include #include diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_packedB.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_packedB.cpp index 36346ba4a9f1a..1944a331bfe3a 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_packedB.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_packedB.cpp @@ -15,6 +15,7 @@ // RUN: %{run} %t.out // XFAIL: gpu +// XFAIL-TRACKER: GSD-4181 #include "../common.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_colA_rowB_colC.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_colA_rowB_colC.cpp index 0c3b778457036..4186ad1acc943 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_colA_rowB_colC.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_colA_rowB_colC.cpp @@ -12,6 +12,7 @@ // RUN: %{run} %t.out // XFAIL: run-mode +// XFAIL-TRACKER: GSD-5768 #include "../common.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_colmajorA_colmajorB.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_colmajorA_colmajorB.cpp index 177e4d6d861fa..7db1b0a618823 100644 
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_colmajorA_colmajorB.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_colmajorA_colmajorB.cpp @@ -15,6 +15,7 @@ // then VNNI transform. This is currently only available on AMX // XFAIL: gpu +// XFAIL-TRACKER: GSD-5768 #include "../common.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_rowmajorA_rowmajorB.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_rowmajorA_rowmajorB.cpp index d73a2300888d3..3068b6bcca684 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_rowmajorA_rowmajorB.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_int8_rowmajorA_rowmajorB.cpp @@ -14,6 +14,7 @@ // RUN: %{run} %t.out // XFAIL: gpu +// XFAIL-TRACKER: GSD-4181 #include "../common.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_out_bounds.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_out_bounds.cpp index 3464677d17398..97a6d17b4e1ee 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_out_bounds.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_out_bounds.cpp @@ -14,6 +14,7 @@ // RUN: %{run} %t.out // XFAIL:gpu +// XFAIL-TRACKER: GSD-4181 #include "../common.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp index a6ce09fe13b9f..7e1520e95ff1c 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp @@ -11,6 +11,7 @@ // RUN: %{run} %t.out // XFAIL: gpu +// XFAIL-TRACKER: GSD-4181 // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2 // UNSUPPORTED: gpu-intel-dg2 diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_rowmajorA_rowmajorB.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_rowmajorA_rowmajorB.cpp index 0fe45a24c499f..ca7310f6f5e15 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_rowmajorA_rowmajorB.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_rowmajorA_rowmajorB.cpp @@ -16,6 +16,7 @@ // Sub-group size 32 support for this test is not 
currently available in IGC // XFAIL: gpu +// XFAIL-TRACKER: GSD-4181 #include "../common.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_unaligned_k.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_unaligned_k.cpp index ceb196e07b9ca..342648810fea4 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_unaligned_k.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_unaligned_k.cpp @@ -14,6 +14,7 @@ // RUN: %{run} %t.out // XFAIL:gpu +// XFAIL-TRACKER: GSD-4181 #include "../common.hpp" From 7cbfaca72e032279c6e05027bed40277f3218a16 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 12 Dec 2024 07:11:00 -0800 Subject: [PATCH 19/22] Update count of improperly XFAIL-ed tests --- .../joint_matrix_bfloat16_colmajorA_colmajorB.cpp | 1 + .../Matrix/joint_matrix_colA_rowB_colC.cpp | 1 + .../joint_matrix_int8_colmajorA_colmajorB.cpp | 1 + .../no-xfail-without-tracker.cpp | 15 +-------------- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp index a9326e2ba312b..82bedf7043e9d 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp @@ -14,6 +14,7 @@ // then VNNI transform. 
This is currently only available on AMX // XFAIL: gpu +// XFAIL-TRACKER: GSD-5768 #include "common.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC.cpp b/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC.cpp index 338564207c4ac..c62175a8af439 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC.cpp @@ -11,6 +11,7 @@ // RUN: %{run} %t.out // XFAIL: run-mode +// XFAIL-TRACKER: GSD-5768 #include "common.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB.cpp b/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB.cpp index f8dc8452668a1..aa4bac536e357 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB.cpp @@ -14,6 +14,7 @@ // then VNNI transform. This is currently only available on AMX // XFAIL: gpu +// XFAIL-TRACKER: GSD-5768 #include "common.hpp" diff --git a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp index c0355385350d5..4134c192bebb0 100644 --- a/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp +++ b/sycl/test/e2e_test_requirements/no-xfail-without-tracker.cpp @@ -51,7 +51,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 31 +// NUMBER-OF-XFAIL-WITHOUT-TRACKER: 18 // // List of improperly XFAIL-ed tests. // Remove the CHECK once the test has been properly XFAIL-ed. 
@@ -62,19 +62,6 @@ // CHECK-NEXT: DeviceLib/assert-windows.cpp // CHECK-NEXT: ESIMD/hardware_dispatch.cpp // CHECK-NEXT: InlineAsm/asm_multiple_instructions.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_annotated_ptr.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_bfloat16_colmajorA_colmajorB.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_bfloat16_packedB.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_colA_rowB_colC.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_int8_colmajorA_colmajorB.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_int8_rowmajorA_rowmajorB.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_out_bounds.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_prefetch.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_rowmajorA_rowmajorB.cpp -// CHECK-NEXT: Matrix/SG32/joint_matrix_unaligned_k.cpp -// CHECK-NEXT: Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp -// CHECK-NEXT: Matrix/joint_matrix_colA_rowB_colC.cpp -// CHECK-NEXT: Matrix/joint_matrix_int8_colmajorA_colmajorB.cpp // CHECK-NEXT: NewOffloadDriver/multisource.cpp // CHECK-NEXT: NewOffloadDriver/split-per-source-main.cpp // CHECK-NEXT: NewOffloadDriver/sycl-external-with-optional-features.cpp From 03e9e85fc6d46c30d569c4458fff9dc6d19f152a Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 12 Dec 2024 07:21:58 -0800 Subject: [PATCH 20/22] Update count of improperly XFAIL-ed tests --- sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp index 96a6cb00dd062..13656b8e3f4ec 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp @@ -14,7 +14,7 @@ // Currently row major B fails when annotated_ptr is used // XFAIL: gpu // XFAIL-TRACKER: GSD-4181 - + #include "../common.hpp" #define SG_SZ 32 From e33db4cb7037ce831b3fa3923f970e517779e5a0 Mon Sep 17 00:00:00 2001 From: 
Lorenc Bushi Date: Tue, 17 Dec 2024 10:31:22 -0800 Subject: [PATCH 21/22] Remove XFAILs from remainign E2E tests --- sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp | 1 - sycl/test-e2e/Basic/max_work_group_size_props.cpp | 1 - sycl/test-e2e/DeviceLib/assert-windows.cpp | 1 - sycl/test-e2e/ESIMD/hardware_dispatch.cpp | 1 - sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp | 1 - sycl/test-e2e/NewOffloadDriver/multisource.cpp | 1 - sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp | 1 - .../NewOffloadDriver/sycl-external-with-optional-features.cpp | 1 - .../throw-exception-for-out-of-registers-on-kernel-launch.cpp | 1 - .../PerformanceTests/Reduction/reduce_over_sub_group.cpp | 1 - sycl/test-e2e/Reduction/reduction_big_data.cpp | 3 --- sycl/test-e2e/Reduction/reduction_nd_reducer_skip.cpp | 2 -- sycl/test-e2e/Reduction/reduction_reducer_op_eq.cpp | 2 -- sycl/test-e2e/Regression/build_log.cpp | 2 -- sycl/test-e2e/Regression/complex_global_object.cpp | 1 - .../test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp | 1 - sycl/test-e2e/Regression/reduction_resource_leak_dw.cpp | 2 -- 17 files changed, 23 deletions(-) diff --git a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp index 7009ca367d8e9..a762f009a1d72 100644 --- a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp @@ -2,7 +2,6 @@ // RUN: %{run} %t.out // This property is not yet supported by all UR adapters -// XFAIL: level_zero, opencl, hip #include diff --git a/sycl/test-e2e/Basic/max_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_work_group_size_props.cpp index 96439971d904a..8f1e1825f38b2 100644 --- a/sycl/test-e2e/Basic/max_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_work_group_size_props.cpp @@ -2,7 +2,6 @@ // RUN: %{run} %t.out // This property is not yet supported by all UR adapters -// XFAIL: level_zero, opencl, hip #include 
diff --git a/sycl/test-e2e/DeviceLib/assert-windows.cpp b/sycl/test-e2e/DeviceLib/assert-windows.cpp index c138bda944b69..77f81b80eea9b 100644 --- a/sycl/test-e2e/DeviceLib/assert-windows.cpp +++ b/sycl/test-e2e/DeviceLib/assert-windows.cpp @@ -2,7 +2,6 @@ // // FIXME: OpenCL CPU backend compiler crashes on a call to _wassert. // Disable the test until the fix reaches SYCL test infrastructure. -// XFAIL: * // // RUN: %{build} -o %t.out // diff --git a/sycl/test-e2e/ESIMD/hardware_dispatch.cpp b/sycl/test-e2e/ESIMD/hardware_dispatch.cpp index 4166d5dfbd44e..546fdac46425a 100644 --- a/sycl/test-e2e/ESIMD/hardware_dispatch.cpp +++ b/sycl/test-e2e/ESIMD/hardware_dispatch.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// XFAIL: igc-dev // REQUIRES: ocloc && arch-intel_gpu_tgllp // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_tgllp %s -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp b/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp index 1e0140611389f..4e8c64eb4597a 100644 --- a/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp +++ b/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp @@ -6,7 +6,6 @@ // RUN: %{run} %t.out // The test is failing when writing directly to output buffer. // If temporary variable is used (see TO_PASS mode) the test succeeded. 
-// XFAIL: gpu && run-mode #include "include/asmhelper.h" #include #include diff --git a/sycl/test-e2e/NewOffloadDriver/multisource.cpp b/sycl/test-e2e/NewOffloadDriver/multisource.cpp index 73c7ab87f3222..21bb4a9ea9cfe 100644 --- a/sycl/test-e2e/NewOffloadDriver/multisource.cpp +++ b/sycl/test-e2e/NewOffloadDriver/multisource.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// XFAIL: cuda // Separate kernel sources and host code sources // Test with `--offload-new-driver` // RUN: %{build} --offload-new-driver -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL diff --git a/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp b/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp index f3e1108becba1..2b527996f2af1 100644 --- a/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp +++ b/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp @@ -2,7 +2,6 @@ // RUN: --offload-new-driver -fsycl-dead-args-optimization // RUN: %{run} %t.out // -// XFAIL: hip_nvidia, cuda #include "Inputs/split-per-source.h" diff --git a/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp b/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp index c8d86d84747a9..d1e8ef20a9a70 100644 --- a/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp +++ b/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp @@ -3,7 +3,6 @@ // RUN: %{build} -DSOURCE2 --offload-new-driver -c -o %t2.o // RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t1.o %t2.o -o %t.exe // RUN: %{run} %t.exe -// XFAIL: cuda #ifdef SOURCE1 #include diff --git a/sycl/test-e2e/OptionalKernelFeatures/throw-exception-for-out-of-registers-on-kernel-launch.cpp b/sycl/test-e2e/OptionalKernelFeatures/throw-exception-for-out-of-registers-on-kernel-launch.cpp index 
a701b7960c232..bfe01871fb70c 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/throw-exception-for-out-of-registers-on-kernel-launch.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/throw-exception-for-out-of-registers-on-kernel-launch.cpp @@ -1,7 +1,6 @@ // REQUIRES: cuda // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// XFAIL: * #include #include diff --git a/sycl/test-e2e/PerformanceTests/Reduction/reduce_over_sub_group.cpp b/sycl/test-e2e/PerformanceTests/Reduction/reduce_over_sub_group.cpp index c28a3406b33c0..ea09a668d9997 100644 --- a/sycl/test-e2e/PerformanceTests/Reduction/reduce_over_sub_group.cpp +++ b/sycl/test-e2e/PerformanceTests/Reduction/reduce_over_sub_group.cpp @@ -1,4 +1,3 @@ -// XFAIL: native_cpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Reduction/reduction_big_data.cpp b/sycl/test-e2e/Reduction/reduction_big_data.cpp index 40e4e4f3e7ff7..c376fa3f31a34 100644 --- a/sycl/test-e2e/Reduction/reduction_big_data.cpp +++ b/sycl/test-e2e/Reduction/reduction_big_data.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// Group algorithms are not supported on Nvidia. -// XFAIL: hip_nvidia -// // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_nd_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_nd_reducer_skip.cpp index ab2a583ce2f4c..cb345c1dd85fe 100644 --- a/sycl/test-e2e/Reduction/reduction_nd_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_nd_reducer_skip.cpp @@ -1,8 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// Group algorithms are not supported on Nvidia. -// XFAIL: hip_nvidia // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Reduction/reduction_reducer_op_eq.cpp b/sycl/test-e2e/Reduction/reduction_reducer_op_eq.cpp index 02419e04a4240..1fa43c91ef553 100644 --- a/sycl/test-e2e/Reduction/reduction_reducer_op_eq.cpp +++ b/sycl/test-e2e/Reduction/reduction_reducer_op_eq.cpp @@ -1,8 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// On nvidia a reduction appears to be unexpectedly executed via the host. -// XFAIL: hip_nvidia // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Regression/build_log.cpp b/sycl/test-e2e/Regression/build_log.cpp index d3dd3971122c5..bae74102a583e 100644 --- a/sycl/test-e2e/Regression/build_log.cpp +++ b/sycl/test-e2e/Regression/build_log.cpp @@ -5,8 +5,6 @@ // RUN: FileCheck %s --check-prefix=CHECK-EXPECTED-ERROR --input-file %t.out // CHECK-EXPECTED-ERROR: error: backend compiler failed build -// XFAIL: * - #include #include diff --git a/sycl/test-e2e/Regression/complex_global_object.cpp b/sycl/test-e2e/Regression/complex_global_object.cpp index 06a7187a46926..132a48cc5cdbf 100644 --- a/sycl/test-e2e/Regression/complex_global_object.cpp +++ b/sycl/test-e2e/Regression/complex_global_object.cpp @@ -1,6 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// XFAIL: gpu && linux // SYCL runtime may construct global objects at function scope. The test ensures // such objects do not cause problems if the first call to SYCL is inside main diff --git a/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp b/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp index 765cbbad75327..413dc4ab9827f 100644 --- a/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp +++ b/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp @@ -1,7 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// XFAIL: cuda // UNSUPPORTED: hip // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/Regression/reduction_resource_leak_dw.cpp b/sycl/test-e2e/Regression/reduction_resource_leak_dw.cpp index ac4cf20ad00f8..81eb463f2d19c 100644 --- a/sycl/test-e2e/Regression/reduction_resource_leak_dw.cpp +++ b/sycl/test-e2e/Regression/reduction_resource_leak_dw.cpp @@ -1,6 +1,4 @@ // REQUIRES: level_zero, level_zero_dev_kit -// XFAIL: windows -// // RUN: %{build} %level_zero_options -o %t.out // RUN: %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s // From f5816a4eafe6224daccbbae573ba1d1cdd4e57bb Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Tue, 17 Dec 2024 10:31:52 -0800 Subject: [PATCH 22/22] Remove XFAILs from remaining E2E tests --- sycl/test-e2e/AddressSanitizer/nullpointer/private_nullptr.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/AddressSanitizer/nullpointer/private_nullptr.cpp b/sycl/test-e2e/AddressSanitizer/nullpointer/private_nullptr.cpp index 27b4b5a6fab17..f33ed512dcd97 100644 --- a/sycl/test-e2e/AddressSanitizer/nullpointer/private_nullptr.cpp +++ b/sycl/test-e2e/AddressSanitizer/nullpointer/private_nullptr.cpp @@ -7,7 +7,6 @@ // RUN: %{run} not %t3.out 2>&1 | FileCheck %s // FIXME: There's an issue in gfx driver, so this test pending here. -// XFAIL: * #include #include