diff --git a/src/sycl2020-acc/SYCLStream2020.cpp b/src/sycl2020-acc/SYCLStream2020.cpp index 0de24bbb..73ba104f 100644 --- a/src/sycl2020-acc/SYCLStream2020.cpp +++ b/src/sycl2020-acc/SYCLStream2020.cpp @@ -9,6 +9,9 @@ #include +#include "sycl_ext_enqueue_functions.h" +namespace syclex = sycl::ext::oneapi::experimental; + // Cache list of devices bool cached = false; std::vector devices; @@ -75,11 +78,11 @@ SYCLStream::SYCLStream(const size_t ARRAY_SIZE, const int device_index) template void SYCLStream::copy() { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor ka {d_a, cgh, sycl::read_only}; sycl::accessor kc {d_c, cgh, sycl::write_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx) { kc[idx] = ka[idx]; }); @@ -91,11 +94,11 @@ template void SYCLStream::mul() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor kb {d_b, cgh, sycl::write_only}; sycl::accessor kc {d_c, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx) { kb[idx] = scalar * kc[idx]; }); @@ -106,12 +109,12 @@ void SYCLStream::mul() template void SYCLStream::add() { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor ka {d_a, cgh, sycl::read_only}; sycl::accessor kb {d_b, cgh, sycl::read_only}; sycl::accessor kc {d_c, cgh, sycl::write_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx) { kc[idx] = ka[idx] + kb[idx]; }); @@ -123,12 +126,12 @@ template void SYCLStream::triad() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor ka {d_a, cgh, sycl::write_only}; sycl::accessor kb {d_b, cgh, sycl::read_only}; sycl::accessor kc {d_c, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx) { ka[idx] = kb[idx] + scalar * kc[idx]; }); @@ -141,12 +144,12 @@ void SYCLStream::nstream() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor ka {d_a, cgh}; sycl::accessor kb {d_b, cgh, sycl::read_only}; sycl::accessor kc {d_c, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx) { ka[idx] += kb[idx] + scalar * kc[idx]; }); @@ -158,24 +161,23 @@ template T SYCLStream::dot() { - queue->submit([&](sycl::handler &cgh) - { + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor ka {d_a, cgh, sycl::read_only}; sycl::accessor kb {d_b, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, + syclex::parallel_for( + cgh, sycl::range<1>{array_size}, + [=](sycl::id<1> idx, auto &sum) { + sum += ka[idx] * kb[idx]; + }, // Reduction object, to perform summation - initialises the result to zero // hipSYCL doesn't sypport the initialize_to_identity property yet #if defined(__HIPSYCL__) || defined(__OPENSYCL__) - sycl::reduction(d_sum. template get_access(cgh), sycl::plus()), + sycl::reduction(d_sum.template get_access(cgh), sycl::plus()) #else - sycl::reduction(d_sum, cgh, sycl::plus(), sycl::property::reduction::initialize_to_identity{}), + sycl::reduction(d_sum, cgh, sycl::plus(), sycl::property::reduction::initialize_to_identity{}) #endif - [=](sycl::id<1> idx, auto& sum) - { - sum += ka[idx] * kb[idx]; - }); - + ); }); // Get access on the host, and return a copy of the data (single number) @@ -188,13 +190,13 @@ T SYCLStream::dot() template void SYCLStream::init_arrays(T initA, T initB, T initC) { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { sycl::accessor ka {d_a, cgh, sycl::write_only, sycl::no_init}; sycl::accessor kb {d_b, cgh, sycl::write_only, sycl::no_init}; sycl::accessor kc {d_c, cgh, sycl::write_only, sycl::no_init}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx) { ka[idx] = initA; kb[idx] = initB; diff --git a/src/sycl2020-usm/SYCLStream2020.cpp b/src/sycl2020-usm/SYCLStream2020.cpp index 21a8a47b..a6f7355c 100644 --- a/src/sycl2020-usm/SYCLStream2020.cpp +++ b/src/sycl2020-usm/SYCLStream2020.cpp @@ -9,6 +9,9 @@ #include +#include "sycl_ext_enqueue_functions.h" +namespace syclex = sycl::ext::oneapi::experimental; + // Cache list of devices bool cached = false; std::vector devices; @@ -83,9 +86,9 @@ SYCLStream::~SYCLStream() { template void SYCLStream::copy() { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, c = this->c, a = this->a](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, c = this->c, a = this->a](sycl::id<1> idx) { c[idx] = a[idx]; }); @@ -97,9 +100,9 @@ template void SYCLStream::mul() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, b = this->b, c = this->c](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, b = this->b, c = this->c](sycl::id<1> idx) { b[idx] = scalar * c[idx]; }); @@ -110,9 +113,9 @@ void SYCLStream::mul() template void SYCLStream::add() { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, c = this->c, a = this->a, b = this->b](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, c = this->c, a = this->a, b = this->b](sycl::id<1> idx) { c[idx] = a[idx] + b[idx]; }); @@ -124,9 +127,9 @@ template void SYCLStream::triad() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) { a[idx] = b[idx] + scalar * c[idx]; }); @@ -139,9 +142,9 @@ void SYCLStream::nstream() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) { a[idx] += b[idx] + scalar * c[idx]; }); @@ -152,20 +155,21 @@ void SYCLStream::nstream() template T SYCLStream::dot() { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, + syclex::parallel_for(cgh, sycl::range<1>{array_size}, + [a = this->a, b = this->b](sycl::id<1> idx, auto& sum) + { + sum += a[idx] * b[idx]; + }, // Reduction object, to perform summation - initialises the result to zero // hipSYCL doesn't sypport the initialize_to_identity property yet #if defined(__HIPSYCL__) || defined(__OPENSYCL__) - sycl::reduction(sum, sycl::plus()), + sycl::reduction(sum, sycl::plus()) #else - sycl::reduction(sum, sycl::plus(), sycl::property::reduction::initialize_to_identity{}), + sycl::reduction(sum, sycl::plus(), sycl::property::reduction::initialize_to_identity{}) #endif - [a = this->a, b = this->b](sycl::id<1> idx, auto& sum) - { - sum += a[idx] * b[idx]; - }); + ); }); queue->wait(); @@ -175,9 +179,9 @@ T SYCLStream::dot() template void SYCLStream::init_arrays(T initA, T initB, T initC) { - queue->submit([&](sycl::handler &cgh) + syclex::submit(*queue, [&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) + syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) { a[idx] = initA; b[idx] = initB; diff --git a/src/sycl_ext_enqueue_functions.h b/src/sycl_ext_enqueue_functions.h new file mode 100644 index 00000000..6afe3b96 --- /dev/null +++ b/src/sycl_ext_enqueue_functions.h @@ -0,0 +1,66 @@ +#pragma once + +#include "CL/sycl.hpp" + +// Shim for https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_enqueue_functions.asciidoc +// Tested against 2023.2.0 +namespace sycl::ext::oneapi::experimental { + + + template + class launch_config { + Range r; + Properties p; + public: + launch_config(Range r, Properties p = {}) : r(r), p(p) {} + }; + + template + void submit(sycl::queue q, CommandGroupFunc &&cgf) { + q.submit(std::forward(cgf)); + } + + template + sycl::event submit_with_event(sycl::queue q, CommandGroupFunc &&cgf) { + return q.submit(std::forward(cgf)); + } + + + template + void single_task(sycl::queue &q, const KernelType &k) { + q.single_task(k); + } + + template + void single_task(sycl::handler &h, const KernelType &k) { + h.single_task(k); + } + + template + void parallel_for(sycl::queue &q, sycl::range r, + const KernelType &k, Reductions &&... reductions) { + q.parallel_for(r, std::forward(reductions)..., k); + } + + template + void parallel_for(sycl::handler &h, sycl::range r, + const KernelType &k, Reductions &&... reductions) { + h.parallel_for(r, std::forward(reductions)..., k); + } + + template + void parallel_for(sycl::queue &q, sycl::range r, + const sycl::kernel &k, Args &&... args) { + q.parallel_for(r, std::forward(args)..., k); + } + + template + void parallel_for(sycl::handler &h, sycl::range r, + const sycl::kernel &k, Args &&... args) { + h.parallel_for(r, std::forward(args)..., k); + } + + +} \ No newline at end of file