Skip to content

Commit

Permalink
Add implementation of sycl_ext_oneapi_enqueue_functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tom91136 committed Dec 7, 2023
1 parent 15fb08b commit 2683153
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 42 deletions.
46 changes: 24 additions & 22 deletions src/sycl2020-acc/SYCLStream2020.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

#include <iostream>

#include "sycl_ext_enqueue_functions.h"
namespace syclex = sycl::ext::oneapi::experimental;

// Cache list of devices
bool cached = false;
std::vector<sycl::device> devices;
Expand Down Expand Up @@ -75,11 +78,11 @@ SYCLStream<T>::SYCLStream(const size_t ARRAY_SIZE, const int device_index)
template <class T>
void SYCLStream<T>::copy()
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
sycl::accessor ka {d_a, cgh, sycl::read_only};
sycl::accessor kc {d_c, cgh, sycl::write_only};
cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx)
{
kc[idx] = ka[idx];
});
Expand All @@ -91,11 +94,11 @@ template <class T>
void SYCLStream<T>::mul()
{
const T scalar = startScalar;
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
sycl::accessor kb {d_b, cgh, sycl::write_only};
sycl::accessor kc {d_c, cgh, sycl::read_only};
cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx)
{
kb[idx] = scalar * kc[idx];
});
Expand All @@ -106,12 +109,12 @@ void SYCLStream<T>::mul()
template <class T>
void SYCLStream<T>::add()
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
sycl::accessor ka {d_a, cgh, sycl::read_only};
sycl::accessor kb {d_b, cgh, sycl::read_only};
sycl::accessor kc {d_c, cgh, sycl::write_only};
cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx)
{
kc[idx] = ka[idx] + kb[idx];
});
Expand All @@ -123,12 +126,12 @@ template <class T>
void SYCLStream<T>::triad()
{
const T scalar = startScalar;
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
sycl::accessor ka {d_a, cgh, sycl::write_only};
sycl::accessor kb {d_b, cgh, sycl::read_only};
sycl::accessor kc {d_c, cgh, sycl::read_only};
cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx)
{
ka[idx] = kb[idx] + scalar * kc[idx];
});
Expand All @@ -141,12 +144,12 @@ void SYCLStream<T>::nstream()
{
const T scalar = startScalar;

queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
sycl::accessor ka {d_a, cgh};
sycl::accessor kb {d_b, cgh, sycl::read_only};
sycl::accessor kc {d_c, cgh, sycl::read_only};
cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx)
{
ka[idx] += kb[idx] + scalar * kc[idx];
});
Expand All @@ -158,24 +161,23 @@ template <class T>
T SYCLStream<T>::dot()
{

queue->submit([&](sycl::handler &cgh)
{
syclex::submit(*queue, [&](sycl::handler &cgh) {
sycl::accessor ka {d_a, cgh, sycl::read_only};
sycl::accessor kb {d_b, cgh, sycl::read_only};

cgh.parallel_for(sycl::range<1>{array_size},
syclex::parallel_for(
cgh, sycl::range<1>{array_size},
[=](sycl::id<1> idx, auto &sum) {
sum += ka[idx] * kb[idx];
},
// Reduction object, to perform summation - initialises the result to zero
// hipSYCL doesn't sypport the initialize_to_identity property yet
#if defined(__HIPSYCL__) || defined(__OPENSYCL__)
sycl::reduction(d_sum. template get_access<sycl::access_mode::read_write>(cgh), sycl::plus<T>()),
sycl::reduction(d_sum.template get_access<sycl::access_mode::read_write>(cgh), sycl::plus<T>())
#else
sycl::reduction(d_sum, cgh, sycl::plus<T>(), sycl::property::reduction::initialize_to_identity{}),
sycl::reduction(d_sum, cgh, sycl::plus<T>(), sycl::property::reduction::initialize_to_identity{})
#endif
[=](sycl::id<1> idx, auto& sum)
{
sum += ka[idx] * kb[idx];
});

);
});

// Get access on the host, and return a copy of the data (single number)
Expand All @@ -188,13 +190,13 @@ T SYCLStream<T>::dot()
template <class T>
void SYCLStream<T>::init_arrays(T initA, T initB, T initC)
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
sycl::accessor ka {d_a, cgh, sycl::write_only, sycl::no_init};
sycl::accessor kb {d_b, cgh, sycl::write_only, sycl::no_init};
sycl::accessor kc {d_c, cgh, sycl::write_only, sycl::no_init};

cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=](sycl::id<1> idx)
{
ka[idx] = initA;
kb[idx] = initB;
Expand Down
44 changes: 24 additions & 20 deletions src/sycl2020-usm/SYCLStream2020.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

#include <iostream>

#include "sycl_ext_enqueue_functions.h"
namespace syclex = sycl::ext::oneapi::experimental;

// Cache list of devices
bool cached = false;
std::vector<sycl::device> devices;
Expand Down Expand Up @@ -83,9 +86,9 @@ SYCLStream<T>::~SYCLStream() {
template <class T>
void SYCLStream<T>::copy()
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size}, [=, c = this->c, a = this->a](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, c = this->c, a = this->a](sycl::id<1> idx)
{
c[idx] = a[idx];
});
Expand All @@ -97,9 +100,9 @@ template <class T>
void SYCLStream<T>::mul()
{
const T scalar = startScalar;
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size}, [=, b = this->b, c = this->c](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, b = this->b, c = this->c](sycl::id<1> idx)
{
b[idx] = scalar * c[idx];
});
Expand All @@ -110,9 +113,9 @@ void SYCLStream<T>::mul()
template <class T>
void SYCLStream<T>::add()
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size}, [=, c = this->c, a = this->a, b = this->b](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, c = this->c, a = this->a, b = this->b](sycl::id<1> idx)
{
c[idx] = a[idx] + b[idx];
});
Expand All @@ -124,9 +127,9 @@ template <class T>
void SYCLStream<T>::triad()
{
const T scalar = startScalar;
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx)
{
a[idx] = b[idx] + scalar * c[idx];
});
Expand All @@ -139,9 +142,9 @@ void SYCLStream<T>::nstream()
{
const T scalar = startScalar;

queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx)
{
a[idx] += b[idx] + scalar * c[idx];
});
Expand All @@ -152,20 +155,21 @@ void SYCLStream<T>::nstream()
template <class T>
T SYCLStream<T>::dot()
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size},
syclex::parallel_for(cgh, sycl::range<1>{array_size},
[a = this->a, b = this->b](sycl::id<1> idx, auto& sum)
{
sum += a[idx] * b[idx];
},
// Reduction object, to perform summation - initialises the result to zero
// hipSYCL doesn't sypport the initialize_to_identity property yet
#if defined(__HIPSYCL__) || defined(__OPENSYCL__)
sycl::reduction(sum, sycl::plus<T>()),
sycl::reduction(sum, sycl::plus<T>())
#else
sycl::reduction(sum, sycl::plus<T>(), sycl::property::reduction::initialize_to_identity{}),
sycl::reduction(sum, sycl::plus<T>(), sycl::property::reduction::initialize_to_identity{})
#endif
[a = this->a, b = this->b](sycl::id<1> idx, auto& sum)
{
sum += a[idx] * b[idx];
});
);

});
queue->wait();
Expand All @@ -175,9 +179,9 @@ T SYCLStream<T>::dot()
template <class T>
void SYCLStream<T>::init_arrays(T initA, T initB, T initC)
{
queue->submit([&](sycl::handler &cgh)
syclex::submit(*queue, [&](sycl::handler &cgh)
{
cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx)
syclex::parallel_for(cgh, sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx)
{
a[idx] = initA;
b[idx] = initB;
Expand Down
66 changes: 66 additions & 0 deletions src/sycl_ext_enqueue_functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma once

#include "CL/sycl.hpp"

// Shim for https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_enqueue_functions.asciidoc
// Tested against 2023.2.0
namespace sycl::ext::oneapi::experimental {


template<typename Range, typename Properties = detail::empty_properties_t>
class launch_config {
Range r;
Properties p;
public:
launch_config(Range r, Properties p = {}) : r(r), p(p) {}
};

template<typename CommandGroupFunc>
void submit(sycl::queue q, CommandGroupFunc &&cgf) {
q.submit(std::forward<CommandGroupFunc>(cgf));
}

template<typename CommandGroupFunc>
sycl::event submit_with_event(sycl::queue q, CommandGroupFunc &&cgf) {
return q.submit(std::forward<CommandGroupFunc>(cgf));
}


template<typename KernelName = ::sycl::detail::auto_name, typename KernelType>
void single_task(sycl::queue &q, const KernelType &k) {
q.single_task<KernelName>(k);
}

template<typename KernelName = ::sycl::detail::auto_name, typename KernelType>
void single_task(sycl::handler &h, const KernelType &k) {
h.single_task<KernelName>(k);
}

template<typename KernelName = ::sycl::detail::auto_name, int Dimensions,
typename KernelType, typename... Reductions>
void parallel_for(sycl::queue &q, sycl::range<Dimensions> r,
const KernelType &k, Reductions &&... reductions) {
q.parallel_for<KernelName>(r, std::forward<Reductions>(reductions)..., k);
}

template<typename KernelName = ::sycl::detail::auto_name, int Dimensions,
typename KernelType, typename... Reductions>
void parallel_for(sycl::handler &h, sycl::range<Dimensions> r,
const KernelType &k, Reductions &&... reductions) {
h.parallel_for<KernelName>(r, std::forward<Reductions>(reductions)..., k);
}

template<typename KernelName = ::sycl::detail::auto_name, int Dimensions, typename... Args>
void parallel_for(sycl::queue &q, sycl::range<Dimensions> r,
const sycl::kernel &k, Args &&... args) {
q.parallel_for<KernelName>(r, std::forward<Args>(args)..., k);
}

template<typename KernelName = ::sycl::detail::auto_name, int Dimensions, typename... Args>
void parallel_for(sycl::handler &h, sycl::range<Dimensions> r,
const sycl::kernel &k, Args &&... args) {
h.parallel_for<KernelName>(r, std::forward<Args>(args)..., k);
}


}

0 comments on commit 2683153

Please sign in to comment.