Skip to content

Commit

Permalink
[SYCL] Fixes for subbuffer reinterpretation (#626)
Browse files Browse the repository at this point in the history
This patch fixes several problems:
1) There was no proper dependency for AllocaSubBuffer. When accessor to
subbuffer was instantiated and was passed to kernel created with OpenCL
interoperability, correct cl_mem object wasn't found which forced
kernel argument to be nullptr.
2) Memory range wasn't passed in buffer constructor in `reinterpret` function.
This led to unexpected behavior after data transmission from device to host.
We can pass the same memory range only in cases when reinterpret dimension
equals to source dimension.
For other cases `reinterpret` didn't change its behaviour.

Signed-off-by: Ivan Karachun <ivan.karachun@intel.com>
  • Loading branch information
Ivan Karachun authored and bader committed Sep 12, 2019
1 parent b38a8e0 commit 916c32d
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 9 deletions.
34 changes: 28 additions & 6 deletions sycl/include/CL/sycl/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ class buffer {
bool is_sub_buffer() const { return IsSubBuffer; }

template <typename ReinterpretT, int ReinterpretDim>
buffer<ReinterpretT, ReinterpretDim, AllocatorT>
typename std::enable_if<
ReinterpretDim == dimensions,
buffer<ReinterpretT, ReinterpretDim, AllocatorT>>::type
reinterpret(range<ReinterpretDim> reinterpretRange) const {
if (sizeof(ReinterpretT) * reinterpretRange.size() != get_size())
throw cl::sycl::invalid_object_error(
Expand All @@ -237,7 +239,26 @@ class buffer {
Offset[dimensions - 1] * sizeof(T) / sizeof(ReinterpretT);

return buffer<ReinterpretT, ReinterpretDim, AllocatorT>(
impl, reinterpretRange, NewOffset, IsSubBuffer);
impl, reinterpretRange, MemRange, NewOffset, IsSubBuffer);
}

template <typename ReinterpretT, int ReinterpretDim>
typename std::enable_if<
ReinterpretDim != dimensions,
buffer<ReinterpretT, ReinterpretDim, AllocatorT>>::type
reinterpret(range<ReinterpretDim> reinterpretRange) const {
if (sizeof(ReinterpretT) * reinterpretRange.size() != get_size())
throw cl::sycl::invalid_object_error(
"Total size in bytes represented by the type and range of the "
"reinterpreted SYCL buffer does not equal the total size in bytes "
"represented by the type and range of this SYCL buffer");

id<ReinterpretDim> NewOffset{};
NewOffset[ReinterpretDim - 1] =
Offset[dimensions - 1] * sizeof(T) / sizeof(ReinterpretT);

return buffer<ReinterpretT, ReinterpretDim, AllocatorT>(
impl, reinterpretRange, reinterpretRange, NewOffset, IsSubBuffer);
}

template <typename propertyT> bool has_property() const {
Expand Down Expand Up @@ -267,10 +288,11 @@ class buffer {

// Reinterpret contructor
buffer(shared_ptr_class<detail::buffer_impl<AllocatorT>> Impl,
range<dimensions> reinterpretRange, id<dimensions>reinterpretOffset,
bool isSubBuffer)
: impl(Impl), Range(reinterpretRange), MemRange(reinterpretRange),
IsSubBuffer(isSubBuffer), Offset(reinterpretOffset) {};
range<dimensions> reinterpretRange,
range<dimensions> reinterpretMemRange,
id<dimensions> reinterpretOffset, bool isSubBuffer)
: impl(Impl), Range(reinterpretRange), MemRange(reinterpretMemRange),
IsSubBuffer(isSubBuffer), Offset(reinterpretOffset){};
};
} // namespace sycl
} // namespace cl
Expand Down
7 changes: 5 additions & 2 deletions sycl/include/CL/sycl/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ class ReleaseCommand : public Command {
class AllocaCommandBase : public Command {
public:
AllocaCommandBase(CommandType Type, QueueImplPtr Queue, Requirement Req)
: Command(Type, Queue), MReleaseCmd(Queue, this), MReq(std::move(Req)) {}
: Command(Type, Queue), MReleaseCmd(Queue, this), MReq(std::move(Req)) {
MReq.MAccessMode = access::mode::read_write;
}

ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; }

Expand Down Expand Up @@ -178,7 +180,8 @@ class AllocaSubBufCommand : public AllocaCommandBase {
public:
AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req,
AllocaCommandBase *ParentAlloca)
: AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue), Req),
: AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue),
std::move(Req)),
MParentAlloca(ParentAlloca) {
addDep(DepDesc(MParentAlloca, &MReq, MParentAlloca));
}
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ Scheduler::GraphBuilder::insertMemCpyCmd(MemObjRecord *Record, Requirement *Req,
Deps.insert(AllocaCmdDst);
}

AllocaCommandBase *AllocaCmdSrc = findAllocaForReq(Record, Req, SrcQueue);
AllocaCommandBase *AllocaCmdSrc =
getOrCreateAllocaForReq(Record, Req, SrcQueue, true);

// Full copy of buffer is needed to avoid loss of data that may be caused
// by copying specific range form host to device and backwards.
Expand Down
52 changes: 52 additions & 0 deletions sycl/test/basic_tests/buffer/subbuffer_interop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <cassert>
#include <memory>
#include <numeric>

using namespace cl::sycl;

Expand Down Expand Up @@ -280,5 +281,56 @@ int main() {
}
}
}

const char *cl_src = "kernel void test(global int *p) { "
" printf(\"offset on device = \%d\\n\", *p);"
" if (p) *p *= 3;"
"}";

{
cl::sycl::queue Q;

// Create OpenCL program
cl_int err;
auto context_cl = Q.get_context().get();
auto device_cl = Q.get_device().get();
cl_program program_cl =
clCreateProgramWithSource(context_cl, 1, &cl_src, nullptr, &err);
err = clBuildProgram(program_cl, 1, &device_cl, nullptr, nullptr, nullptr);
cl_kernel kernel_cl = clCreateKernel(program_cl, "test", &err);
cl::sycl::kernel kernel_sycl(kernel_cl, Q.get_context());

// Create buffer
constexpr int N = 256;
std::vector<int> v(2 * N);
std::iota(v.begin(), v.end(), 0);
cl::sycl::buffer<int, 1> buf(v.data(), v.size());
cl::sycl::buffer<int, 1> subbuf(buf, N, N);

auto subbuf_copy =
new cl::sycl::buffer<int, 1>(subbuf.reinterpret<int, 1>(N));

// Test offsets
{
auto host_acc = subbuf_copy->get_access<cl::sycl::access::mode::read>();
std::cout << "On host: offset = " << host_acc[0] << std::endl;
assert(host_acc[0] == 256 && "Invalid subbuffer origin");
}

Q.submit([&](cl::sycl::handler &cgh) {
auto acc = subbuf_copy->get_access<cl::sycl::access::mode::write>(cgh);
cgh.set_args(acc);
cgh.single_task(kernel_sycl);
});

Q.wait_and_throw();

{
auto host_acc = subbuf_copy->get_access<cl::sycl::access::mode::read>();
std::cout << "On host: offset = " << host_acc[0] << std::endl;
assert(host_acc[0] == 256 * 3 && "Invalid subbuffer origin");
}
}

return Failed;
}

0 comments on commit 916c32d

Please sign in to comment.