diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index 559821f4484dc..502c20a21e987 100644 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -222,7 +222,9 @@ class buffer { bool is_sub_buffer() const { return IsSubBuffer; } template - buffer + typename std::enable_if< + ReinterpretDim == dimensions, + buffer>::type reinterpret(range reinterpretRange) const { if (sizeof(ReinterpretT) * reinterpretRange.size() != get_size()) throw cl::sycl::invalid_object_error( @@ -237,7 +239,26 @@ class buffer { Offset[dimensions - 1] * sizeof(T) / sizeof(ReinterpretT); return buffer( - impl, reinterpretRange, NewOffset, IsSubBuffer); + impl, reinterpretRange, MemRange, NewOffset, IsSubBuffer); + } + + template + typename std::enable_if< + ReinterpretDim != dimensions, + buffer>::type + reinterpret(range reinterpretRange) const { + if (sizeof(ReinterpretT) * reinterpretRange.size() != get_size()) + throw cl::sycl::invalid_object_error( + "Total size in bytes represented by the type and range of the " + "reinterpreted SYCL buffer does not equal the total size in bytes " + "represented by the type and range of this SYCL buffer"); + + id NewOffset{}; + NewOffset[ReinterpretDim - 1] = + Offset[dimensions - 1] * sizeof(T) / sizeof(ReinterpretT); + + return buffer( + impl, reinterpretRange, reinterpretRange, NewOffset, IsSubBuffer); } template bool has_property() const { @@ -267,10 +288,11 @@ class buffer { // Reinterpret contructor buffer(shared_ptr_class> Impl, - range reinterpretRange, idreinterpretOffset, - bool isSubBuffer) - : impl(Impl), Range(reinterpretRange), MemRange(reinterpretRange), - IsSubBuffer(isSubBuffer), Offset(reinterpretOffset) {}; + range reinterpretRange, + range reinterpretMemRange, + id reinterpretOffset, bool isSubBuffer) + : impl(Impl), Range(reinterpretRange), MemRange(reinterpretMemRange), + IsSubBuffer(isSubBuffer), Offset(reinterpretOffset){}; }; } // namespace sycl } // namespace cl diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.hpp b/sycl/include/CL/sycl/detail/scheduler/commands.hpp index 11950df04b12a..39996c84a8c2d 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.hpp +++ b/sycl/include/CL/sycl/detail/scheduler/commands.hpp @@ -139,7 +139,9 @@ class ReleaseCommand : public Command { class AllocaCommandBase : public Command { public: AllocaCommandBase(CommandType Type, QueueImplPtr Queue, Requirement Req) - : Command(Type, Queue), MReleaseCmd(Queue, this), MReq(std::move(Req)) {} + : Command(Type, Queue), MReleaseCmd(Queue, this), MReq(std::move(Req)) { + MReq.MAccessMode = access::mode::read_write; + } ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; } @@ -178,7 +180,8 @@ class AllocaSubBufCommand : public AllocaCommandBase { public: AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req, AllocaCommandBase *ParentAlloca) - : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue), Req), + : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue), + std::move(Req)), MParentAlloca(ParentAlloca) { addDep(DepDesc(MParentAlloca, &MReq, MParentAlloca)); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index e84021d0e6ddc..539c75e243ad8 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -179,7 +179,8 @@ Scheduler::GraphBuilder::insertMemCpyCmd(MemObjRecord *Record, Requirement *Req, Deps.insert(AllocaCmdDst); } - AllocaCommandBase *AllocaCmdSrc = findAllocaForReq(Record, Req, SrcQueue); + AllocaCommandBase *AllocaCmdSrc = + getOrCreateAllocaForReq(Record, Req, SrcQueue, true); // Full copy of buffer is needed to avoid loss of data that may be caused // by copying specific range form host to device and backwards. diff --git a/sycl/test/basic_tests/buffer/subbuffer_interop.cpp b/sycl/test/basic_tests/buffer/subbuffer_interop.cpp index 5837fb7499389..a0fb0e5d2992f 100644 --- a/sycl/test/basic_tests/buffer/subbuffer_interop.cpp +++ b/sycl/test/basic_tests/buffer/subbuffer_interop.cpp @@ -14,6 +14,7 @@ #include #include +#include using namespace cl::sycl; @@ -280,5 +281,56 @@ int main() { } } } + + const char *cl_src = "kernel void test(global int *p) { " + " printf(\"offset on device = \%d\\n\", *p);" + " if (p) *p *= 3;" + "}"; + + { + cl::sycl::queue Q; + + // Create OpenCL program + cl_int err; + auto context_cl = Q.get_context().get(); + auto device_cl = Q.get_device().get(); + cl_program program_cl = + clCreateProgramWithSource(context_cl, 1, &cl_src, nullptr, &err); + err = clBuildProgram(program_cl, 1, &device_cl, nullptr, nullptr, nullptr); + cl_kernel kernel_cl = clCreateKernel(program_cl, "test", &err); + cl::sycl::kernel kernel_sycl(kernel_cl, Q.get_context()); + + // Create buffer + constexpr int N = 256; + std::vector v(2 * N); + std::iota(v.begin(), v.end(), 0); + cl::sycl::buffer buf(v.data(), v.size()); + cl::sycl::buffer subbuf(buf, N, N); + + auto subbuf_copy = + new cl::sycl::buffer(subbuf.reinterpret(N)); + + // Test offsets + { + auto host_acc = subbuf_copy->get_access(); + std::cout << "On host: offset = " << host_acc[0] << std::endl; + assert(host_acc[0] == 256); + } + + Q.submit([&](cl::sycl::handler &cgh) { + auto acc = subbuf_copy->get_access(cgh); + cgh.set_args(acc); + cgh.single_task(kernel_sycl); + }); + + Q.wait_and_throw(); + + { + auto host_acc = subbuf_copy->get_access(); + std::cout << "On host: offset = " << host_acc[0] << std::endl; + assert(host_acc[0] == 256 * 3); + } + } + return Failed; }