Skip to content

Commit

Permalink
Add SYCL copy support to tensor (#6764)
Browse files Browse the repository at this point in the history
* Add SYCL support to Tensor To method
* Check for non-contiguous sycl tensors (not supported)
---------
Co-authored-by: Sameer Sheorey <41028320+ssheorey@users.noreply.github.com>
  • Loading branch information
lumurillo authored May 19, 2024
1 parent 1b55f11 commit 69786b6
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 18 deletions.
1 change: 1 addition & 0 deletions cpp/open3d/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ target_sources(core PRIVATE
kernel/ReductionCPU.cpp
kernel/UnaryEW.cpp
kernel/UnaryEWCPU.cpp
kernel/UnaryEWSYCL.cpp
linalg/AddMM.cpp
linalg/AddMMCPU.cpp
linalg/Det.cpp
Expand Down
4 changes: 4 additions & 0 deletions cpp/open3d/core/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ class IsDevice {
/// Returns true iff the type of the device reported by GetDevice() is CUDA.
inline bool IsCUDA() const {
    const auto device_type = GetDevice().GetType();
    return device_type == Device::DeviceType::CUDA;
}

/// Returns true iff the type of the device reported by GetDevice() is SYCL.
inline bool IsSYCL() const {
    const auto device_type = GetDevice().GetType();
    return device_type == Device::DeviceType::SYCL;
}
};

} // namespace core
Expand Down
2 changes: 1 addition & 1 deletion cpp/open3d/core/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ Tensor Tensor::Contiguous() const {
std::string Tensor::ToString(bool with_suffix,
const std::string& indent) const {
std::ostringstream rc;
if (IsCUDA() || !IsContiguous()) {
if (IsCUDA() || IsSYCL() || !IsContiguous()) {
Tensor host_contiguous_tensor = Contiguous().To(Device("CPU:0"));
rc << host_contiguous_tensor.ToString(false, indent);
} else {
Expand Down
16 changes: 12 additions & 4 deletions cpp/open3d/core/kernel/UnaryEW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,28 @@ void Copy(const Tensor& src, Tensor& dst) {
src.GetShape(), dst.GetShape());
}

// Disbatch to device
// Dispatch to device
Device src_device = src.GetDevice();
Device dst_device = dst.GetDevice();
if ((!src_device.IsCPU() && !src_device.IsCUDA()) ||
(!dst_device.IsCPU() && !dst_device.IsCUDA())) {
if ((!src_device.IsCPU() && !src_device.IsCUDA() && !src_device.IsSYCL()) ||
(!dst_device.IsCPU() && !dst_device.IsCUDA() && !dst_device.IsSYCL())) {
utility::LogError("Copy: Unimplemented device");
}
if (src_device.IsCPU() && dst_device.IsCPU()) {
CopyCPU(src, dst);
} else {
} else if ((src_device.IsCPU() || src_device.IsCUDA()) &&
(dst_device.IsCPU() || dst_device.IsCUDA())) {
#ifdef BUILD_CUDA_MODULE
CopyCUDA(src, dst);
#else
utility::LogError("Not compiled with CUDA, but CUDA device is used.");
#endif
} else if ((src_device.IsCPU() || src_device.IsSYCL()) &&
(dst_device.IsCPU() || dst_device.IsSYCL())) {
#ifdef BUILD_SYCL_MODULE
CopySYCL(src, dst);
#else
utility::LogError("Not compiled with SYCL, but SYCL device is used.");
#endif
}
}
Expand Down
4 changes: 4 additions & 0 deletions cpp/open3d/core/kernel/UnaryEW.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ void CopyCPU(const Tensor& src, Tensor& dst);
void CopyCUDA(const Tensor& src, Tensor& dst);
#endif

#ifdef BUILD_SYCL_MODULE
/// Copies \p src into \p dst for the case where at least one of the two
/// tensors is on a SYCL device. Only declared when BUILD_SYCL_MODULE is
/// defined.
void CopySYCL(const Tensor& src, Tensor& dst);
#endif

} // namespace kernel
} // namespace core
} // namespace open3d
53 changes: 53 additions & 0 deletions cpp/open3d/core/kernel/UnaryEWSYCL.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// ----------------------------------------------------------------------------
// - Open3D: www.open3d.org -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2023 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#include <cmath>
#include <cstring>

#include "open3d/core/Dtype.h"
#include "open3d/core/MemoryManager.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"
#include "open3d/core/kernel/UnaryEW.h"
#include "open3d/utility/Logging.h"

namespace open3d {
namespace core {
namespace kernel {

/// Copies \p src into \p dst when at least one of them is on a SYCL device.
///
/// Restrictions (each violation is reported through utility::LogError):
/// - \p src and \p dst must have the same dtype (no conversion).
/// - A tensor that lives on a SYCL device must be contiguous.
/// - A copy whose destination is on a SYCL device must not require
///   broadcasting (src and dst shapes must be equal).
///
/// \param src Source tensor; a non-contiguous source is made contiguous
///            before copying.
/// \param dst Destination tensor, written in place.
void CopySYCL(const Tensor& src, Tensor& dst) {
    // It has been checked that
    // - at least one of src or dst is SYCL device
    Dtype src_dtype = src.GetDtype();
    Dtype dst_dtype = dst.GetDtype();
    Device dst_device = dst.GetDevice();
    Device src_device = src.GetDevice();

    if (src_dtype != dst_dtype) {
        utility::LogError(
                "CopySYCL: Dtype conversion from src to dst not implemented!");
    }
    if ((dst_device.IsSYCL() && !dst.IsContiguous()) ||
        (src_device.IsSYCL() && !src.IsContiguous())) {
        utility::LogError(
                "CopySYCL: NonContiguous SYCL tensor Copy not implemented!");
    }

    Tensor src_conti = src.Contiguous();  // No op if already contiguous
    if (dst.IsContiguous() && src.GetShape() == dst.GetShape() &&
        src_dtype == dst_dtype) {
        // Same layout on both sides: one raw byte copy is sufficient.
        MemoryManager::Memcpy(
                dst.GetDataPtr(), dst_device, src_conti.GetDataPtr(),
                src_conti.GetDevice(),
                src_dtype.ByteSize() * src.GetShape().NumElements());
        return;
    }

    if (dst_device.IsSYCL()) {
        // dst is contiguous here (checked above), so the shapes differ. The
        // generic fallback below would hand a SYCL -> SYCL copy with the same
        // shape mismatch back to this function and recurse without end, so
        // report the unsupported case explicitly instead.
        utility::LogError(
                "CopySYCL: Broadcasting copy to a SYCL tensor not "
                "implemented!");
    }

    // dst is not on SYCL: move src to dst's device, then let the copy
    // implementation for that device handle strides / broadcasting.
    dst.CopyFrom(src_conti.To(dst_device));
}

} // namespace kernel
} // namespace core
} // namespace open3d
39 changes: 27 additions & 12 deletions cpp/tests/core/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,24 @@ INSTANTIATE_TEST_SUITE_P(Tensor,
TensorPermuteDevices,
testing::ValuesIn(PermuteDevices::TestCases()));

// Fixture for Tensor tests parameterized over the devices returned by
// PermuteDevicesWithSYCL::TestCases().
// NOTE(review): the fixture derives from PermuteDevices while the parameter
// list comes from PermuteDevicesWithSYCL — presumably both use the same
// parameter type; confirm whether the base should be PermuteDevicesWithSYCL
// for consistency with TensorPermuteDevicePairsWithSYCL.
class TensorPermuteDevicesWithSYCL : public PermuteDevices {};
INSTANTIATE_TEST_SUITE_P(
Tensor,
TensorPermuteDevicesWithSYCL,
testing::ValuesIn(PermuteDevicesWithSYCL::TestCases()));

// Fixture for Tensor tests whose parameter is unpacked as a
// (dst_device, src_device) pair; instantiated with the cases from the
// TestCases() function reachable through the PermuteDevicePairs base.
class TensorPermuteDevicePairs : public PermuteDevicePairs {};
INSTANTIATE_TEST_SUITE_P(
Tensor,
TensorPermuteDevicePairs,
testing::ValuesIn(TensorPermuteDevicePairs::TestCases()));

// Fixture for Tensor tests whose parameter is unpacked as a
// (dst_device, src_device) pair; instantiated with the cases from the
// TestCases() function reachable through the PermuteDevicePairsWithSYCL base
// (presumably the device pairs extended with SYCL devices — confirm in the
// test utilities).
class TensorPermuteDevicePairsWithSYCL : public PermuteDevicePairsWithSYCL {};
INSTANTIATE_TEST_SUITE_P(
Tensor,
TensorPermuteDevicePairsWithSYCL,
testing::ValuesIn(TensorPermuteDevicePairsWithSYCL::TestCases()));

class TensorPermuteSizesDefaultStridesAndDevices
: public testing::TestWithParam<
std::tuple<std::pair<core::SizeVector, core::SizeVector>,
Expand All @@ -54,7 +66,7 @@ static constexpr const T &AsConst(T &t) noexcept {
return t;
}

TEST_P(TensorPermuteDevices, Constructor) {
TEST_P(TensorPermuteDevicesWithSYCL, Constructor) {
core::Device device = GetParam();
core::Dtype dtype = core::Float32;

Expand All @@ -71,7 +83,7 @@ TEST_P(TensorPermuteDevices, Constructor) {
EXPECT_ANY_THROW(core::Tensor({-1, -1}, dtype, device));
}

TEST_P(TensorPermuteDevices, ConstructorBool) {
TEST_P(TensorPermuteDevicesWithSYCL, ConstructorBool) {
core::Device device = GetParam();

core::SizeVector shape{2, 3};
Expand Down Expand Up @@ -105,7 +117,7 @@ TEST_P(TensorPermuteDevices, WithInitValue) {
EXPECT_EQ(t.ToFlatVector<float>(), vals);
}

TEST_P(TensorPermuteDevices, WithInitList) {
TEST_P(TensorPermuteDevicesWithSYCL, WithInitList) {
core::Device device = GetParam();

core::Tensor t;
Expand Down Expand Up @@ -187,23 +199,23 @@ TEST_P(TensorPermuteDevices, WithInitList) {
std::exception);
}

TEST_P(TensorPermuteDevices, WithInitValueBool) {
TEST_P(TensorPermuteDevicesWithSYCL, WithInitValueBool) {
core::Device device = GetParam();

std::vector<bool> vals{true, false, true, true, false, false};
core::Tensor t(vals, {2, 3}, core::Bool, device);
EXPECT_EQ(t.ToFlatVector<bool>(), vals);
}

TEST_P(TensorPermuteDevices, WithInitValueTypeMismatch) {
TEST_P(TensorPermuteDevicesWithSYCL, WithInitValueTypeMismatch) {
core::Device device = GetParam();

std::vector<int> vals{0, 1, 2, 3, 4, 5};
EXPECT_THROW(core::Tensor(vals, {2, 3}, core::Float32, device),
std::runtime_error);
}

TEST_P(TensorPermuteDevices, WithInitValueSizeMismatch) {
TEST_P(TensorPermuteDevicesWithSYCL, WithInitValueSizeMismatch) {
core::Device device = GetParam();

std::vector<float> vals{0, 1, 2, 3, 4};
Expand Down Expand Up @@ -298,7 +310,7 @@ TEST_P(TensorPermuteDevicePairs, IndexSetFillFancy) {
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}));
}

TEST_P(TensorPermuteDevicePairs, Copy) {
TEST_P(TensorPermuteDevicePairsWithSYCL, Copy) {
core::Device dst_device;
core::Device src_device;
std::tie(dst_device, src_device) = GetParam();
Expand All @@ -317,7 +329,7 @@ TEST_P(TensorPermuteDevicePairs, Copy) {
EXPECT_EQ(dst_t.ToFlatVector<float>(), vals);
}

TEST_P(TensorPermuteDevicePairs, CopyBool) {
TEST_P(TensorPermuteDevicePairsWithSYCL, CopyBool) {
core::Device dst_device;
core::Device src_device;
std::tie(dst_device, src_device) = GetParam();
Expand Down Expand Up @@ -357,12 +369,15 @@ TEST_P(TensorPermuteDevicePairs, ToDevice) {
core::Device src_device;
std::tie(dst_device, src_device) = GetParam();

core::Tensor src_t = core::Tensor::Init<float>({0, 1, 2, 3}, src_device);
core::Tensor src_t =
core::Tensor::Init<float>({0.f, 1.f, 2.f, 3.f}, src_device);
core::Tensor dst_t = src_t.To(dst_device);
EXPECT_TRUE(dst_t.To(src_device).AllClose(src_t));

EXPECT_ANY_THROW(src_t.To(core::Device("CPU:1")));

EXPECT_ANY_THROW(src_t.To(core::Device("SYCL:100")));

EXPECT_ANY_THROW(src_t.To(core::Device("CUDA:-1")));
EXPECT_ANY_THROW(src_t.To(core::Device("CUDA:100000")));
}
Expand Down Expand Up @@ -529,7 +544,7 @@ TEST_P(TensorPermuteDevices, Flatten) {
EXPECT_ANY_THROW(src_t.Flatten(2, 1));
}

TEST_P(TensorPermuteDevices, DefaultStrides) {
TEST_P(TensorPermuteDevicesWithSYCL, DefaultStrides) {
core::Device device = GetParam();

core::Tensor t0({}, core::Float32, device);
Expand Down Expand Up @@ -663,7 +678,7 @@ TEST_P(TensorPermuteDevices, ItemAssign) {
EXPECT_EQ(t[1][2][3].Item<float>(), 101);
}

TEST_P(TensorPermuteDevices, ToString) {
TEST_P(TensorPermuteDevicesWithSYCL, ToString) {
using ::testing::AnyOf;
core::Device device = GetParam();
core::Tensor t;
Expand Down Expand Up @@ -738,7 +753,7 @@ TEST_P(TensorPermuteDevices, ToString) {
[True False False]])");
}

TEST_P(TensorPermuteDevicePairs, CopyContiguous) {
TEST_P(TensorPermuteDevicePairsWithSYCL, CopyContiguous) {
core::Device dst_device;
core::Device src_device;
std::tie(dst_device, src_device) = GetParam();
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ RUN conda --version \
# Activate open3d virtualenv
# This works during docker build. It becomes the prefix of all RUN commands.
# Ref: https://stackoverflow.com/a/60148365/1255535
SHELL ["conda", "run", "-n", "open3d", "/bin/bash", "-c"]
SHELL ["conda", "run", "-n", "open3d", "/bin/bash", "-o", "pipefail", "-c"]

# Dependencies: cmake
ENV PATH=${HOME}/${CMAKE_VERSION}/bin:${PATH}
Expand Down

0 comments on commit 69786b6

Please sign in to comment.