-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce backend system for vendor-specific code paths
Since SYCL 2020 does not support multi-dimensional (rectangular) copies for USM pointers, we have to either do it in a loop (slow) or fall back to vendor-specific APIs. This introduces a new "backend" system that does the latter. Currently only "generic" (= SYCL, slow) and CUDA (when using OpenSYCL or DPC++) are supported. Since backends are configured at compile time, this additionally introduces a new integration testing mechanism for testing backends. This requires Celerity to be built with different CMake options, so the test is implemented as a Python script.
- Loading branch information
Showing
23 changed files
with
778 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#pragma once | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
#include "backend/generic_backend.h" | ||
#include "backend/traits.h" | ||
#include "backend/type.h" | ||
|
||
// NOTE: These should not leak any symbols from the backend library (i.e. don't include it in the header) | ||
#if CELERITY_DETAIL_BACKEND_CUDA_ENABLED | ||
#include "backend/cuda_backend.h" | ||
#endif | ||
|
||
// Helper function to instantiate `Template` (during compile time) based on the backend type (a runtime value). | ||
namespace celerity::detail::backend_detail { | ||
/** | ||
 * Invokes `cb` with a default-constructed `Template<B>`, where `B` is the compile-time constant matching the runtime value `type`. | ||
 * | ||
 * Any value other than 'cuda' or 'generic' (including 'unknown') selects `Template<backend::type::unknown>`. | ||
 * The result of `cb` is returned, so `cb` has to yield the same type for every instantiation. | ||
 */ | ||
template <template <backend::type> typename Template, typename Callback> | ||
auto specialize_for_backend(backend::type type, Callback cb) { | ||
    if(type == backend::type::cuda) { return cb(Template<backend::type::cuda>{}); } | ||
    if(type == backend::type::generic) { return cb(Template<backend::type::generic>{}); } | ||
    // Everything else is handled like an unknown backend. | ||
    return cb(Template<backend::type::unknown>{}); | ||
} | ||
} // namespace celerity::detail::backend_detail | ||
|
||
namespace celerity::detail::backend { | ||
|
||
/** | ||
* Returns the detected backend type for this SYCL device. | ||
* | ||
* Returns either a specialized backend or 'unknown', never 'generic'. | ||
*/ | ||
type get_type(const sycl::device& device); | ||
|
||
/** | ||
* Returns the effective backend type for this SYCL device, depending on the detected | ||
* backend type and which backend modules have been compiled. | ||
* | ||
* Returns either a specialized backend or 'generic', never 'unknown'. | ||
*/ | ||
type get_effective_type(const sycl::device& device); | ||
|
||
/** | ||
 * Returns the `value` string of the `backend_detail::name` instantiation selected for the given backend type. | ||
 */ | ||
inline std::string_view get_name(type type) { | ||
    const auto read_value = [](auto tag) { return decltype(tag)::value; }; | ||
    return backend_detail::specialize_for_backend<backend_detail::name>(type, read_value); | ||
} | ||
|
||
/** | ||
 * Performs a strided device copy by delegating to the `backend_operations` implementation chosen for the effective backend of `queue`'s device. | ||
 * | ||
 * All arguments are handed through unchanged to that implementation's `memcpy_strided_device`. | ||
 */ | ||
template <int Dims> | ||
void memcpy_strided_device(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<Dims>& source_range, | ||
const sycl::id<Dims>& source_offset, const sycl::range<Dims>& target_range, const sycl::id<Dims>& target_offset, const sycl::range<Dims>& copy_range) { | ||
    const auto effective_type = get_effective_type(queue.get_device()); | ||
    backend_detail::specialize_for_backend<backend_detail::backend_operations>(effective_type, [&](auto ops) { | ||
        using operations = decltype(ops); | ||
        operations::memcpy_strided_device( | ||
            queue, source_base_ptr, target_base_ptr, elem_size, source_range, source_offset, target_range, target_offset, copy_range); | ||
    }); | ||
} | ||
|
||
} // namespace celerity::detail::backend |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#pragma once | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
#include "backend/operations.h" | ||
#include "backend/type.h" | ||
|
||
namespace celerity::detail::backend_detail { | ||
|
||
// Strided device copy for the CUDA backend, declared once per dimensionality (1, 2 and 3). | ||
// Only declarations live in this header; the definitions are provided elsewhere. | ||
void memcpy_strided_device_cuda(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<1>& source_range, | ||
const sycl::id<1>& source_offset, const sycl::range<1>& target_range, const sycl::id<1>& target_offset, const sycl::range<1>& copy_range); | ||
|
||
void memcpy_strided_device_cuda(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<2>& source_range, | ||
const sycl::id<2>& source_offset, const sycl::range<2>& target_range, const sycl::id<2>& target_offset, const sycl::range<2>& copy_range); | ||
|
||
void memcpy_strided_device_cuda(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<3>& source_range, | ||
const sycl::id<3>& source_offset, const sycl::range<3>& target_range, const sycl::id<3>& target_offset, const sycl::range<3>& copy_range); | ||
|
||
// Operations for the CUDA backend: the call is routed to whichever memcpy_strided_device_cuda overload matches the argument types. | ||
template <> | ||
struct backend_operations<backend::type::cuda> { | ||
template <typename... Args> | ||
static void memcpy_strided_device(Args&&... args) { | ||
// The arguments are passed on as lvalues (no std::forward). | ||
memcpy_strided_device_cuda(args...); | ||
} | ||
}; | ||
|
||
} // namespace celerity::detail::backend_detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#pragma once | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
#include "backend/operations.h" | ||
#include "backend/type.h" | ||
|
||
namespace celerity::detail::backend_detail { | ||
|
||
// Strided device copy for the generic backend, declared once per dimensionality (1, 2 and 3). | ||
// Only declarations live in this header; the definitions are provided elsewhere. | ||
void memcpy_strided_device_generic(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<1>& source_range, | ||
const sycl::id<1>& source_offset, const sycl::range<1>& target_range, const sycl::id<1>& target_offset, const sycl::range<1>& copy_range); | ||
|
||
void memcpy_strided_device_generic(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<2>& source_range, | ||
const sycl::id<2>& source_offset, const sycl::range<2>& target_range, const sycl::id<2>& target_offset, const sycl::range<2>& copy_range); | ||
|
||
void memcpy_strided_device_generic(sycl::queue& queue, const void* source_base_ptr, void* target_base_ptr, size_t elem_size, const sycl::range<3>& source_range, | ||
const sycl::id<3>& source_offset, const sycl::range<3>& target_range, const sycl::id<3>& target_offset, const sycl::range<3>& copy_range); | ||
|
||
// Operations for the generic backend: the call is routed to whichever memcpy_strided_device_generic overload matches the argument types. | ||
template <> | ||
struct backend_operations<backend::type::generic> { | ||
template <typename... Args> | ||
static void memcpy_strided_device(Args&&... args) { | ||
// The arguments are passed on as lvalues (no std::forward). | ||
memcpy_strided_device_generic(args...); | ||
} | ||
}; | ||
|
||
} // namespace celerity::detail::backend_detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#pragma once | ||
|
||
#include <stdexcept> | ||
|
||
#include "backend/type.h" | ||
|
||
namespace celerity::detail::backend_detail { | ||
|
||
/** | ||
 * Primary template of the per-backend operations. | ||
 * | ||
 * It is used for every backend type that has no explicit specialization, and rejects each call at runtime. | ||
 */ | ||
template <backend::type Type> | ||
struct backend_operations { | ||
    /** | ||
     * Always throws std::runtime_error ("Invalid backend"); the arguments are ignored. | ||
     */ | ||
    template <typename... Args> | ||
    static void memcpy_strided_device(Args&&... /* args */) { | ||
        // The pack is left unnamed on purpose: it is never read, and a named pack would trigger unused-parameter warnings. | ||
        throw std::runtime_error{"Invalid backend"}; | ||
    } | ||
}; | ||
|
||
} // namespace celerity::detail::backend_detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#pragma once | ||
|
||
#include <type_traits> | ||
|
||
#include "backend/type.h" | ||
|
||
namespace celerity::detail::backend_detail { | ||
|
||
// Compile-time flag telling whether a backend is enabled; false unless specialized below. | ||
template <backend::type Type> | ||
struct is_enabled : public std::false_type {}; | ||
|
||
// Shorthand for is_enabled<Type>::value. | ||
template <backend::type Type> | ||
constexpr bool is_enabled_v = is_enabled<Type>::value; | ||
|
||
// Display name of a backend; any type without a specialization is reported as "(unknown)". | ||
template <backend::type Type> | ||
struct name { | ||
static constexpr const char* value = "(unknown)"; | ||
}; | ||
|
||
// Shorthand for name<Type>::value. | ||
template <backend::type Type> | ||
constexpr const char* name_v = name<Type>::value; | ||
|
||
// The generic backend is enabled unconditionally. | ||
template <> | ||
struct is_enabled<backend::type::generic> : public std::true_type {}; | ||
|
||
template <> | ||
struct name<backend::type::generic> { | ||
static constexpr const char* value = "generic"; | ||
}; | ||
|
||
// The CUDA backend counts as enabled only when CELERITY_DETAIL_BACKEND_CUDA_ENABLED evaluates to non-zero. | ||
#if CELERITY_DETAIL_BACKEND_CUDA_ENABLED | ||
template <> | ||
struct is_enabled<backend::type::cuda> : public std::true_type {}; | ||
#endif | ||
|
||
// The CUDA name sits outside the #if above, so it is defined regardless of whether the backend is enabled. | ||
template <> | ||
struct name<backend::type::cuda> { | ||
static constexpr const char* value = "CUDA"; | ||
}; | ||
|
||
} // namespace celerity::detail::backend_detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#pragma once | ||
|
||
namespace celerity::detail::backend { | ||
// Identifies a backend code path. Three values exist: 'generic', 'cuda' and 'unknown'. | ||
enum class type { generic, cuda, unknown }; | ||
} // namespace celerity::detail::backend |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Builds the celerity_backends static library; cuda_backend.cc is added only when CELERITY_ENABLE_CUDA_BACKEND is on. | ||
# Probe for the CUDA toolkit quietly: the result supplies the default value of the option below. | ||
find_package(CUDAToolkit QUIET) | ||
# find_package(LevelZero QUIET) # TODO: Need find module? | ||
# find_package(ROCM QUIET) # TODO: Need find module? | ||
|
||
# TODO: Make conditional on CELERITY_SYCL_IMPL? | ||
option(CELERITY_ENABLE_CUDA_BACKEND "Enable optimized code paths for CUDA backends" ${CUDAToolkit_FOUND}) | ||
if(CELERITY_ENABLE_CUDA_BACKEND AND NOT CUDAToolkit_FOUND) | ||
# Run find_package again to emit error message | ||
find_package(CUDAToolkit REQUIRED) | ||
endif() | ||
|
||
# The generic backend sources are always part of the library. | ||
add_library(celerity_backends STATIC backend.cc generic_backend.cc) | ||
set_property(TARGET celerity_backends PROPERTY CXX_STANDARD 17) | ||
# We link against the RT here to get all of its transitive properties (circular linking is allowed for static libraries). | ||
target_link_libraries(celerity_backends PRIVATE celerity_runtime) | ||
add_sycl_to_target(TARGET celerity_backends SOURCES) | ||
|
||
# The definition is PUBLIC, so targets linking celerity_backends also see CELERITY_DETAIL_BACKEND_CUDA_ENABLED=1. | ||
if(CELERITY_ENABLE_CUDA_BACKEND) | ||
target_sources(celerity_backends PRIVATE cuda_backend.cc) | ||
target_link_libraries(celerity_backends PUBLIC CUDA::cudart) | ||
target_compile_definitions(celerity_backends PUBLIC "CELERITY_DETAIL_BACKEND_CUDA_ENABLED=1") | ||
message(STATUS "CUDA backend enabled") | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#include "backend/backend.h" | ||
|
||
namespace celerity::detail::backend { | ||
|
||
// Detects the backend of `device`; yields type::cuda for a CUDA device and type::unknown otherwise. | ||
// Only the checks matching the SYCL implementation in use are compiled in. If none of them is, | ||
// `device` goes unused, hence the [[maybe_unused]]. | ||
type get_type([[maybe_unused]] const sycl::device& device) { | ||
#if defined(__HIPSYCL__) && defined(SYCL_EXT_HIPSYCL_BACKEND_CUDA) | ||
    if(device.get_backend() == sycl::backend::cuda) { return type::cuda; } | ||
#endif | ||
#if defined(__SYCL_COMPILER_VERSION) // DPC++ (TODO: This may break when using OpenSYCL w/ DPC++ as compiler) | ||
    if(device.get_backend() == sycl::backend::ext_oneapi_cuda) { return type::cuda; } | ||
#endif | ||
    // No specialized backend was recognized for this device. | ||
    return type::unknown; | ||
} | ||
|
||
// Maps the detected backend of `device` onto one this build can execute: | ||
// type::cuda when CUDA is detected and the CUDA backend is compiled in, type::generic otherwise. | ||
type get_effective_type(const sycl::device& device) { | ||
    // Unused when the CUDA backend is compiled out, hence [[maybe_unused]]. | ||
    [[maybe_unused]] const auto b = get_type(device); | ||
|
||
// Test the macro's value rather than whether it is defined, as the backend headers do. | ||
// With "=0", defined() would still report CUDA although no CUDA operations are compiled in. | ||
#if CELERITY_DETAIL_BACKEND_CUDA_ENABLED | ||
    if(b == type::cuda) return b; | ||
#endif | ||
|
||
    return type::generic; | ||
} | ||
|
||
} // namespace celerity::detail::backend |
Oops, something went wrong.