From 4d28cefdc28836003b7c8b56d620fa2e3c2b9a19 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Tue, 6 Aug 2024 15:56:10 +0200 Subject: [PATCH 1/2] Move compile-time defines into version.h config header --- CMakeLists.txt | 29 +++++++-------------- include/accessor.h | 1 + include/closure_hydrator.h | 1 + include/handler.h | 1 + include/instruction_graph.h | 1 + include/version.h.in | 12 +++++++++ include/workaround.h | 2 ++ src/config.cc | 2 +- src/live_executor.cc | 1 + src/platform_specific/named_threads.unix.cc | 1 + src/runtime.cc | 2 +- test/backend_tests.cc | 2 +- 12 files changed, 33 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f497d50d..5b9e31ea0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,11 +190,14 @@ if(CELERITY_TRACY_SUPPORT) fetch_content_from_submodule(Tracy vendor/tracy) endif() -configure_file(include/version.h.in include/version.h @ONLY) +# Deprecated feature flags +set(CELERITY_FEATURE_SCALAR_REDUCTIONS ON) +set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS ON) +set(CELERITY_FEATURE_LOCAL_ACCESSOR ON) +set(CELERITY_FEATURE_UNNAMED_KERNELS ON) # Add includes to library so they show up in IDEs file(GLOB_RECURSE INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include/*.h") -list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h") if(CMAKE_GENERATOR STREQUAL "Ninja") # Force colored warnings in Ninja's output, if the compiler has -fdiagnostics-color support. @@ -238,6 +241,9 @@ elseif(UNIX) set(SOURCES ${SOURCES} src/platform_specific/named_threads.unix.cc) endif() +configure_file(include/version.h.in include/version.h @ONLY) +list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h") + add_library( celerity_runtime STATIC @@ -288,12 +294,6 @@ target_link_libraries(celerity_runtime PUBLIC add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/backend) target_link_libraries(celerity_runtime PUBLIC celerity_backends) -# Deprecated feature flags -set(CELERITY_FEATURE_SCALAR_REDUCTIONS ON) -set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS ON) -set(CELERITY_FEATURE_LOCAL_ACCESSOR ON) -set(CELERITY_FEATURE_UNNAMED_KERNELS ON) - # For debug builds, we set the CELERITY_DETAIL_ENABLE_DEBUG preprocessor flag, # which allows Celerity to control debug functionality within headers regardless # of a user target's build type. (This flag is not intended to be modified by @@ -308,17 +308,8 @@ target_compile_definitions(celerity_runtime PUBLIC # We still mark this as PUBLIC during builds (but not installation), # so that the examples and tests receive the correct flag as well. $:CELERITY_DETAIL_ENABLE_DEBUG> + CELERITY_DETAIL_ENABLE_DEBUG=$ > - CELERITY_USE_MIMALLOC=$ - CELERITY_FEATURE_SCALAR_REDUCTIONS=$ - CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS=$ - CELERITY_FEATURE_LOCAL_ACCESSOR=$ - CELERITY_FEATURE_UNNAMED_KERNELS=$ - CELERITY_DETAIL_HAS_NAMED_THREADS=$ - CELERITY_ACCESSOR_BOUNDARY_CHECK=$ - CELERITY_ACCESS_PATTERN_DIAGNOSTICS=$ - CELERITY_TRACY_SUPPORT=$ ) # Collect version information from git in src/version.cc. This target is always out of date, but the timestamp @@ -346,7 +337,7 @@ add_sycl_to_target( if(MSVC) target_compile_options(celerity_runtime PRIVATE /MP /W3) elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang") - target_compile_options(celerity_runtime PRIVATE -Wall -Wextra -Wno-unused-parameter -Werror=return-type -Werror=init-self) + target_compile_options(celerity_runtime PRIVATE -Wall -Wextra -Wno-unused-parameter -Werror=return-type -Werror=init-self -Werror=undef) endif() if(CMAKE_COMPILER_ID STREQUAL "GNU") diff --git a/include/accessor.h b/include/accessor.h index ca030878f..c92531cae 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -10,6 +10,7 @@ #include "closure_hydrator.h" #include "handler.h" #include "sycl_wrappers.h" +#include "version.h" namespace celerity { diff --git a/include/closure_hydrator.h b/include/closure_hydrator.h index bf55a3750..4374e35c0 100644 --- a/include/closure_hydrator.h +++ b/include/closure_hydrator.h @@ -7,6 +7,7 @@ #include "ranges.h" #include "sycl_wrappers.h" #include "types.h" +#include "version.h" namespace celerity::detail { diff --git a/include/handler.h b/include/handler.h index 431b0609a..7f668b96e 100644 --- a/include/handler.h +++ b/include/handler.h @@ -16,6 +16,7 @@ #include "ranges.h" #include "task.h" #include "types.h" +#include "version.h" #include "workaround.h" namespace celerity { diff --git a/include/instruction_graph.h b/include/instruction_graph.h index 1ca4784cd..c14a6f99d 100644 --- a/include/instruction_graph.h +++ b/include/instruction_graph.h @@ -4,6 +4,7 @@ #include "launcher.h" #include "ranges.h" #include "types.h" +#include "version.h" #include #include diff --git a/include/version.h.in b/include/version.h.in index 121ba2c22..112f5dcc3 100644 --- a/include/version.h.in +++ b/include/version.h.in @@ -1,5 +1,17 @@ #pragma once +// CELERITY_DETAIL_ENABLE_DEBUG is specified on the command line +#cmakedefine01 CELERITY_USE_MIMALLOC +#cmakedefine01 CELERITY_DETAIL_HAS_NAMED_THREADS +#cmakedefine01 CELERITY_ACCESSOR_BOUNDARY_CHECK +#cmakedefine01 CELERITY_ACCESS_PATTERN_DIAGNOSTICS +#cmakedefine01 CELERITY_TRACY_SUPPORT + +#cmakedefine01 CELERITY_FEATURE_SCALAR_REDUCTIONS +#cmakedefine01 CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS +#cmakedefine01 CELERITY_FEATURE_LOCAL_ACCESSOR +#cmakedefine01 CELERITY_FEATURE_UNNAMED_KERNELS + #define CELERITY_VERSION_MAJOR @CELERITY_VERSION_MAJOR@ #define CELERITY_VERSION_MINOR @CELERITY_VERSION_MINOR@ #define CELERITY_VERSION_PATCH @CELERITY_VERSION_PATCH@ diff --git a/include/workaround.h b/include/workaround.h index 5a96f8bbf..061df6159 100644 --- a/include/workaround.h +++ b/include/workaround.h @@ -1,5 +1,7 @@ #pragma once +#include "version.h" + #include #include diff --git a/src/config.cc b/src/config.cc index 46fe5edf2..a584253e3 100644 --- a/src/config.cc +++ b/src/config.cc @@ -106,7 +106,7 @@ namespace detail { if(parsed_and_validated_envs.ok()) { // ------------------------------- CELERITY_LOG_LEVEL --------------------------------- -#if defined(CELERITY_DETAIL_ENABLE_DEBUG) +#if CELERITY_DETAIL_ENABLE_DEBUG m_log_lvl = parsed_and_validated_envs.get_or(env_log_level, log_level::debug); #else m_log_lvl = parsed_and_validated_envs.get_or(env_log_level, log_level::info); diff --git a/src/live_executor.cc b/src/live_executor.cc index 8adeecea5..e01209275 100644 --- a/src/live_executor.cc +++ b/src/live_executor.cc @@ -11,6 +11,7 @@ #include "tracy.h" #include "types.h" #include "utils.h" +#include "version.h" #include #include diff --git a/src/platform_specific/named_threads.unix.cc b/src/platform_specific/named_threads.unix.cc index d742d779c..9a5afe256 100644 --- a/src/platform_specific/named_threads.unix.cc +++ b/src/platform_specific/named_threads.unix.cc @@ -1,4 +1,5 @@ #include "named_threads.h" +#include "version.h" #include #include diff --git a/src/runtime.cc b/src/runtime.cc index 3d580e040..b4a73a6f1 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -80,7 +80,7 @@ namespace detail { } static const char* get_build_type() { -#if defined(CELERITY_DETAIL_ENABLE_DEBUG) +#if CELERITY_DETAIL_ENABLE_DEBUG return "debug"; #else return "release"; diff --git a/test/backend_tests.cc b/test/backend_tests.cc index 7e264d02a..065f0b432 100644 --- a/test/backend_tests.cc +++ b/test/backend_tests.cc @@ -114,7 +114,7 @@ TEST_CASE("backend allocations are properly aligned", "[backend]") { TEST_CASE("backend allocations are pattern-filled in debug builds", "[backend]") { test_utils::allow_backend_fallback_warnings(); -#if defined(CELERITY_DETAIL_ENABLE_DEBUG) +#if CELERITY_DETAIL_ENABLE_DEBUG const auto [backend_type, backend, sycl_devices] = generate_backends_with_devices(); CAPTURE(backend_type, sycl_devices); From bf818350994491efadeb69542bbbd643ff3c7bad Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Tue, 6 Aug 2024 16:52:48 +0200 Subject: [PATCH 2/2] Introduce CELERITY_SYCL_IS_* macros --- CHANGELOG.md | 1 + CMakeLists.txt | 12 ++++++++++++ cmake/AddToTarget.cmake | 3 --- examples/matmul/matmul.cc | 2 +- include/version.h.in | 4 ++++ include/workaround.h | 6 +++--- src/backend/sycl_cuda_backend.cc | 13 ++++++------- src/runtime.cc | 6 +++--- 8 files changed, 30 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f5c5962a..975f14a00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ See our [platform support guide](docs/platform-support.md) for a complete list o - Add support for SimSYCL as a SYCL implementation (#238) - Extend compiler support to GCC (optionally with sanitizers) and C++20 code bases (#238) - Add support for profiling with [Tracy](https://github.com/wolfpld/tracy), via `CELERITY_TRACY_SUPPORT` and environment variable `CELERITY_TRACY` (#267) +- The active SYCL implementation can now be queried via `CELERITY_SYCL_IS_*` macros (#??) ### Changed diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b9e31ea0..c06fe5939 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,6 +241,18 @@ elseif(UNIX) set(SOURCES ${SOURCES} src/platform_specific/named_threads.unix.cc) endif() +# Read by configure_file() +set(CELERITY_SYCL_IS_ACPP OFF) +set(CELERITY_SYCL_IS_DPCPP OFF) +set(CELERITY_SYCL_IS_SIMSYCL OFF) +if(CELERITY_SYCL_IMPL STREQUAL "AdaptiveCpp") + set(CELERITY_SYCL_IS_ACPP ON) +elseif(CELERITY_SYCL_IMPL STREQUAL "DPC++") + set(CELERITY_SYCL_IS_DPCPP ON) +elseif(CELERITY_SYCL_IMPL STREQUAL "SimSYCL") + set(CELERITY_SYCL_IS_SIMSYCL ON) +endif() + configure_file(include/version.h.in include/version.h @ONLY) list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h") diff --git a/cmake/AddToTarget.cmake b/cmake/AddToTarget.cmake index cb74c497d..4e8268c06 100644 --- a/cmake/AddToTarget.cmake +++ b/cmake/AddToTarget.cmake @@ -13,7 +13,6 @@ if(CELERITY_SYCL_IMPL STREQUAL "DPC++") -fsycl -sycl-std=2020 "-fsycl-targets=${CELERITY_DPCPP_TARGETS}" - -DCELERITY_DPCPP=1 -Wno-sycl-strict # -Wsycl-strict produces false-positive warnings in DPC++'s own SYCL headers as of 2022-10-06 ) target_compile_options(${ADD_SYCL_TARGET} PUBLIC ${DPCPP_FLAGS}) @@ -30,8 +29,6 @@ elseif(CELERITY_SYCL_IMPL STREQUAL "SimSYCL") "${multi_value_args}" ${ARGN} ) - target_compile_options(${ADD_SYCL_TARGET} PUBLIC -DCELERITY_SIMSYCL=1) - target_link_options(${ADD_SYCL_TARGET} PUBLIC -DCELERITY_SIMSYCL=1) endfunction() endif() diff --git a/examples/matmul/matmul.cc b/examples/matmul/matmul.cc index d605708d1..070133991 100644 --- a/examples/matmul/matmul.cc +++ b/examples/matmul/matmul.cc @@ -2,7 +2,7 @@ #include -#if !defined(NDEBUG) || CELERITY_SIMSYCL +#if !defined(NDEBUG) || CELERITY_SYCL_IS_SIMSYCL const size_t MAT_SIZE = 128; #else const size_t MAT_SIZE = 1024; diff --git a/include/version.h.in b/include/version.h.in index 112f5dcc3..e8f2fe7b6 100644 --- a/include/version.h.in +++ b/include/version.h.in @@ -1,5 +1,9 @@ #pragma once +#cmakedefine01 CELERITY_SYCL_IS_ACPP +#cmakedefine01 CELERITY_SYCL_IS_DPCPP +#cmakedefine01 CELERITY_SYCL_IS_SIMSYCL + // CELERITY_DETAIL_ENABLE_DEBUG is specified on the command line #cmakedefine01 CELERITY_USE_MIMALLOC #cmakedefine01 CELERITY_DETAIL_HAS_NAMED_THREADS diff --git a/include/workaround.h b/include/workaround.h index 061df6159..b30bbd2e6 100644 --- a/include/workaround.h +++ b/include/workaround.h @@ -6,13 +6,13 @@ #include -#if defined(CELERITY_DPCPP) +#if CELERITY_SYCL_IS_DPCPP #define CELERITY_WORKAROUND_DPCPP 1 #else #define CELERITY_WORKAROUND_DPCPP 0 #endif -#if defined(__HIPSYCL__) +#if CELERITY_SYCL_IS_ACPP #define CELERITY_WORKAROUND_ACPP 1 #define CELERITY_WORKAROUND_VERSION_MAJOR HIPSYCL_VERSION_MAJOR #define CELERITY_WORKAROUND_VERSION_MINOR HIPSYCL_VERSION_MINOR @@ -21,7 +21,7 @@ #define CELERITY_WORKAROUND_ACPP 0 #endif -#if defined(CELERITY_SIMSYCL) +#if CELERITY_SYCL_IS_SIMSYCL #define CELERITY_WORKAROUND_SIMSYCL 1 #else #define CELERITY_WORKAROUND_SIMSYCL 0 diff --git a/src/backend/sycl_cuda_backend.cc b/src/backend/sycl_cuda_backend.cc index b0d1ae208..b11f5d0aa 100644 --- a/src/backend/sycl_cuda_backend.cc +++ b/src/backend/sycl_cuda_backend.cc @@ -8,8 +8,7 @@ #include "system_info.h" #include "tracy.h" #include "utils.h" -#include "workaround.h" - +#include "version.h" #define CELERITY_STRINGIFY2(f) #f #define CELERITY_STRINGIFY(f) CELERITY_STRINGIFY2(f) @@ -75,7 +74,7 @@ void nd_copy_device_async(cudaStream_t stream, const void* const source_base, vo // - There are no real thread-safety guarantees. DPC++ currently does not submit kernels from background threads, but if it ever starts doing so, this will // break more-or-less silently. // There is an open GitHub issue on the matter: https://github.com/intel/llvm/issues/13706 -#if defined(CELERITY_DPCPP) +#if CELERITY_SYCL_IS_DPCPP struct cuda_native_event_deleter { void operator()(const cudaEvent_t evt) const { CELERITY_CUDA_CHECK(cudaEventDestroy, evt); } @@ -117,7 +116,7 @@ class cuda_event final : public async_event_impl { unique_cuda_native_event m_after; }; -#endif // defined(CELERITY_DPCPP) +#endif // CELERITY_SYCL_IS_DPCPP bool can_enable_peer_access(const int id_device, const int id_peer) { // RTX 30xx and 40xx GPUs do not support peer access, but Nvidia Driver < 550 incorrectly reports that it does, causing kernel panics when enabling it @@ -151,7 +150,7 @@ namespace celerity::detail::sycl_backend_detail { async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_base, void* const dest_base, const box<3>& source_box, const box<3>& dest_box, const region<3>& copy_region, const size_t elem_size, bool enable_profiling) // { -#if defined(__HIPSYCL__) +#if CELERITY_SYCL_IS_ACPP // AdaptiveCpp provides first-class custom backend op submission without a host round-trip like sycl::queue::host_task would require. auto event = queue.AdaptiveCpp_enqueue_custom_operation([=](sycl::interop_handle handle) { const auto stream = handle.get_native_queue(); @@ -159,7 +158,7 @@ async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_bas }); sycl_backend_detail::flush(queue); return make_async_event(std::move(event), enable_profiling); -#elif defined(CELERITY_DPCPP) +#elif CELERITY_SYCL_IS_DPCPP // With DPC++, we must submit from the executor thread - see the comment on cuda_native_event above. const auto stream = sycl::get_native(queue); auto before = enable_profiling ? cuda_backend_detail::record_native_event(stream, enable_profiling) : nullptr; @@ -171,7 +170,7 @@ async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_bas #endif } -#if defined(CELERITY_DPCPP) +#if CELERITY_SYCL_IS_DPCPP constexpr sycl::backend sycl_cuda_backend = sycl::backend::ext_oneapi_cuda; #else constexpr sycl::backend sycl_cuda_backend = sycl::backend::cuda; diff --git a/src/runtime.cc b/src/runtime.cc index b4a73a6f1..400653743 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -96,11 +96,11 @@ namespace detail { } static std::string get_sycl_version() { -#if defined(__HIPSYCL__) || defined(__HIPSYCL_TRANSFORM__) +#if CELERITY_SYCL_IS_ACPP return fmt::format("AdaptiveCpp {}.{}.{}", HIPSYCL_VERSION_MAJOR, HIPSYCL_VERSION_MINOR, HIPSYCL_VERSION_PATCH); -#elif CELERITY_DPCPP +#elif CELERITY_SYCL_IS_DPCPP return "DPC++ / Clang " __clang_version__; -#elif CELERITY_SIMSYCL +#elif CELERITY_SYCL_IS_SIMSYCL return "SimSYCL " SIMSYCL_VERSION; #else #error "unknown SYCL implementation"