Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move preprocessor definitions to configuration header #277

Merged
merged 2 commits into from
Aug 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ See our [platform support guide](docs/platform-support.md) for a complete list o
- Add support for SimSYCL as a SYCL implementation (#238)
- Extend compiler support to GCC (optionally with sanitizers) and C++20 code bases (#238)
- Add support for profiling with [Tracy](https://github.com/wolfpld/tracy), via `CELERITY_TRACY_SUPPORT` and environment variable `CELERITY_TRACY` (#267)
- The active SYCL implementation can now be queried via `CELERITY_SYCL_IS_*` macros (#277)

### Changed

Expand Down
41 changes: 22 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,14 @@ if(CELERITY_TRACY_SUPPORT)
fetch_content_from_submodule(Tracy vendor/tracy)
endif()

configure_file(include/version.h.in include/version.h @ONLY)
# Deprecated feature flags
set(CELERITY_FEATURE_SCALAR_REDUCTIONS ON)
set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS ON)
set(CELERITY_FEATURE_LOCAL_ACCESSOR ON)
set(CELERITY_FEATURE_UNNAMED_KERNELS ON)

# Add includes to library so they show up in IDEs
file(GLOB_RECURSE INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include/*.h")
list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h")

if(CMAKE_GENERATOR STREQUAL "Ninja")
# Force colored warnings in Ninja's output, if the compiler has -fdiagnostics-color support.
Expand Down Expand Up @@ -238,6 +241,21 @@ elseif(UNIX)
set(SOURCES ${SOURCES} src/platform_specific/named_threads.unix.cc)
endif()

# Translate the selected SYCL implementation (CELERITY_SYCL_IMPL) into one
# boolean per implementation. Each flag is always assigned ON or OFF so that
# the `#cmakedefine01` entries in version.h.in expand to a defined 0 or 1.
if(CELERITY_SYCL_IMPL STREQUAL "AdaptiveCpp")
    set(CELERITY_SYCL_IS_ACPP ON)
else()
    set(CELERITY_SYCL_IS_ACPP OFF)
endif()

if(CELERITY_SYCL_IMPL STREQUAL "DPC++")
    set(CELERITY_SYCL_IS_DPCPP ON)
else()
    set(CELERITY_SYCL_IS_DPCPP OFF)
endif()

if(CELERITY_SYCL_IMPL STREQUAL "SimSYCL")
    set(CELERITY_SYCL_IS_SIMSYCL ON)
else()
    set(CELERITY_SYCL_IS_SIMSYCL OFF)
endif()

# Generate the configuration header in the build tree (input resolved against
# the current source dir, output against the current binary dir) and add it to
# the INCLUDES list alongside the other headers.
configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/include/version.h.in"
    "${CMAKE_CURRENT_BINARY_DIR}/include/version.h"
    @ONLY
)
list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h")

add_library(
celerity_runtime
STATIC
Expand Down Expand Up @@ -288,12 +306,6 @@ target_link_libraries(celerity_runtime PUBLIC
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/backend)
target_link_libraries(celerity_runtime PUBLIC celerity_backends)

# Deprecated feature flags
set(CELERITY_FEATURE_SCALAR_REDUCTIONS ON)
set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS ON)
set(CELERITY_FEATURE_LOCAL_ACCESSOR ON)
set(CELERITY_FEATURE_UNNAMED_KERNELS ON)

# For debug builds, we set the CELERITY_DETAIL_ENABLE_DEBUG preprocessor flag,
# which allows Celerity to control debug functionality within headers regardless
# of a user target's build type. (This flag is not intended to be modified by
Expand All @@ -308,17 +320,8 @@ target_compile_definitions(celerity_runtime PUBLIC
# We still mark this as PUBLIC during builds (but not installation),
# so that the examples and tests receive the correct flag as well.
$<BUILD_INTERFACE:
$<$<CONFIG:Debug>:CELERITY_DETAIL_ENABLE_DEBUG>
CELERITY_DETAIL_ENABLE_DEBUG=$<CONFIG:Debug>
>
CELERITY_USE_MIMALLOC=$<BOOL:${CELERITY_USE_MIMALLOC}>
CELERITY_FEATURE_SCALAR_REDUCTIONS=$<BOOL:${CELERITY_FEATURE_SCALAR_REDUCTIONS}>
CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS=$<BOOL:${CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS}>
CELERITY_FEATURE_LOCAL_ACCESSOR=$<BOOL:${CELERITY_FEATURE_LOCAL_ACCESSOR}>
CELERITY_FEATURE_UNNAMED_KERNELS=$<BOOL:${CELERITY_FEATURE_UNNAMED_KERNELS}>
CELERITY_DETAIL_HAS_NAMED_THREADS=$<BOOL:${CELERITY_DETAIL_HAS_NAMED_THREADS}>
CELERITY_ACCESSOR_BOUNDARY_CHECK=$<BOOL:${CELERITY_ACCESSOR_BOUNDARY_CHECK}>
CELERITY_ACCESS_PATTERN_DIAGNOSTICS=$<BOOL:${CELERITY_ACCESS_PATTERN_DIAGNOSTICS}>
CELERITY_TRACY_SUPPORT=$<BOOL:${CELERITY_TRACY_SUPPORT}>
)

# Collect version information from git in src/version.cc. This target is always out of date, but the timestamp
Expand Down Expand Up @@ -346,7 +349,7 @@ add_sycl_to_target(
if(MSVC)
target_compile_options(celerity_runtime PRIVATE /MP /W3)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang")
target_compile_options(celerity_runtime PRIVATE -Wall -Wextra -Wno-unused-parameter -Werror=return-type -Werror=init-self)
target_compile_options(celerity_runtime PRIVATE -Wall -Wextra -Wno-unused-parameter -Werror=return-type -Werror=init-self -Werror=undef)
endif()

# CMake only defines per-language compiler ids (CMAKE_<LANG>_COMPILER_ID).
# A plain `CMAKE_COMPILER_ID` is undefined, so the comparison would be made
# against the literal variable name and this branch would never be taken.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
Expand Down
3 changes: 0 additions & 3 deletions cmake/AddToTarget.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ if(CELERITY_SYCL_IMPL STREQUAL "DPC++")
-fsycl
-sycl-std=2020
"-fsycl-targets=${CELERITY_DPCPP_TARGETS}"
-DCELERITY_DPCPP=1
-Wno-sycl-strict # -Wsycl-strict produces false-positive warnings in DPC++'s own SYCL headers as of 2022-10-06
)
target_compile_options(${ADD_SYCL_TARGET} PUBLIC ${DPCPP_FLAGS})
Expand All @@ -30,8 +29,6 @@ elseif(CELERITY_SYCL_IMPL STREQUAL "SimSYCL")
"${multi_value_args}"
${ARGN}
)
target_compile_options(${ADD_SYCL_TARGET} PUBLIC -DCELERITY_SIMSYCL=1)
target_link_options(${ADD_SYCL_TARGET} PUBLIC -DCELERITY_SIMSYCL=1)
endfunction()
endif()

Expand Down
2 changes: 1 addition & 1 deletion examples/matmul/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include <celerity.h>

#if !defined(NDEBUG) || CELERITY_SIMSYCL
#if !defined(NDEBUG) || CELERITY_SYCL_IS_SIMSYCL
const size_t MAT_SIZE = 128;
#else
const size_t MAT_SIZE = 1024;
Expand Down
1 change: 1 addition & 0 deletions include/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "closure_hydrator.h"
#include "handler.h"
#include "sycl_wrappers.h"
#include "version.h"

namespace celerity {

Expand Down
1 change: 1 addition & 0 deletions include/closure_hydrator.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "ranges.h"
#include "sycl_wrappers.h"
#include "types.h"
#include "version.h"

namespace celerity::detail {

Expand Down
1 change: 1 addition & 0 deletions include/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "ranges.h"
#include "task.h"
#include "types.h"
#include "version.h"
#include "workaround.h"

namespace celerity {
Expand Down
1 change: 1 addition & 0 deletions include/instruction_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "launcher.h"
#include "ranges.h"
#include "types.h"
#include "version.h"

#include <algorithm>
#include <cstdlib>
Expand Down
16 changes: 16 additions & 0 deletions include/version.h.in
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
#pragma once

#cmakedefine01 CELERITY_SYCL_IS_ACPP
#cmakedefine01 CELERITY_SYCL_IS_DPCPP
#cmakedefine01 CELERITY_SYCL_IS_SIMSYCL

// CELERITY_DETAIL_ENABLE_DEBUG is specified on the command line
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
#cmakedefine01 CELERITY_USE_MIMALLOC
#cmakedefine01 CELERITY_DETAIL_HAS_NAMED_THREADS
#cmakedefine01 CELERITY_ACCESSOR_BOUNDARY_CHECK
#cmakedefine01 CELERITY_ACCESS_PATTERN_DIAGNOSTICS
#cmakedefine01 CELERITY_TRACY_SUPPORT

#cmakedefine01 CELERITY_FEATURE_SCALAR_REDUCTIONS
#cmakedefine01 CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS
#cmakedefine01 CELERITY_FEATURE_LOCAL_ACCESSOR
#cmakedefine01 CELERITY_FEATURE_UNNAMED_KERNELS

#define CELERITY_VERSION_MAJOR @CELERITY_VERSION_MAJOR@
#define CELERITY_VERSION_MINOR @CELERITY_VERSION_MINOR@
#define CELERITY_VERSION_PATCH @CELERITY_VERSION_PATCH@
Expand Down
8 changes: 5 additions & 3 deletions include/workaround.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
#pragma once

#include "version.h"

#include <cassert>

#include <sycl/sycl.hpp>

#if defined(CELERITY_DPCPP)
#if CELERITY_SYCL_IS_DPCPP
#define CELERITY_WORKAROUND_DPCPP 1
#else
#define CELERITY_WORKAROUND_DPCPP 0
#endif

#if defined(__HIPSYCL__)
#if CELERITY_SYCL_IS_ACPP
#define CELERITY_WORKAROUND_ACPP 1
#define CELERITY_WORKAROUND_VERSION_MAJOR HIPSYCL_VERSION_MAJOR
#define CELERITY_WORKAROUND_VERSION_MINOR HIPSYCL_VERSION_MINOR
Expand All @@ -19,7 +21,7 @@
#define CELERITY_WORKAROUND_ACPP 0
#endif

#if defined(CELERITY_SIMSYCL)
#if CELERITY_SYCL_IS_SIMSYCL
#define CELERITY_WORKAROUND_SIMSYCL 1
#else
#define CELERITY_WORKAROUND_SIMSYCL 0
Expand Down
13 changes: 6 additions & 7 deletions src/backend/sycl_cuda_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#include "system_info.h"
#include "tracy.h"
#include "utils.h"
#include "workaround.h"

#include "version.h"

#define CELERITY_STRINGIFY2(f) #f
#define CELERITY_STRINGIFY(f) CELERITY_STRINGIFY2(f)
Expand Down Expand Up @@ -75,7 +74,7 @@ void nd_copy_device_async(cudaStream_t stream, const void* const source_base, vo
// - There are no real thread-safety guarantees. DPC++ currently does not submit kernels from background threads, but if it ever starts doing so, this will
// break more-or-less silently.
// There is an open GitHub issue on the matter: https://github.com/intel/llvm/issues/13706
#if defined(CELERITY_DPCPP)
#if CELERITY_SYCL_IS_DPCPP

struct cuda_native_event_deleter {
void operator()(const cudaEvent_t evt) const { CELERITY_CUDA_CHECK(cudaEventDestroy, evt); }
Expand Down Expand Up @@ -117,7 +116,7 @@ class cuda_event final : public async_event_impl {
unique_cuda_native_event m_after;
};

#endif // defined(CELERITY_DPCPP)
#endif // CELERITY_SYCL_IS_DPCPP

bool can_enable_peer_access(const int id_device, const int id_peer) {
// RTX 30xx and 40xx GPUs do not support peer access, but Nvidia Driver < 550 incorrectly reports that it does, causing kernel panics when enabling it
Expand Down Expand Up @@ -151,15 +150,15 @@ namespace celerity::detail::sycl_backend_detail {
async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_base, void* const dest_base, const box<3>& source_box, const box<3>& dest_box,
const region<3>& copy_region, const size_t elem_size, bool enable_profiling) //
{
#if defined(__HIPSYCL__)
#if CELERITY_SYCL_IS_ACPP
// AdaptiveCpp provides first-class custom backend op submission without a host round-trip like sycl::queue::host_task would require.
auto event = queue.AdaptiveCpp_enqueue_custom_operation([=](sycl::interop_handle handle) {
const auto stream = handle.get_native_queue<sycl::backend::cuda>();
cuda_backend_detail::nd_copy_device_async(stream, source_base, dest_base, source_box, dest_box, copy_region, elem_size);
});
sycl_backend_detail::flush(queue);
return make_async_event<sycl_event>(std::move(event), enable_profiling);
#elif defined(CELERITY_DPCPP)
#elif CELERITY_SYCL_IS_DPCPP
// With DPC++, we must submit from the executor thread - see the comment on cuda_native_event above.
const auto stream = sycl::get_native<sycl::backend::ext_oneapi_cuda>(queue);
auto before = enable_profiling ? cuda_backend_detail::record_native_event(stream, enable_profiling) : nullptr;
Expand All @@ -171,7 +170,7 @@ async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_bas
#endif
}

#if defined(CELERITY_DPCPP)
#if CELERITY_SYCL_IS_DPCPP
constexpr sycl::backend sycl_cuda_backend = sycl::backend::ext_oneapi_cuda;
#else
constexpr sycl::backend sycl_cuda_backend = sycl::backend::cuda;
Expand Down
2 changes: 1 addition & 1 deletion src/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ namespace detail {
if(parsed_and_validated_envs.ok()) {
// ------------------------------- CELERITY_LOG_LEVEL ---------------------------------

#if defined(CELERITY_DETAIL_ENABLE_DEBUG)
#if CELERITY_DETAIL_ENABLE_DEBUG
m_log_lvl = parsed_and_validated_envs.get_or(env_log_level, log_level::debug);
#else
m_log_lvl = parsed_and_validated_envs.get_or(env_log_level, log_level::info);
Expand Down
1 change: 1 addition & 0 deletions src/live_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "tracy.h"
#include "types.h"
#include "utils.h"
#include "version.h"

#include <deque>
#include <memory>
Expand Down
1 change: 1 addition & 0 deletions src/platform_specific/named_threads.unix.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "named_threads.h"
#include "version.h"

#include <cassert>
#include <type_traits>
Expand Down
8 changes: 4 additions & 4 deletions src/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ namespace detail {
}

static const char* get_build_type() {
#if defined(CELERITY_DETAIL_ENABLE_DEBUG)
#if CELERITY_DETAIL_ENABLE_DEBUG
return "debug";
#else
return "release";
Expand All @@ -96,11 +96,11 @@ namespace detail {
}

static std::string get_sycl_version() {
#if defined(__HIPSYCL__) || defined(__HIPSYCL_TRANSFORM__)
#if CELERITY_SYCL_IS_ACPP
return fmt::format("AdaptiveCpp {}.{}.{}", HIPSYCL_VERSION_MAJOR, HIPSYCL_VERSION_MINOR, HIPSYCL_VERSION_PATCH);
#elif CELERITY_DPCPP
#elif CELERITY_SYCL_IS_DPCPP
return "DPC++ / Clang " __clang_version__;
#elif CELERITY_SIMSYCL
#elif CELERITY_SYCL_IS_SIMSYCL
return "SimSYCL " SIMSYCL_VERSION;
#else
#error "unknown SYCL implementation"
Expand Down
2 changes: 1 addition & 1 deletion test/backend_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ TEST_CASE("backend allocations are properly aligned", "[backend]") {
TEST_CASE("backend allocations are pattern-filled in debug builds", "[backend]") {
test_utils::allow_backend_fallback_warnings();

#if defined(CELERITY_DETAIL_ENABLE_DEBUG)
#if CELERITY_DETAIL_ENABLE_DEBUG
const auto [backend_type, backend, sycl_devices] = generate_backends_with_devices();
CAPTURE(backend_type, sycl_devices);

Expand Down
Loading