Patch summary (free-form preamble; text before the first "diff --git" line is not part of any hunk):
  * CMakeLists.txt: CELERITY_USE_MIMALLOC now defaults to OFF when CELERITY_SYCL_IMPL is "DPC++" and to ON otherwise;
    the option() call moves below the build-type default so it can use the computed default.
  * cmake/celerity-config.cmake.in: find_dependency(mimalloc) is wrapped in if(@CELERITY_USE_MIMALLOC@).
  * src/worker_job.cc: the two-element boundary-check index buffer is allocated with sycl::malloc_host instead of
    sycl::malloc_shared.
  * test/backend_tests.cc: the platform info string is captured, and the inter_device copy test is skipped when that
    string starts with "Intel".

NOTE(review): this patch was recovered from a copy in which line breaks were collapsed and angle-bracketed template
arguments were stripped. Line breaks and blank lines were re-derived from the hunk header counts. The template
arguments `<id<3>>` (malloc_shared / malloc_host) and `<sycl::info::platform::name>` (get_info) are reconstructions,
and leading indentation of context lines is a best guess -- confirm both against the repository before applying.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 73baecc34..8dc8bd447 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,6 @@ else()
   set(ENABLE_ACC_CHECK OFF)
 endif()
 
-option(CELERITY_USE_MIMALLOC "Use the mimalloc memory allocator" ON)
 option(CELERITY_ACCESSOR_BOUNDARY_CHECK "Enable accessor boundary check" ${ENABLE_ACC_CHECK})
 
 if(CELERITY_ACCESSOR_BOUNDARY_CHECK)
@@ -119,6 +118,15 @@ if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake Build Type" FORCE)
 endif()
 
+if(CELERITY_SYCL_IMPL STREQUAL "DPC++")
+  # See https://github.com/oneapi-src/unified-runtime/issues/803
+  message(STATUS "Not enabling mimalloc by default because it breaks with oneAPI plugin loading")
+  set(CELERITY_USE_MIMALLOC_DEFAULT OFF)
+else()
+  set(CELERITY_USE_MIMALLOC_DEFAULT ON)
+endif()
+option(CELERITY_USE_MIMALLOC "Use the mimalloc memory allocator" ${CELERITY_USE_MIMALLOC_DEFAULT})
+
 # 3rdparty dependencies
 include(FetchContent)
 
diff --git a/cmake/celerity-config.cmake.in b/cmake/celerity-config.cmake.in
index d8d784f19..9d7f6d9e1 100644
--- a/cmake/celerity-config.cmake.in
+++ b/cmake/celerity-config.cmake.in
@@ -23,7 +23,9 @@ find_dependency(fmt REQUIRED)
 find_dependency(spdlog REQUIRED)
 find_dependency(small_vector REQUIRED)
 find_dependency(libenvpp REQUIRED)
-find_dependency(mimalloc REQUIRED)
+if(@CELERITY_USE_MIMALLOC@)
+  find_dependency(mimalloc REQUIRED)
+endif()
 if(@CELERITY_ENABLE_CUDA_BACKEND@)
   if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17")
     find_dependency(CUDAToolkit REQUIRED)
diff --git a/src/worker_job.cc b/src/worker_job.cc
index c30295ae9..9ef1abcd2 100644
--- a/src/worker_job.cc
+++ b/src/worker_job.cc
@@ -217,7 +217,7 @@ namespace detail {
 				try {
 					const auto info = m_buffer_mngr.access_device_buffer(bid, mode, sr);
 #if CELERITY_ACCESSOR_BOUNDARY_CHECK
-					auto* const oob_idx = sycl::malloc_shared<id<3>>(2, m_queue.get_sycl_queue());
+					auto* const oob_idx = sycl::malloc_host<id<3>>(2, m_queue.get_sycl_queue());
 					assert(oob_idx != nullptr);
 					constexpr size_t size_t_max = std::numeric_limits<size_t>::max();
 					const auto buffer_dims = m_buffer_mngr.get_buffer_info(bid).dimensions;
diff --git a/test/backend_tests.cc b/test/backend_tests.cc
index 902801ea1..d6d899410 100644
--- a/test/backend_tests.cc
+++ b/test/backend_tests.cc
@@ -124,7 +124,14 @@ TEMPLATE_TEST_CASE_SIG("memcpy_strided_device allows to copy between the same de
 	const size_t platform_id = GENERATE(Catch::Generators::range(size_t(0), sycl::platform::get_platforms().size()));
 	const auto test_type = GENERATE(copy_test_type::intra_device, copy_test_type::inter_device, copy_test_type::host_to_device, copy_test_type::device_to_host);
 	CAPTURE(platform_id, test_type);
+
 	const auto platform = sycl::platform::get_platforms()[platform_id];
+	CAPTURE(platform.get_info<sycl::info::platform::name>());
+
+	if(platform.get_info<sycl::info::platform::name>().substr(0, 5) == "Intel" && test_type == copy_test_type::inter_device) {
+		SKIP("Inter-GPU copy appears to currently be broken on Intel OpenCL / Level Zero");
+	}
+
 	auto [src, tgt] = ([&] {
 		try {
 			return select_source_and_target(test_type, platform);