From 3f194baea973b9880ee5df30c6798e4cd5beb042 Mon Sep 17 00:00:00 2001 From: Anurag Date: Mon, 14 Mar 2022 15:58:41 +0100 Subject: [PATCH 1/3] Add tests for existing device selection logic --- include/config.h | 1 + include/device_queue.h | 72 +++++- src/config.cc | 1 - src/device_queue.cc | 73 +----- test/CMakeLists.txt | 1 + test/device_selection_tests.cc | 390 +++++++++++++++++++++++++++++++++ 6 files changed, 464 insertions(+), 74 deletions(-) create mode 100644 test/device_selection_tests.cc diff --git a/include/config.h b/include/config.h index b26f582b5..472d34f04 100644 --- a/include/config.h +++ b/include/config.h @@ -48,6 +48,7 @@ namespace detail { std::optional device_cfg; std::optional enable_device_profiling; size_t graph_print_max_verts = 200; + friend struct config_testspy; }; } // namespace detail diff --git a/include/device_queue.h b/include/device_queue.h index 18a0221f9..c77df9c7b 100644 --- a/include/device_queue.h +++ b/include/device_queue.h @@ -59,9 +59,79 @@ namespace detail { std::unique_ptr sycl_queue; bool device_profiling_enabled = false; - cl::sycl::device pick_device(const config& cfg, cl::sycl::device* user_device) const; void handle_async_exceptions(cl::sycl::exception_list el) const; }; + template + DeviceT pick_device(const config& cfg, DeviceT* user_device, const std::vector& platforms) { + DeviceT device; + std::string how_selected = "automatically selected"; + if(user_device != nullptr) { + device = *user_device; + how_selected = "specified by user"; + } else { + const auto device_cfg = cfg.get_device_config(); + if(device_cfg != std::nullopt) { + how_selected = fmt::format("set by CELERITY_DEVICES: platform {}, device {}", device_cfg->platform_id, device_cfg->device_id); + CELERITY_DEBUG("{} platforms available", platforms.size()); + if(device_cfg->platform_id >= platforms.size()) { + throw std::runtime_error(fmt::format("Invalid platform id {}: Only {} platforms available", device_cfg->platform_id, platforms.size())); + } + const auto devices = platforms[device_cfg->platform_id].get_devices(); + if(device_cfg->device_id >= devices.size()) { + throw std::runtime_error(fmt::format( + "Invalid device id {}: Only {} devices available on platform {}", device_cfg->device_id, devices.size(), device_cfg->platform_id)); + } + device = devices[device_cfg->device_id]; + } else { + const auto host_cfg = cfg.get_host_config(); + + const auto try_find_device_per_node = [&host_cfg, &device, &how_selected, &platforms](cl::sycl::info::device_type type) { + // Try to find a platform that can provide a unique device for each node. + for(size_t i = 0; i < platforms.size(); ++i) { + auto&& platform = platforms[i]; + const auto devices = platform.get_devices(type); + if(devices.size() >= host_cfg.node_count) { + how_selected = fmt::format("automatically selected platform {}, device {}", i, host_cfg.local_rank); + device = devices[host_cfg.local_rank]; + return true; + } + } + return false; + }; + + const auto try_find_one_device = [&device, &platforms](cl::sycl::info::device_type type) { + for(auto& p : platforms) { + for(auto& d : p.get_devices(type)) { + device = d; + return true; + } + } + return false; + }; + + // Try to find a unique GPU per node. + if(!try_find_device_per_node(cl::sycl::info::device_type::gpu)) { + // Try to find a unique device (of any type) per node. + if(try_find_device_per_node(cl::sycl::info::device_type::all)) { + CELERITY_WARN("No suitable platform found that can provide {} GPU devices, and CELERITY_DEVICES not set", host_cfg.node_count); + } else { + CELERITY_WARN("No suitable platform found that can provide {} devices, and CELERITY_DEVICES not set", host_cfg.node_count); + // Just use the first available device. Prefer GPUs, but settle for anything. + if(!try_find_one_device(cl::sycl::info::device_type::gpu) && !try_find_one_device(cl::sycl::info::device_type::all)) { + throw std::runtime_error("Automatic device selection failed: No device available"); + } + } + } + } + } + + const auto platform_name = device.get_platform().template get_info(); + const auto device_name = device.template get_info(); + CELERITY_INFO("Using platform '{}', device '{}' ({})", platform_name, device_name, how_selected); + + return device; + } + } // namespace detail } // namespace celerity diff --git a/src/config.cc b/src/config.cc index 384f926c0..4842a5695 100644 --- a/src/config.cc +++ b/src/config.cc @@ -192,6 +192,5 @@ namespace detail { } } } - } // namespace detail } // namespace celerity diff --git a/src/device_queue.cc b/src/device_queue.cc index 23f33794d..e8370f8ea 100644 --- a/src/device_queue.cc +++ b/src/device_queue.cc @@ -16,80 +16,10 @@ namespace detail { const auto props = device_profiling_enabled ? cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()} : cl::sycl::property_list{}; const auto handle_exceptions = cl::sycl::async_handler{[this](cl::sycl::exception_list el) { this->handle_async_exceptions(el); }}; - auto device = pick_device(cfg, user_device); + auto device = pick_device(cfg, user_device, cl::sycl::platform::get_platforms()); sycl_queue = std::make_unique(device, handle_exceptions, props); } - cl::sycl::device device_queue::pick_device(const config& cfg, cl::sycl::device* user_device) const { - cl::sycl::device device; - std::string how_selected = "automatically selected"; - if(user_device != nullptr) { - device = *user_device; - how_selected = "specified by user"; - } else { - const auto device_cfg = cfg.get_device_config(); - if(device_cfg != std::nullopt) { - how_selected = fmt::format("set by CELERITY_DEVICES: platform {}, device {}", device_cfg->platform_id, device_cfg->device_id); - const auto platforms = cl::sycl::platform::get_platforms(); - CELERITY_DEBUG("{} platforms available", platforms.size()); - if(device_cfg->platform_id >= platforms.size()) { - throw std::runtime_error(fmt::format("Invalid platform id {}: Only {} platforms available", device_cfg->platform_id, platforms.size())); - } - const auto devices = platforms[device_cfg->platform_id].get_devices(); - if(device_cfg->device_id >= devices.size()) { - throw std::runtime_error(fmt::format( - "Invalid device id {}: Only {} devices available on platform {}", device_cfg->device_id, devices.size(), device_cfg->platform_id)); - } - device = devices[device_cfg->device_id]; - } else { - const auto host_cfg = cfg.get_host_config(); - - const auto try_find_device_per_node = [&host_cfg, &device, &how_selected](cl::sycl::info::device_type type) { - // Try to find a platform that can provide a unique device for each node. - const auto platforms = cl::sycl::platform::get_platforms(); - for(size_t i = 0; i < platforms.size(); ++i) { - auto&& platform = platforms[i]; - const auto devices = platform.get_devices(type); - if(devices.size() >= host_cfg.node_count) { - how_selected = fmt::format("automatically selected platform {}, device {}", i, host_cfg.local_rank); - device = devices[host_cfg.local_rank]; - return true; - } - } - return false; - }; - - const auto try_find_one_device = [&device](cl::sycl::info::device_type type) { - const auto devices = cl::sycl::device::get_devices(type); - if(!devices.empty()) { - device = devices[0]; - return true; - } - return false; - }; - - // Try to find a unique GPU per node. - if(!try_find_device_per_node(cl::sycl::info::device_type::gpu)) { - // Try to find a unique device (of any type) per node. - if(try_find_device_per_node(cl::sycl::info::device_type::all)) { - CELERITY_WARN("No suitable platform found that can provide {} GPU devices, and CELERITY_DEVICES not set", host_cfg.node_count); - } else { - CELERITY_WARN("No suitable platform found that can provide {} devices, and CELERITY_DEVICES not set", host_cfg.node_count); - // Just use the first available device. Prefer GPUs, but settle for anything. - if(!try_find_one_device(cl::sycl::info::device_type::gpu) && !try_find_one_device(cl::sycl::info::device_type::all)) { - throw std::runtime_error("Automatic device selection failed: No device available"); - } - } - } - } - } - - const auto platform_name = device.get_platform().get_info(); - const auto device_name = device.get_info(); - CELERITY_INFO("Using platform '{}', device '{}' ({})", platform_name, device_name, how_selected); - - return device; - } void device_queue::handle_async_exceptions(cl::sycl::exception_list el) const { for(auto& e : el) { @@ -102,6 +32,5 @@ namespace detail { } } - } // namespace detail } // namespace celerity diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 423716975..a577b503b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,6 +37,7 @@ set(TEST_TARGETS runtime_deprecation_tests sycl_tests task_graph_tests + device_selection_tests ) add_library(test_main test_main.cc) diff --git a/test/device_selection_tests.cc b/test/device_selection_tests.cc new file mode 100644 index 000000000..d9ac09538 --- /dev/null +++ b/test/device_selection_tests.cc @@ -0,0 +1,390 @@ +#include "catch2/catch_test_macros.hpp" +#include "catch2/generators/catch_generators.hpp" +#include "catch2/matchers/catch_matchers_string.hpp" +#include "spdlog/sinks/ostream_sink.h" +#include "test_utils.h" +#include + +struct mock_platform; +struct mock_device { + mock_device() : id(0), type(cl::sycl::info::device_type::gpu) {} + + mock_device(size_t id, cl::sycl::info::device_type type = cl::sycl::info::device_type::gpu) : id(id), type(type) {} + + mock_platform get_platform() const; + + template + std::string get_info() const { + return "bar"; + } + + bool operator==(const mock_device& other) const { return other.id == id; } + + cl::sycl::info::device_type get_type() const { return type; } + + size_t get_id() { return id; } + + private: + size_t id; + cl::sycl::info::device_type type; +}; +struct mock_platform { + // TODO: These devices should somehow have this platform as their platform (?) + mock_platform(size_t id, std::vector devices) : devices(std::move(devices)), id(id) {} + + std::vector get_devices(cl::sycl::info::device_type type = cl::sycl::info::device_type::all) const { + if(type != cl::sycl::info::device_type::all) { + std::vector devices_with_type; + for(auto device : devices) { + if(device.get_type() == type) { devices_with_type.emplace_back(device); } + } + return devices_with_type; + } else + return devices; + } + + template + std::string get_info() const { + return "foo"; + } + + size_t get_id() { return id; } + + private: + std::vector devices; + size_t id; +}; + +// TODO: Device should know its associated platform and return it from here +mock_platform mock_device::get_platform() const { + return {15, {}}; // Setting random platform ID for now +} + + +namespace celerity::detail { +struct config_testspy { + static void set_mock_device_cfg(config& cfg, const device_config& d_cfg) { cfg.device_cfg = d_cfg; } + static void set_mock_host_cfg(config& cfg, const host_config& h_cfg) { cfg.host_cfg = h_cfg; } +}; +} // namespace celerity::detail + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prefers user specified device pointer", "[device-selection][one]") { + celerity::detail::config cfg(nullptr, nullptr); + + mock_device td(42); + mock_platform tp(68, {{5}, {7}, {9}}); + + auto device = pick_device(cfg, &td, std::vector{tp}); + CHECK(device == td); +} + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, + "pick_device automatically selects a gpu device if available and otherwise falls back to the first device available", "[device-selection][gen]") { + celerity::detail::config cfg(nullptr, nullptr); + + mock_device* td = nullptr; + using device_t = cl::sycl::info::device_type; + + auto dv_type_1 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); + CAPTURE(dv_type_1); + + mock_device td_1(0, dv_type_1); + mock_platform tp_1(0, {td_1}); + + auto dv_type_2 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); + CAPTURE(dv_type_2); + + mock_device td_2(0, dv_type_2); + mock_platform tp_2(1, {td_2}); + + auto device = pick_device(cfg, td, std::vector{tp_1, tp_2}); + std::vector devices; + if(dv_type_1 == device_t::gpu || (dv_type_1 != device_t::gpu && dv_type_2 != device_t::gpu)) { + devices = tp_1.get_devices(); + } else { + devices = tp_2.get_devices(); + } + CHECK(device == devices[0]); +} + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device selects device using CELERITY_DEVICES", "[device-selection][device-cfg]") { + celerity::detail::config cfg(nullptr, nullptr); + mock_device* td = nullptr; + + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3}); + + celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; + celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); + std::vector devices = tp_1.get_devices(); + CHECK(device == devices[1]); +} + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, + "pick_device selects a GPU for each local_rank or falls back to any type of sufficient device for all ranks", "[device-selection][host-cfg]") { + celerity::detail::config cfg(nullptr, nullptr); + mock_device* td = nullptr; + + SECTION("pick_device unique GPU per node") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_device td_4(2, cl::sycl::info::device_type::gpu); + mock_device td_5(3, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3, td_4, td_5}); + + size_t node_count = 4; + size_t local_rank = 2; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); + std::vector devices = tp_1.get_devices(); + CHECK(device == devices[2]); + } + + SECTION("pick_device prefers unique GPU over other devices") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_device td_4(2, cl::sycl::info::device_type::gpu); + mock_device td_5(3, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3, td_4, td_5}); + + mock_device td_6(0, cl::sycl::info::device_type::accelerator); + mock_device td_7(1, cl::sycl::info::device_type::accelerator); + mock_device td_8(2, cl::sycl::info::device_type::accelerator); + mock_device td_9(3, cl::sycl::info::device_type::accelerator); + mock_platform tp_2(1, {td_6, td_7, td_8, td_9}); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); + std::vector devices = tp_1.get_devices(); + CHECK(device == devices[3]); + } + + SECTION("pick_device falls back to other devices with insufficient GPUs") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_device td_4(2, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3, td_4}); + + mock_device td_5(0, cl::sycl::info::device_type::accelerator); + mock_device td_6(1, cl::sycl::info::device_type::accelerator); + mock_device td_7(2, cl::sycl::info::device_type::accelerator); + mock_device td_8(3, cl::sycl::info::device_type::accelerator); + mock_platform tp_2(1, {td_5, td_6, td_7, td_8}); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); + std::vector devices = tp_2.get_devices(); + CHECK(device == devices[3]); + } + + SECTION("pick_device prefers the first available GPU with insufficient GPUs and other devices") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2}); + + mock_device td_5(0, cl::sycl::info::device_type::accelerator); + mock_device td_6(1, cl::sycl::info::device_type::accelerator); + mock_device td_7(2, cl::sycl::info::device_type::accelerator); + mock_platform tp_2(1, {td_5, td_6, td_7}); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); + std::vector devices = tp_1.get_devices(); + CHECK(device == devices[0]); + } + + SECTION("pick_device prefers the first available device(any) with no GPUs") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_5(0, cl::sycl::info::device_type::accelerator); + mock_device td_6(1, cl::sycl::info::device_type::accelerator); + mock_device td_7(2, cl::sycl::info::device_type::accelerator); + mock_platform tp_2(1, {td_5, td_6, td_7}); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_2}); + std::vector devices = tp_2.get_devices(); + CHECK(device == devices[0]); + } +} + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected info/warn messages", "[device-selection][msg]") { + std::ostringstream oss; + auto logger = spdlog::default_logger(); + auto ostream_info_sink = std::make_shared(oss); + ostream_info_sink->set_level(spdlog::level::info); + logger->sinks().push_back(ostream_info_sink); + + celerity::detail::config cfg(nullptr, nullptr); + SECTION("device_pointer is specified by the user") { + mock_device td(42); + mock_platform tp(68, {{5}, {7}, {9}}); + + auto device = pick_device(cfg, &td, std::vector{tp}); + CHECK_THAT(oss.str(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (specified by user)")); + oss.str(""); + } + + mock_device* td = nullptr; + SECTION("CELERITY_DEVICE is set by the user") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3}); + + celerity::detail::device_config d_cfg{td_3.get_id(), tp_1.get_id()}; + celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); + CHECK_THAT(oss.str(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (set by CELERITY_DEVICES: platform 1, device 1)")); + oss.str(""); + } + + + SECTION("pick_device selects a gpu/any per node automaticaly") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3}); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); + CHECK_THAT(oss.str(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (automatically selected platform 1, device 0)")); + oss.str(""); + } + + std::ostringstream _oss; + auto ostream_warn_sink = std::make_shared(_oss); + ostream_warn_sink->set_level(spdlog::level::warn); + logger->sinks().push_back(ostream_warn_sink); + SECTION("pick_device can't find any platform with sufficient GPUs") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_device td_4(2, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3, td_4}); + + mock_device td_5(0, cl::sycl::info::device_type::accelerator); + mock_device td_6(1, cl::sycl::info::device_type::accelerator); + mock_device td_7(2, cl::sycl::info::device_type::accelerator); + mock_device td_8(3, cl::sycl::info::device_type::accelerator); + mock_platform tp_2(1, {td_5, td_6, td_7, td_8}); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); + CHECK_THAT(_oss.str(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 GPU devices, and CELERITY_DEVICES not set")); + _oss.str(""); + } + + SECTION("pick_device can't find any platform with any type of sufficient device") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2}); + + mock_device td_5(0, cl::sycl::info::device_type::accelerator); + mock_device td_6(1, cl::sycl::info::device_type::accelerator); + mock_device td_7(2, cl::sycl::info::device_type::accelerator); + mock_platform tp_2(1, {td_5, td_6, td_7}); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); + CHECK_THAT(_oss.str(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices, and CELERITY_DEVICES not set")); + _oss.str(""); + } + + SECTION("CELERITY_DEVICE is set with invalid platform id") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(0, cl::sycl::info::device_type::gpu); + mock_device td_3(1, cl::sycl::info::device_type::gpu); + mock_platform tp_1(3, {td_2, td_3}); + + celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; + celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); + CHECK_THROWS_WITH(pick_device(cfg, td, std::vector{tp_0, tp_1}), "Invalid platform id 3: Only 2 platforms available"); + } + + SECTION("CELERITY_DEVICE is set with invalid device id") { + mock_device td_1(0, cl::sycl::info::device_type::cpu); + mock_platform tp_0(0, {td_1}); + + mock_device td_2(4, cl::sycl::info::device_type::gpu); + mock_device td_3(5, cl::sycl::info::device_type::gpu); + mock_platform tp_1(1, {td_2, td_3}); + + celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; + celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); + + CHECK_THROWS_WITH(pick_device(cfg, td, std::vector{tp_0, tp_1}), "Invalid device id 5: Only 2 devices available on platform 1"); + } + + SECTION("pick_device couldn't find any device") { + CHECK_THROWS_WITH(pick_device(cfg, td, std::vector{}), "Automatic device selection failed: No device available"); + } +} \ No newline at end of file From 19a80dc443865e3046808d6ff9b2d87b5446d9c4 Mon Sep 17 00:00:00 2001 From: Anurag Date: Wed, 26 Jan 2022 11:07:47 +0100 Subject: [PATCH 2/3] Add support for passing device selector to distr_queue ctor ... and runtime::init --- include/celerity.h | 15 +- include/config.h | 3 +- include/device_queue.h | 172 +++++++-- include/distr_queue.h | 20 +- include/runtime.h | 7 +- src/device_queue.cc | 7 +- src/runtime.cc | 8 +- test/device_selection_tests.cc | 685 ++++++++++++++++++++++++--------- test/runtime_tests.cc | 5 +- test/sycl_tests.cc | 35 ++ test/test_utils.h | 16 +- 11 files changed, 730 insertions(+), 243 deletions(-) diff --git a/include/celerity.h b/include/celerity.h index 8e92236fd..199da6051 100644 --- a/include/celerity.h +++ b/include/celerity.h @@ -1,6 +1,7 @@ #ifndef RUNTIME_INCLUDE_ENTRY_CELERITY #define RUNTIME_INCLUDE_ENTRY_CELERITY +#include "device_queue.h" #include "runtime.h" #include "accessor.h" @@ -15,14 +16,24 @@ namespace runtime { /** * @brief Initializes the Celerity runtime. */ - inline void init(int* argc, char** argv[]) { detail::runtime::init(argc, argv, nullptr); } + inline void init(int* argc, char** argv[]) { detail::runtime::init(argc, argv, detail::auto_select_device{}); } /** * @brief Initializes the Celerity runtime and instructs it to use a particular device. * * @param device The device to be used on the current node. This can vary between nodes. */ - inline void init(int* argc, char** argv[], cl::sycl::device& device) { detail::runtime::init(argc, argv, &device); } + [[deprecated("Use the overload with device selector instead, this will be removed in future release")]] inline void init( + int* argc, char** argv[], sycl::device& device) { + detail::runtime::init(argc, argv, device); + } + + /** + * @brief Initializes the Celerity runtime and instructs it to use a particular device. + * + * @param device_selector The device selector to be used on the current node. This can vary between nodes. + */ + inline void init(int* argc, char** argv[], const detail::device_selector& device_selector) { detail::runtime::init(argc, argv, device_selector); } } // namespace runtime } // namespace celerity diff --git a/include/config.h b/include/config.h index 472d34f04..958dd2e9c 100644 --- a/include/config.h +++ b/include/config.h @@ -19,6 +19,8 @@ namespace detail { }; class config { + friend struct config_testspy; + public: /** * Initializes the @p config by parsing environment variables and passed arguments. @@ -48,7 +50,6 @@ namespace detail { std::optional device_cfg; std::optional enable_device_profiling; size_t graph_print_max_verts = 200; - friend struct config_testspy; }; } // namespace detail diff --git a/include/device_queue.h b/include/device_queue.h index c77df9c7b..4a0266d7d 100644 --- a/include/device_queue.h +++ b/include/device_queue.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include "config.h" #include "workaround.h" @@ -10,6 +12,10 @@ namespace celerity { namespace detail { + struct auto_select_device {}; + using device_selector = std::function; + using device_or_selector = std::variant; + class task; /** @@ -21,9 +27,9 @@ namespace detail { * @brief Initializes the @p device_queue, selecting an appropriate device in the process. * * @param cfg The configuration is used to select the appropriate SYCL device. - * @param user_device Optionally a device can be provided, which will take precedence over any configuration. + * @param user_device_or_selector Optionally a device (which will take precedence over any configuration) or a device selector can be provided. */ - void init(const config& cfg, cl::sycl::device* user_device); + void init(const config& cfg, const device_or_selector& user_device_or_selector); /** * @brief Executes the kernel associated with task @p ctsk over the chunk @p chnk. @@ -62,12 +68,111 @@ namespace detail { void handle_async_exceptions(cl::sycl::exception_list el) const; }; + // Try to find a platform that can provide a unique device for each node using a device selector. + template + bool try_find_device_per_node( + std::string& how_selected, DeviceT& device, const std::vector& platforms, const host_config& host_cfg, SelectorT selector) { + std::vector> devices_with_platform_idx; + for(size_t i = 0; i < platforms.size(); ++i) { + auto&& platform = platforms[i]; + for(auto device : platform.get_devices()) { + if(selector(device) == -1) { continue; } + devices_with_platform_idx.emplace_back(device, i); + } + } + + std::stable_sort(devices_with_platform_idx.begin(), devices_with_platform_idx.end(), + [selector](const auto& a, const auto& b) { return selector(std::get<0>(a)) > selector(std::get<0>(b)); }); + bool same_platform = true; + bool same_device_type = true; + if(devices_with_platform_idx.size() >= host_cfg.node_count) { + auto [device_from_platform, idx] = devices_with_platform_idx[0]; + const auto platform = device_from_platform.get_platform(); + const auto device_type = device_from_platform.template get_info(); + + for(size_t i = 1; i < host_cfg.node_count; ++i) { + auto [device_from_platform, idx] = devices_with_platform_idx[i]; + if(device_from_platform.get_platform() != platform) { same_platform = false; } + if(device_from_platform.template get_info() != device_type) { same_device_type = false; } + } + + if(!same_platform || !same_device_type) { CELERITY_WARN("Selected devices are of different type and/or do not belong to the same platform"); } + + auto [selected_device_from_platform, selected_idx] = devices_with_platform_idx[host_cfg.local_rank]; + how_selected = fmt::format("device selector specified: platform {}, device {}", selected_idx, host_cfg.local_rank); + device = selected_device_from_platform; + return true; + } + + return false; + } + + // Try to find a platform that can provide a unique device for each node. template - DeviceT pick_device(const config& cfg, DeviceT* user_device, const std::vector& platforms) { + bool try_find_device_per_node( + std::string& how_selected, DeviceT& device, const std::vector& platforms, const host_config& host_cfg, sycl::info::device_type type) { + for(size_t i = 0; i < platforms.size(); ++i) { + auto&& platform = platforms[i]; + std::vector platform_devices; + + platform_devices = platform.get_devices(type); + if(platform_devices.size() >= host_cfg.node_count) { + how_selected = fmt::format("automatically selected platform {}, device {}", i, host_cfg.local_rank); + device = platform_devices[host_cfg.local_rank]; + return true; + } + } + + return false; + } + + template + bool try_find_one_device( + std::string& how_selected, DeviceT& device, const std::vector& platforms, const host_config& host_cfg, SelectorT selector) { + std::vector platform_devices; + for(auto& p : platforms) { + auto p_devices = p.get_devices(); + platform_devices.insert(platform_devices.end(), p_devices.begin(), p_devices.end()); + } + + std::stable_sort(platform_devices.begin(), platform_devices.end(), [selector](const auto& a, const auto& b) { return selector(a) > selector(b); }); + if(!platform_devices.empty()) { + if(selector(platform_devices[0]) == -1) { return false; } + device = platform_devices[0]; + return true; + } + + return false; + }; + + template + bool try_find_one_device( + std::string& how_selected, DeviceT& device, const std::vector& platforms, const host_config& host_cfg, sycl::info::device_type type) { + for(auto& p : platforms) { + for(auto& d : p.get_devices(type)) { + device = d; + return true; + } + } + + return false; + }; + + + template + auto pick_device(const config& cfg, const DevicePtrOrSelector& user_device_or_selector, const std::vector& platforms) { + using DeviceT = typename decltype(std::declval().get_devices())::value_type; + + constexpr bool user_device_provided = std::is_same_v; + constexpr bool device_selector_provided = std::is_invocable_r_v; + constexpr bool auto_select = std::is_same_v; + static_assert( + user_device_provided ^ device_selector_provided ^ auto_select, "pick_device requires either a device, a selector, or the auto_select_device tag"); + DeviceT device; std::string how_selected = "automatically selected"; - if(user_device != nullptr) { - device = *user_device; + if constexpr(user_device_provided) { + device = user_device_or_selector; how_selected = "specified by user"; } else { const auto device_cfg = cfg.get_device_config(); @@ -86,48 +191,37 @@ namespace detail { } else { const auto host_cfg = cfg.get_host_config(); - const auto try_find_device_per_node = [&host_cfg, &device, &how_selected, &platforms](cl::sycl::info::device_type type) { - // Try to find a platform that can provide a unique device for each node. - for(size_t i = 0; i < platforms.size(); ++i) { - auto&& platform = platforms[i]; - const auto devices = platform.get_devices(type); - if(devices.size() >= host_cfg.node_count) { - how_selected = fmt::format("automatically selected platform {}, device {}", i, host_cfg.local_rank); - device = devices[host_cfg.local_rank]; - return true; - } - } - return false; - }; - - const auto try_find_one_device = [&device, &platforms](cl::sycl::info::device_type type) { - for(auto& p : platforms) { - for(auto& d : p.get_devices(type)) { - device = d; - return true; + if constexpr(!device_selector_provided) { + // Try to find a unique GPU per node. + if(!try_find_device_per_node(how_selected, device, platforms, host_cfg, sycl::info::device_type::gpu)) { + if(try_find_device_per_node(how_selected, device, platforms, host_cfg, sycl::info::device_type::all)) { + CELERITY_WARN("No suitable platform found that can provide {} GPU devices, and CELERITY_DEVICES not set", host_cfg.node_count); + } else { + CELERITY_WARN("No suitable platform found that can provide {} devices, and CELERITY_DEVICES not set", host_cfg.node_count); + // Just use the first available device. Prefer GPUs, but settle for anything. + if(!try_find_one_device(how_selected, device, platforms, host_cfg, sycl::info::device_type::gpu) + && !try_find_one_device(how_selected, device, platforms, host_cfg, sycl::info::device_type::all)) { + throw std::runtime_error("Automatic device selection failed: No device available"); + } } } - return false; - }; - - // Try to find a unique GPU per node. - if(!try_find_device_per_node(cl::sycl::info::device_type::gpu)) { - // Try to find a unique device (of any type) per node. - if(try_find_device_per_node(cl::sycl::info::device_type::all)) { - CELERITY_WARN("No suitable platform found that can provide {} GPU devices, and CELERITY_DEVICES not set", host_cfg.node_count); - } else { - CELERITY_WARN("No suitable platform found that can provide {} devices, and CELERITY_DEVICES not set", host_cfg.node_count); - // Just use the first available device. Prefer GPUs, but settle for anything. - if(!try_find_one_device(cl::sycl::info::device_type::gpu) && !try_find_one_device(cl::sycl::info::device_type::all)) { - throw std::runtime_error("Automatic device selection failed: No device available"); + } else { + // Try to find a unique device per node using a selector. + if(!try_find_device_per_node(how_selected, device, platforms, host_cfg, user_device_or_selector)) { + CELERITY_WARN("No suitable platform found that can provide {} devices that match the specified device selector, and " + "CELERITY_DEVICES not set", + host_cfg.node_count); + // Use the first available device according to the selector, but fails if no such device is found. + if(!try_find_one_device(how_selected, device, platforms, host_cfg, user_device_or_selector)) { + throw std::runtime_error("Device selection with device selector failed: No device available"); } } } } } - const auto platform_name = device.get_platform().template get_info(); - const auto device_name = device.template get_info(); + const auto platform_name = device.get_platform().template get_info(); + const auto device_name = device.template get_info(); CELERITY_INFO("Using platform '{}', device '{}' ({})", platform_name, device_name, how_selected); return device; diff --git a/include/distr_queue.h b/include/distr_queue.h index f2120ae82..ca095fc4d 100644 --- a/include/distr_queue.h +++ b/include/distr_queue.h @@ -3,6 +3,7 @@ #include #include +#include "device_queue.h" #include "runtime.h" #include "task_manager.h" @@ -25,10 +26,19 @@ inline constexpr allow_by_ref_t allow_by_ref{}; class distr_queue { public: - distr_queue() { init(nullptr); } - distr_queue(cl::sycl::device& device) { + distr_queue() { init(detail::auto_select_device{}); } + + [[deprecated("Use the overload with device selector instead, this will be removed in future release")]] distr_queue(cl::sycl::device& device) { if(detail::runtime::is_initialized()) { throw std::runtime_error("Passing explicit device not possible, runtime has already been initialized."); } - init(&device); + init(device); + } + + template + distr_queue(const DeviceSelector& device_selector) { + if(detail::runtime::is_initialized()) { + throw std::runtime_error("Passing explicit device selector not possible, runtime has already been initialized."); + } + init(device_selector); } distr_queue(const distr_queue&) = default; @@ -77,8 +87,8 @@ class distr_queue { private: std::shared_ptr tracker; - void init(cl::sycl::device* user_device) { - if(!detail::runtime::is_initialized()) { detail::runtime::init(nullptr, nullptr, user_device); } + void init(detail::device_or_selector device_or_selector) { + if(!detail::runtime::is_initialized()) { detail::runtime::init(nullptr, nullptr, device_or_selector); } try { detail::runtime::get_instance().startup(); } catch(detail::runtime_already_started_error&) { diff --git a/include/runtime.h b/include/runtime.h index ff74570f1..e5b6b6204 100644 --- a/include/runtime.h +++ b/include/runtime.h @@ -41,9 +41,10 @@ namespace detail { public: /** - * @param user_device This optional device can be provided by the user, overriding any other device selection strategy. + * @param user_device_or_selector This optional device (overriding any other device selection strategy) or device selector can be provided by the user. */ - static void init(int* argc, char** argv[], cl::sycl::device* user_device = nullptr); + static void init(int* argc, char** argv[], device_or_selector user_device_or_selector = auto_select_device{}); + static bool is_initialized() { return instance != nullptr; } static runtime& get_instance(); @@ -117,7 +118,7 @@ namespace detail { }; std::deque active_flushes; - runtime(int* argc, char** argv[], cl::sycl::device* user_device = nullptr); + runtime(int* argc, char** argv[], device_or_selector user_device_or_selector); runtime(const runtime&) = delete; runtime(runtime&&) = delete; diff --git a/src/device_queue.cc b/src/device_queue.cc index e8370f8ea..897c22aa3 100644 --- a/src/device_queue.cc +++ b/src/device_queue.cc @@ -8,7 +8,7 @@ namespace celerity { namespace detail { - void device_queue::init(const config& cfg, cl::sycl::device* user_device) { + void device_queue::init(const config& cfg, const device_or_selector& user_device_or_selector) { assert(sycl_queue == nullptr); const auto profiling_cfg = cfg.get_enable_device_profiling(); device_profiling_enabled = profiling_cfg != std::nullopt && *profiling_cfg; @@ -16,11 +16,12 @@ namespace detail { const auto props = device_profiling_enabled ? cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()} : cl::sycl::property_list{}; const auto handle_exceptions = cl::sycl::async_handler{[this](cl::sycl::exception_list el) { this->handle_async_exceptions(el); }}; - auto device = pick_device(cfg, user_device, cl::sycl::platform::get_platforms()); + + auto device = std::visit( + [&cfg](const auto& value) { return ::celerity::detail::pick_device(cfg, value, cl::sycl::platform::get_platforms()); }, user_device_or_selector); sycl_queue = std::make_unique(device, handle_exceptions, props); } - void device_queue::handle_async_exceptions(cl::sycl::exception_list el) const { for(auto& e : el) { try { diff --git a/src/runtime.cc b/src/runtime.cc index 1140f19d7..27c7ff9ba 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -48,9 +48,9 @@ namespace detail { mpi_finalized = true; } - void runtime::init(int* argc, char** argv[], cl::sycl::device* user_device) { + void runtime::init(int* argc, char** argv[], device_or_selector user_device_or_selector) { assert(!instance); - instance = std::unique_ptr(new runtime(argc, argv, user_device)); + instance = std::unique_ptr(new runtime(argc, argv, user_device_or_selector)); } runtime& runtime::get_instance() { @@ -91,7 +91,7 @@ namespace detail { #endif } - runtime::runtime(int* argc, char** argv[], cl::sycl::device* user_device) { + runtime::runtime(int* argc, char** argv[], device_or_selector user_device_or_selector) { if(test_mode) { assert(test_active && "initializing the runtime from a test without a runtime_fixture"); } else { @@ -145,7 +145,7 @@ namespace detail { CELERITY_INFO( "Celerity runtime version {} running on {}. PID = {}, build type = {}", get_version_string(), get_sycl_version(), get_pid(), get_build_type()); - d_queue->init(*cfg, user_device); + d_queue->init(*cfg, user_device_or_selector); } runtime::~runtime() { diff --git a/test/device_selection_tests.cc b/test/device_selection_tests.cc index d9ac09538..f3af00df2 100644 --- a/test/device_selection_tests.cc +++ b/test/device_selection_tests.cc @@ -7,33 +7,37 @@ struct mock_platform; struct mock_device { - mock_device() : id(0), type(cl::sycl::info::device_type::gpu) {} + mock_device() : platform(nullptr), id(0), type(sycl::info::device_type::gpu) {} - mock_device(size_t id, cl::sycl::info::device_type type = cl::sycl::info::device_type::gpu) : id(id), type(type) {} + mock_device(size_t id, mock_platform& platform, sycl::info::device_type type = sycl::info::device_type::gpu) : platform(&platform), id(id), type(type) {} - mock_platform get_platform() const; + mock_platform& get_platform() const { return *platform; } - template - std::string get_info() const { - return "bar"; + template + auto get_info() const { + if constexpr(Param == sycl::info::device::name) { return name; } + if constexpr(Param == sycl::info::device::device_type) { return type; } } bool operator==(const mock_device& other) const { return other.id == id; } - cl::sycl::info::device_type get_type() const { return type; } + sycl::info::device_type get_type() const { return type; } size_t get_id() { return id; } private: + mock_platform* platform; + std::string name = "bar"; size_t id; - cl::sycl::info::device_type type; + sycl::info::device_type type; }; struct mock_platform { - // TODO: These devices should somehow have this platform as their platform (?) - mock_platform(size_t id, std::vector devices) : devices(std::move(devices)), id(id) {} + mock_platform(size_t id) : id(id) {} + + void set_devices(std::vector devices) { this->devices = devices; } - std::vector get_devices(cl::sycl::info::device_type type = cl::sycl::info::device_type::all) const { - if(type != cl::sycl::info::device_type::all) { + std::vector get_devices(sycl::info::device_type type = sycl::info::device_type::all) const { + if(type != sycl::info::device_type::all) { std::vector devices_with_type; for(auto device : devices) { if(device.get_type() == type) { devices_with_type.emplace_back(device); } @@ -43,23 +47,23 @@ struct mock_platform { return devices; } - template + template std::string get_info() const { - return "foo"; + return name; } + void set_info(std::string name) { this->name = name; } + + bool operator!=(const mock_platform& other) const { return other.id != id; } + size_t get_id() { return id; } private: std::vector devices; size_t id; + std::string name = "foo"; }; -// TODO: Device should know its associated platform and return it from here -mock_platform mock_device::get_platform() const { - return {15, {}}; // Setting random platform ID for now -} - namespace celerity::detail { struct config_testspy { @@ -68,78 +72,79 @@ struct config_testspy { }; } // namespace celerity::detail -TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prefers user specified device pointer", "[device-selection][one]") { +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prefers user specified device pointer", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); - mock_device td(42); - mock_platform tp(68, {{5}, {7}, {9}}); + mock_platform tp(68); + mock_device td(42, tp); + tp.set_devices({td}); - auto device = pick_device(cfg, &td, std::vector{tp}); + auto device = pick_device(cfg, td, std::vector{tp}); CHECK(device == td); } TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, - "pick_device automatically selects a gpu device if available and otherwise falls back to the first device available", "[device-selection][gen]") { + "pick_device automatically selects a gpu device if available and otherwise falls back to the first device available", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); - mock_device* td = nullptr; - using device_t = cl::sycl::info::device_type; + using device_t = sycl::info::device_type; - auto dv_type_1 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); + auto dv_type_1 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); CAPTURE(dv_type_1); - mock_device td_1(0, dv_type_1); - mock_platform tp_1(0, {td_1}); + mock_platform tp_1(0); + mock_device td_1(0, tp_1, dv_type_1); + tp_1.set_devices({td_1}); - auto dv_type_2 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); + auto dv_type_2 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); CAPTURE(dv_type_2); - mock_device td_2(0, dv_type_2); - mock_platform tp_2(1, {td_2}); + mock_platform tp_2(1); + mock_device td_2(1, tp_2, dv_type_2); + tp_2.set_devices({td_2}); - auto device = pick_device(cfg, td, std::vector{tp_1, tp_2}); - std::vector devices; + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_1, tp_2}); if(dv_type_1 == device_t::gpu || (dv_type_1 != device_t::gpu && dv_type_2 != device_t::gpu)) { - devices = tp_1.get_devices(); + CHECK(device == td_1); } else { - devices = tp_2.get_devices(); + CHECK(device == td_2); } - CHECK(device == devices[0]); } TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device selects device using CELERITY_DEVICES", "[device-selection][device-cfg]") { celerity::detail::config cfg(nullptr, nullptr); - mock_device* td = nullptr; - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3}); + mock_platform tp_1(1); + mock_device td_2(0, tp_1, sycl::info::device_type::gpu); + mock_device td_3(1, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3}); celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); - std::vector devices = tp_1.get_devices(); - CHECK(device == devices[1]); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); + CHECK(device == td_3); } TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device selects a GPU for each local_rank or falls back to any type of sufficient device for all ranks", "[device-selection][host-cfg]") { celerity::detail::config cfg(nullptr, nullptr); - mock_device* td = nullptr; SECTION("pick_device unique GPU per node") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_device td_4(2, cl::sycl::info::device_type::gpu); - mock_device td_5(3, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3, td_4, td_5}); + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4, td_5}); size_t node_count = 4; size_t local_rank = 2; @@ -148,26 +153,28 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); - std::vector devices = tp_1.get_devices(); - CHECK(device == devices[2]); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); + CHECK(device == td_4); } SECTION("pick_device prefers unique GPU over other devices") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); - - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_device td_4(2, cl::sycl::info::device_type::gpu); - mock_device td_5(3, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3, td_4, td_5}); - - mock_device td_6(0, cl::sycl::info::device_type::accelerator); - mock_device td_7(1, cl::sycl::info::device_type::accelerator); - mock_device td_8(2, cl::sycl::info::device_type::accelerator); - mock_device td_9(3, cl::sycl::info::device_type::accelerator); - mock_platform tp_2(1, {td_6, td_7, td_8, td_9}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4, td_5}); + + mock_platform tp_2(1); + mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); + mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); + mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); + mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_6, td_7, td_8, td_9}); size_t node_count = 4; size_t local_rank = 3; @@ -176,25 +183,27 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); - std::vector devices = tp_1.get_devices(); - CHECK(device == devices[3]); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + CHECK(device == td_5); } SECTION("pick_device falls back to other devices with insufficient GPUs") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); - - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_device td_4(2, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3, td_4}); - - mock_device td_5(0, cl::sycl::info::device_type::accelerator); - mock_device td_6(1, cl::sycl::info::device_type::accelerator); - mock_device td_7(2, cl::sycl::info::device_type::accelerator); - mock_device td_8(3, cl::sycl::info::device_type::accelerator); - mock_platform tp_2(1, {td_5, td_6, td_7, td_8}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4}); + + mock_platform tp_2(2); + mock_device td_5(4, tp_2, sycl::info::device_type::accelerator); + mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); + mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); + mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_5, td_6, td_7, td_8}); size_t node_count = 4; size_t local_rank = 3; @@ -203,22 +212,24 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); - std::vector devices = tp_2.get_devices(); - CHECK(device == devices[3]); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + CHECK(device == td_8); } SECTION("pick_device prefers the first available GPU with insufficient GPUs and other devices") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2}); + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2}); - mock_device td_5(0, cl::sycl::info::device_type::accelerator); - mock_device td_6(1, cl::sycl::info::device_type::accelerator); - mock_device td_7(2, cl::sycl::info::device_type::accelerator); - mock_platform tp_2(1, {td_5, td_6, td_7}); + mock_platform tp_2(2); + mock_device td_3(2, tp_2, sycl::info::device_type::accelerator); + mock_device td_4(3, tp_2, sycl::info::device_type::accelerator); + mock_device td_5(4, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_3, td_4, td_5}); size_t node_count = 4; size_t local_rank = 3; @@ -227,19 +238,20 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); - std::vector devices = tp_1.get_devices(); - CHECK(device == devices[0]); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + CHECK(device == td_2); } SECTION("pick_device prefers the first available device(any) with no GPUs") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_5(0, cl::sycl::info::device_type::accelerator); - mock_device td_6(1, cl::sycl::info::device_type::accelerator); - mock_device td_7(2, cl::sycl::info::device_type::accelerator); - mock_platform tp_2(1, {td_5, td_6, td_7}); + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); + mock_device td_3(2, tp_1, sycl::info::device_type::accelerator); + mock_device td_4(3, tp_1, sycl::info::device_type::accelerator); + tp_1.set_devices({td_2, td_3, td_4}); size_t node_count = 4; size_t local_rank = 3; @@ -248,78 +260,96 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_2}); - std::vector devices = tp_2.get_devices(); - CHECK(device == devices[0]); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); + CHECK(device == td_1); } } -TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected info/warn messages", "[device-selection][msg]") { +class log_capture { + public: + log_capture(spdlog::level::level_enum level = spdlog::level::trace) { + auto logger = spdlog::default_logger(); + auto ostream_info_sink = std::make_shared(oss); + ostream_info_sink->set_level(level); + logger->sinks().push_back(ostream_info_sink); + } + + ~log_capture() { + auto logger = spdlog::default_logger(); + // TODO: Assert that no other sink has been pushed in the meantime + logger->sinks().pop_back(); + } + + std::string get_log() { return oss.str(); } + + private: std::ostringstream oss; - auto logger = spdlog::default_logger(); - auto ostream_info_sink = std::make_shared(oss); - ostream_info_sink->set_level(spdlog::level::info); - logger->sinks().push_back(ostream_info_sink); +}; +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected info/warn messages", "[device-selection][msg]") { celerity::detail::config cfg(nullptr, nullptr); SECTION("device_pointer is specified by the user") { - mock_device td(42); - mock_platform tp(68, {{5}, {7}, {9}}); + log_capture lc; + mock_platform tp(68); + mock_device td(42, tp); + tp.set_devices({{5, tp}, {7, tp}, {9, tp}}); - auto device = pick_device(cfg, &td, std::vector{tp}); - CHECK_THAT(oss.str(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (specified by user)")); - oss.str(""); + auto device = pick_device(cfg, td, std::vector{tp}); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (specified by user)")); } - mock_device* td = nullptr; SECTION("CELERITY_DEVICE is set by the user") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + log_capture lc; + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3}); + mock_platform tp_1(1); + mock_device td_2(0, tp_1, sycl::info::device_type::gpu); + mock_device td_3(1, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3}); celerity::detail::device_config d_cfg{td_3.get_id(), tp_1.get_id()}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); - CHECK_THAT(oss.str(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (set by CELERITY_DEVICES: platform 1, device 1)")); - oss.str(""); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (set by CELERITY_DEVICES: platform 1, device 1)")); } SECTION("pick_device selects a gpu/any per node automaticaly") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); - - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3}); - - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1}); - CHECK_THAT(oss.str(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (automatically selected platform 1, device 0)")); - oss.str(""); + log_capture lc; + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + + mock_platform tp_1(1); + mock_device td_2(0, tp_1, sycl::info::device_type::gpu); + mock_device td_3(1, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3}); + + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (automatically selected platform 1, device 0)")); } - std::ostringstream _oss; - auto ostream_warn_sink = std::make_shared(_oss); - ostream_warn_sink->set_level(spdlog::level::warn); - logger->sinks().push_back(ostream_warn_sink); SECTION("pick_device can't find any platform with sufficient GPUs") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); - - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_device td_4(2, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3, td_4}); - - mock_device td_5(0, cl::sycl::info::device_type::accelerator); - mock_device td_6(1, cl::sycl::info::device_type::accelerator); - mock_device td_7(2, cl::sycl::info::device_type::accelerator); - mock_device td_8(3, cl::sycl::info::device_type::accelerator); - mock_platform tp_2(1, {td_5, td_6, td_7, td_8}); + log_capture lc{spdlog::level::warn}; + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + + mock_platform tp_1(1); + mock_device td_2(0, tp_1, sycl::info::device_type::gpu); + mock_device td_3(1, tp_1, sycl::info::device_type::gpu); + mock_device td_4(2, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4}); + + mock_platform tp_2(1); + mock_device td_5(0, tp_2, sycl::info::device_type::accelerator); + mock_device td_6(1, tp_2, sycl::info::device_type::accelerator); + mock_device td_7(2, tp_2, sycl::info::device_type::accelerator); + mock_device td_8(3, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_5, td_6, td_7, td_8}); size_t node_count = 4; size_t local_rank = 3; @@ -328,22 +358,25 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); - CHECK_THAT(_oss.str(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 GPU devices, and CELERITY_DEVICES not set")); - _oss.str(""); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 GPU devices, and CELERITY_DEVICES not set")); } SECTION("pick_device can't find any platform with any type of sufficient device") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + log_capture lc(spdlog::level::warn); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2}); + mock_platform tp_1(1); + mock_device td_2(0, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2}); - mock_device td_5(0, cl::sycl::info::device_type::accelerator); - mock_device td_6(1, cl::sycl::info::device_type::accelerator); - mock_device td_7(2, cl::sycl::info::device_type::accelerator); - mock_platform tp_2(1, {td_5, td_6, td_7}); + mock_platform tp_2(2); + mock_device td_3(0, tp_2, sycl::info::device_type::accelerator); + mock_device td_4(1, tp_2, sycl::info::device_type::accelerator); + mock_device td_5(2, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_3, td_4, td_5}); size_t node_count = 4; size_t local_rank = 3; @@ -352,39 +385,323 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, td, std::vector{tp_0, tp_1, tp_2}); - CHECK_THAT(_oss.str(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices, and CELERITY_DEVICES not set")); - _oss.str(""); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices, and CELERITY_DEVICES not set")); } SECTION("CELERITY_DEVICE is set with invalid platform id") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(0, cl::sycl::info::device_type::gpu); - mock_device td_3(1, cl::sycl::info::device_type::gpu); - mock_platform tp_1(3, {td_2, td_3}); + mock_platform tp_1(3); + mock_device td_2(0, tp_1, sycl::info::device_type::gpu); + mock_device td_3(1, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3}); celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - CHECK_THROWS_WITH(pick_device(cfg, td, std::vector{tp_0, tp_1}), "Invalid platform id 3: Only 2 platforms available"); + CHECK_THROWS_WITH(pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}), + "Invalid platform id 3: Only 2 platforms available"); } SECTION("CELERITY_DEVICE is set with invalid device id") { - mock_device td_1(0, cl::sycl::info::device_type::cpu); - mock_platform tp_0(0, {td_1}); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); - mock_device td_2(4, cl::sycl::info::device_type::gpu); - mock_device td_3(5, cl::sycl::info::device_type::gpu); - mock_platform tp_1(1, {td_2, td_3}); + mock_platform tp_1(1); + mock_device td_2(4, tp_1, sycl::info::device_type::gpu); + mock_device td_3(5, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3}); celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - CHECK_THROWS_WITH(pick_device(cfg, td, std::vector{tp_0, tp_1}), "Invalid device id 5: Only 2 devices available on platform 1"); + CHECK_THROWS_WITH(pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}), + "Invalid device id 5: Only 2 devices available on platform 1"); } SECTION("pick_device couldn't find any device") { - CHECK_THROWS_WITH(pick_device(cfg, td, std::vector{}), "Automatic device selection failed: No device available"); + CHECK_THROWS_WITH( + pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{}), "Automatic device selection failed: No device available"); } -} \ No newline at end of file +} + +// The following test doesn't work with ComputeCpp backend, since the == operator behaves differently +#if !defined(WORKAROUND_COMPUTECPP) +TEST_CASE_METHOD(celerity::test_utils::runtime_fixture, + "runtime::init/distr_queue provides an overloaded constructor with device selector, testing sycl::device", "[distr_queue][ctor][sycl]") { + std::vector devices = sycl::device::get_devices(); + if(devices.size() < 2) { + WARN("Platforms must have 2 or more devices!"); + return; + } + + auto device_idx = GENERATE(0, 1); + CAPTURE(device_idx); + sycl::device device = devices[device_idx]; + CAPTURE(device); + + auto device_selector = [device](const sycl::device& d) -> int { return d == device ? 2 : 1; }; + + celerity::distr_queue q(device_selector); + + auto& dq = celerity::detail::runtime::get_instance().get_device_queue(); + CHECK(dq.get_sycl_queue().get_device() == device); +} +#endif + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "runtime::init/distr_queue provides an overloaded constructor with device selector, testing mock_device", + "[device-selection][ctor][mock-host-cfg]") { + celerity::detail::config cfg(nullptr, nullptr); + + SECTION("pick_device prefers a particular device over all") { + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4, td_5}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); + mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); + mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); + mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_6, td_7, td_8, td_9}); + tp_2.set_info("foo_2"); + + auto device_selector = [td_7](const mock_device& d) -> int { return d == td_7 ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); + CHECK(device == td_7); + } + + SECTION("pick_device prefers a group of devices") { + log_capture lc; + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_5(4, tp_2, sycl::info::device_type::gpu); + mock_device td_6(5, tp_2, sycl::info::device_type::gpu); + mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); + mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); + mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); + mock_device td_10(9, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_5, td_6, td_7, td_8, td_9, td_10}); + tp_2.set_info("foo_2"); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo_2', device 'bar' (device selector specified: platform 2, device 3)")); + CHECK(device == td_10); + } + + SECTION("pick_device prefers prioritised device with selector with insufficient devices") { + log_capture lc(spdlog::level::warn); + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_3(2, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_3}); + tp_2.set_info("foo_2"); + + size_t node_count = 4; + size_t local_rank = 3; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + auto device_selector = [td_3](const mock_device& d) -> int { return d == td_3 ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); + CHECK_THAT( + lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices that match the specified device selector")); + CHECK(device == td_3); + } + + SECTION("pick_device can choose devices across platform with warnings") { + log_capture lc(spdlog::level::warn); + + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4, td_5}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_6(5, tp_2, sycl::info::device_type::gpu); + mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); + mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); + mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); + tp_2.set_devices({td_6, td_7, td_8, td_9}); + tp_2.set_info("foo_2"); + + size_t node_count = 4; + size_t local_rank = 2; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); + INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); + CHECK(device == td_8); + } + + SECTION("pick_device can choose different types of devices with warnings") { + log_capture lc(spdlog::level::warn); + + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4, td_5}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); + mock_device td_7(6, tp_2, sycl::info::device_type::gpu); + mock_device td_8(7, tp_2, sycl::info::device_type::gpu); + mock_device td_9(8, tp_2, sycl::info::device_type::gpu); + tp_2.set_devices({td_6, td_7, td_8, td_9}); + tp_2.set_info("foo_2"); + + size_t node_count = 4; + size_t local_rank = 0; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); + INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); + CHECK(device == td_2); + } + + SECTION("pick_device can choose different types of devices with insufficient devices in platforms with warnings") { + log_capture lc(spdlog::level::warn); + + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4, td_5}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); + mock_device td_7(6, tp_2, sycl::info::device_type::gpu); + mock_device td_8(7, tp_2, sycl::info::device_type::gpu); + tp_2.set_devices({td_6, td_7, td_8}); + tp_2.set_info("foo_2"); + + size_t node_count = 4; + size_t local_rank = 1; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); + INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); + CHECK(device == td_6); + } +} + +TEST_CASE_METHOD( + celerity::test_utils::mpi_fixture, "pick_device does not consider devices with a negative selector score", "[device-selection][msg][negative]") { + celerity::detail::config cfg(nullptr, nullptr); + log_capture lc; + + mock_platform tp_0(0); + mock_device td_1(0, tp_0, sycl::info::device_type::cpu); + tp_0.set_devices({td_1}); + tp_0.set_info("foo_0"); + + mock_platform tp_1(1); + mock_device td_2(1, tp_1, sycl::info::device_type::gpu); + mock_device td_3(2, tp_1, sycl::info::device_type::gpu); + mock_device td_4(3, tp_1, sycl::info::device_type::gpu); + mock_device td_5(4, tp_1, sycl::info::device_type::gpu); + tp_1.set_devices({td_2, td_3, td_4}); + tp_1.set_info("foo_1"); + + mock_platform tp_2(2); + mock_device td_7(5, tp_2, sycl::info::device_type::gpu); + tp_2.set_devices({td_7}); + tp_2.set_info("foo_2"); + + size_t node_count = 4; + size_t local_rank = 2; + size_t local_num_cpus = 1; + + celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 1 : -1; }; + + CHECK_THROWS_WITH( + pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}), "Device selection with device selector failed: No device available"); +} diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index 9d28b342b..60c1926ec 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -69,6 +69,8 @@ namespace detail { REQUIRE(runtime::is_initialized()); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" TEST_CASE_METHOD(test_utils::runtime_fixture, "an explicit device can be provided to distr_queue", "[distr_queue][lifetime]") { cl::sycl::default_selector selector; cl::sycl::device device{selector}; @@ -84,6 +86,7 @@ namespace detail { REQUIRE_THROWS_WITH(distr_queue{device}, "Passing explicit device not possible, runtime has already been initialized."); } } +#pragma GCC diagnostic pop TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer implicitly initializes the runtime", "[distr_queue][lifetime]") { REQUIRE_FALSE(runtime::is_initialized()); @@ -683,7 +686,7 @@ namespace detail { #if CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS TEST_CASE_METHOD(test_utils::runtime_fixture, "attempting a reduction on buffers with size != 1 throws", "[task-manager]") { - runtime::init(nullptr, nullptr, nullptr); + runtime::init(nullptr, nullptr); auto& tm = runtime::get_instance().get_task_manager(); buffer buf_1{cl::sycl::range<1>{2}}; diff --git a/test/sycl_tests.cc b/test/sycl_tests.cc index 954e934b2..2dd8686e1 100644 --- a/test/sycl_tests.cc +++ b/test/sycl_tests.cc @@ -171,4 +171,39 @@ TEST_CASE_METHOD(test_utils::device_queue_fixture, "SYCL has working simple scal #endif +TEST_CASE("SYCL implements by-value equality-comparison of device information", "[sycl][device-selection][!mayfail]") { + constexpr static auto get_devices = [] { + auto devs = sycl::device::get_devices(); + std::sort(devs.begin(), devs.end(), [](const sycl::device& lhs, const sycl::device& rhs) { + const auto lhs_vendor_id = lhs.get_info(), rhs_vendor_id = rhs.get_info(); + const auto lhs_name = lhs.get_info(), rhs_name = rhs.get_info(); + if(lhs_vendor_id < rhs_vendor_id) return true; + if(lhs_vendor_id > rhs_vendor_id) return false; + return lhs_name < rhs_name; + }); + return devs; + }; + + constexpr static auto get_platforms = [] { + const auto devs = get_devices(); + std::vector pfs; + for(const auto& d : devs) { + pfs.push_back(d.get_platform()); + } + return pfs; + }; + + SECTION("for sycl::device") { + const auto first = get_devices(); + const auto second = get_devices(); + CHECK(first == second); + } + + SECTION("for sycl::platforms") { + const auto first = get_platforms(); + const auto second = get_platforms(); + CHECK(first == second); + } +} + } // namespace celerity::detail diff --git a/test/test_utils.h b/test/test_utils.h index a46f2e4a0..6a3cb16b9 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -384,7 +384,7 @@ namespace test_utils { if(!dq) { cfg = std::make_unique(nullptr, nullptr); dq = std::make_unique(); - dq->init(*cfg, nullptr); + dq->init(*cfg, detail::auto_select_device{}); } return *dq; } @@ -450,4 +450,18 @@ struct StringMaker> { } }; +template <> +struct StringMaker { + static std::string convert(const sycl::device& d) { + return fmt::format("sycl::device(vendor_id={}, name=\"{}\")", d.get_info(), d.get_info()); + } +}; + +template <> +struct StringMaker { + static std::string convert(const sycl::platform& d) { + return fmt::format("sycl::platform(vendor=\"{}\", name=\"{}\")", d.get_info(), d.get_info()); + } +}; + } // namespace Catch \ No newline at end of file From e9514d447ba5595cd6a4cce26b437d5aabfc2b58 Mon Sep 17 00:00:00 2001 From: Philip Salzmann Date: Wed, 11 May 2022 19:05:19 +0200 Subject: [PATCH 3/3] Streamline device selection tests --- include/config.h | 1 - src/config.cc | 11 - test/device_selection_tests.cc | 790 +++++++++++++-------------------- 3 files changed, 304 insertions(+), 498 deletions(-) diff --git a/include/config.h b/include/config.h index 958dd2e9c..ff1b2ab73 100644 --- a/include/config.h +++ b/include/config.h @@ -10,7 +10,6 @@ namespace detail { struct host_config { size_t node_count; size_t local_rank; - size_t local_num_cpus; }; struct device_config { diff --git a/src/config.cc b/src/config.cc index 4842a5695..b1b0783df 100644 --- a/src/config.cc +++ b/src/config.cc @@ -180,17 +180,6 @@ namespace detail { const auto result = get_env("CELERITY_FORCE_WG"); if(result.first) { CELERITY_WARN("Support for CELERITY_FORCE_WG has been removed with Celerity 0.3.0."); } } - - // -------------------------------- CELERITY_HOST_CPUS -------------------------------- - - { - host_cfg.local_num_cpus = std::thread::hardware_concurrency(); - const auto result = get_env("CELERITY_HOST_CPUS"); - if(result.first) { - const auto parsed = parse_uint(result.second.c_str()); - if(parsed.first) { host_cfg.local_num_cpus = parsed.second; } - } - } } } // namespace detail } // namespace celerity diff --git a/test/device_selection_tests.cc b/test/device_selection_tests.cc index f3af00df2..f7fd55699 100644 --- a/test/device_selection_tests.cc +++ b/test/device_selection_tests.cc @@ -1,17 +1,34 @@ -#include "catch2/catch_test_macros.hpp" -#include "catch2/generators/catch_generators.hpp" -#include "catch2/matchers/catch_matchers_string.hpp" -#include "spdlog/sinks/ostream_sink.h" +#include +#include +#include + +#include "log.h" // Need to include before spdlog +#include + #include "test_utils.h" -#include + +using dt = sycl::info::device_type; struct mock_platform; + +struct type_and_name { + dt type; + std::string name; +}; + struct mock_device { - mock_device() : platform(nullptr), id(0), type(sycl::info::device_type::gpu) {} + mock_device() : id(0), type(dt::gpu), platform(nullptr) {} - mock_device(size_t id, mock_platform& platform, sycl::info::device_type type = sycl::info::device_type::gpu) : platform(&platform), id(id), type(type) {} + mock_device(size_t id, mock_platform& platform, dt type) : mock_device(id, platform, {type, fmt::format("Mock device {}", id)}){}; - mock_platform& get_platform() const { return *platform; } + mock_device(size_t id, mock_platform& platform, const type_and_name& tan) : id(id), type(tan.type), name(tan.name), platform(&platform) {} + + bool operator==(const mock_device& other) const { return other.id == id; } + + mock_platform& get_platform() const { + assert(platform != nullptr); + return *platform; + } template auto get_info() const { @@ -19,32 +36,47 @@ struct mock_device { if constexpr(Param == sycl::info::device::device_type) { return type; } } - bool operator==(const mock_device& other) const { return other.id == id; } - - sycl::info::device_type get_type() const { return type; } + dt get_type() const { return type; } - size_t get_id() { return id; } + size_t get_id() const { return id; } private: - mock_platform* platform; - std::string name = "bar"; size_t id; - sycl::info::device_type type; + dt type; + std::string name; + mock_platform* platform; }; + +struct mock_platform_factory { + public: + template + auto create_platforms(Args... args) { + return std::array{mock_platform(next_id++, args)...}; + } + + private: + size_t next_id = 0; +}; + struct mock_platform { - mock_platform(size_t id) : id(id) {} + mock_platform(size_t id, std::optional name) : id(id), name(name.has_value() ? std::move(*name) : fmt::format("Mock platform {}", id)) {} - void set_devices(std::vector devices) { this->devices = devices; } + template + auto create_devices(Args... args) { + std::array new_devices = {mock_device(next_device_id++, *this, args)...}; + devices.insert(devices.end(), new_devices.begin(), new_devices.end()); + return new_devices; + } - std::vector get_devices(sycl::info::device_type type = sycl::info::device_type::all) const { - if(type != sycl::info::device_type::all) { + std::vector get_devices(dt type = dt::all) const { + if(type != dt::all) { std::vector devices_with_type; for(auto device : devices) { if(device.get_type() == type) { devices_with_type.emplace_back(device); } } return devices_with_type; - } else - return devices; + } + return devices; } template @@ -52,19 +84,18 @@ struct mock_platform { return name; } - void set_info(std::string name) { this->name = name; } + bool operator==(const mock_platform& other) const { return other.id == id; } + bool operator!=(const mock_platform& other) const { return !(*this == other); } - bool operator!=(const mock_platform& other) const { return other.id != id; } - - size_t get_id() { return id; } + size_t get_id() const { return id; } private: - std::vector devices; size_t id; - std::string name = "foo"; + std::string name; + size_t next_device_id = 0; + std::vector devices; }; - namespace celerity::detail { struct config_testspy { static void set_mock_device_cfg(config& cfg, const device_config& d_cfg) { cfg.device_cfg = d_cfg; } @@ -74,194 +105,127 @@ struct config_testspy { TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prefers user specified device pointer", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); + mock_platform_factory mpf; - mock_platform tp(68); - mock_device td(42, tp); - tp.set_devices({td}); + auto [mp] = mpf.create_platforms(std::nullopt); + auto md = mp.create_devices(dt::gpu)[0]; - auto device = pick_device(cfg, td, std::vector{tp}); - CHECK(device == td); + auto device = pick_device(cfg, md, std::vector{mp}); + CHECK(device == md); } TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, - "pick_device automatically selects a gpu device if available and otherwise falls back to the first device available", "[device-selection]") { + "pick_device automatically selects a gpu device if available, otherwise falls back to the first device available", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); + mock_platform_factory mpf; - using device_t = sycl::info::device_type; - - auto dv_type_1 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); + auto dv_type_1 = GENERATE(as
(), dt::gpu, dt::accelerator, dt::cpu, dt::custom, dt::host); CAPTURE(dv_type_1); - mock_platform tp_1(0); - mock_device td_1(0, tp_1, dv_type_1); - tp_1.set_devices({td_1}); - - auto dv_type_2 = GENERATE(as(), device_t::gpu, device_t::accelerator, device_t::cpu, device_t::custom, device_t::host); + auto dv_type_2 = GENERATE(as
(), dt::gpu, dt::accelerator, dt::cpu, dt::custom, dt::host); CAPTURE(dv_type_2); - mock_platform tp_2(1); - mock_device td_2(1, tp_2, dv_type_2); - tp_2.set_devices({td_2}); + auto [mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_1, tp_2}); - if(dv_type_1 == device_t::gpu || (dv_type_1 != device_t::gpu && dv_type_2 != device_t::gpu)) { - CHECK(device == td_1); + auto md_1 = mp_1.create_devices(dv_type_1)[0]; + auto md_2 = mp_2.create_devices(dv_type_2)[0]; + + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_1, mp_2}); + if(dv_type_1 == dt::gpu || (dv_type_1 != dt::gpu && dv_type_2 != dt::gpu)) { + CHECK(device == md_1); } else { - CHECK(device == td_2); + CHECK(device == md_2); } } -TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device selects device using CELERITY_DEVICES", "[device-selection][device-cfg]") { +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device selects device using CELERITY_DEVICES", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); + mock_platform_factory mpf; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - - mock_platform tp_1(1); - mock_device td_2(0, tp_1, sycl::info::device_type::gpu); - mock_device td_3(1, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3}); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu, dt::gpu); + auto md = mp_1.create_devices(dt::gpu, dt::gpu, dt::cpu)[1]; - celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; + celerity::detail::device_config d_cfg{1, 1}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); - CHECK(device == td_3); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1}); + CHECK(device == md); } -TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, - "pick_device selects a GPU for each local_rank or falls back to any type of sufficient device for all ranks", "[device-selection][host-cfg]") { +TEST_CASE_METHOD( + celerity::test_utils::mpi_fixture, "pick_device attempts to select a unique device from a single platform for each local node", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); - SECTION("pick_device unique GPU per node") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + SECTION("preferring GPUs over other device types") { + mock_platform_factory mpf; - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4, td_5}); + const size_t node_count = 4; + const size_t local_rank = 3; - size_t node_count = 4; - size_t local_rank = 2; - size_t local_num_cpus = 1; + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + auto md = mp_1.create_devices(dt::gpu, dt::gpu, dt::gpu, dt::gpu)[local_rank]; + mp_2.create_devices(dt::accelerator, dt::accelerator, dt::accelerator, dt::accelerator); - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); - CHECK(device == td_4); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1, mp_2}); + CHECK(device == md); } - SECTION("pick_device prefers unique GPU over other devices") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4, td_5}); - - mock_platform tp_2(1); - mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); - mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); - mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); - mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_6, td_7, td_8, td_9}); - - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; - celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + SECTION("falling back to other device types when an insufficient number of GPUs is available") { + mock_platform_factory mpf; - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); - CHECK(device == td_5); - } + const size_t node_count = 4; + const size_t local_rank = 2; + + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu, dt::gpu); + auto md = mp_2.create_devices(dt::accelerator, dt::accelerator, dt::accelerator, dt::accelerator)[local_rank]; - SECTION("pick_device falls back to other devices with insufficient GPUs") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4}); - - mock_platform tp_2(2); - mock_device td_5(4, tp_2, sycl::info::device_type::accelerator); - mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); - mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); - mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_5, td_6, td_7, td_8}); - - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); - CHECK(device == td_8); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1, mp_2}); + CHECK(device == md); } - SECTION("pick_device prefers the first available GPU with insufficient GPUs and other devices") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2}); + SECTION("falling back to a single GPU for all nodes if an insufficient number of GPUs and other device types is available") { + mock_platform_factory mpf; - mock_platform tp_2(2); - mock_device td_3(2, tp_2, sycl::info::device_type::accelerator); - mock_device td_4(3, tp_2, sycl::info::device_type::accelerator); - mock_device td_5(4, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_3, td_4, td_5}); + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + auto md = mp_1.create_devices(dt::gpu)[0]; + mp_2.create_devices(dt::accelerator, dt::accelerator, dt::accelerator); - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; + const size_t node_count = 4; + const size_t local_rank = GENERATE(0, 3); - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); - CHECK(device == td_2); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1, mp_2}); + CHECK(device == md); } - SECTION("pick_device prefers the first available device(any) with no GPUs") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + SECTION("falling back to a single device of any type for all nodes if an insufficient number of GPUs or other device types is available") { + mock_platform_factory mpf; - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); - mock_device td_3(2, tp_1, sycl::info::device_type::accelerator); - mock_device td_4(3, tp_1, sycl::info::device_type::accelerator); - tp_1.set_devices({td_2, td_3, td_4}); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, std::nullopt); + auto md = mp_0.create_devices(dt::cpu)[0]; + mp_1.create_devices(dt::accelerator, dt::accelerator, dt::accelerator); - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; + const size_t node_count = 4; + const size_t local_rank = GENERATE(0, 3); - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); - CHECK(device == td_1); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1}); + CHECK(device == md); } } @@ -269,14 +233,14 @@ class log_capture { public: log_capture(spdlog::level::level_enum level = spdlog::level::trace) { auto logger = spdlog::default_logger(); - auto ostream_info_sink = std::make_shared(oss); - ostream_info_sink->set_level(level); - logger->sinks().push_back(ostream_info_sink); + ostream_sink = std::make_shared(oss); + ostream_sink->set_level(level); + logger->sinks().push_back(ostream_sink); } ~log_capture() { auto logger = spdlog::default_logger(); - // TODO: Assert that no other sink has been pushed in the meantime + assert(*logger->sinks().rbegin() == ostream_sink); logger->sinks().pop_back(); } @@ -284,145 +248,115 @@ class log_capture { private: std::ostringstream oss; + std::shared_ptr ostream_sink; }; -TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected info/warn messages", "[device-selection][msg]") { +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected info/warn messages", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); - SECTION("device_pointer is specified by the user") { + SECTION("when device pointer is specified by user") { log_capture lc; - mock_platform tp(68); - mock_device td(42, tp); - tp.set_devices({{5, tp}, {7, tp}, {9, tp}}); + mock_platform tp(68, "My platform"); + auto td = tp.create_devices(dt::gpu, type_and_name{dt::gpu, "My device"}, dt::gpu)[1]; auto device = pick_device(cfg, td, std::vector{tp}); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (specified by user)")); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'My platform', device 'My device' (specified by user)")); } - SECTION("CELERITY_DEVICE is set by the user") { + SECTION("when CELERITY_DEVICES is set") { log_capture lc; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + mock_platform_factory mpf; - mock_platform tp_1(1); - mock_device td_2(0, tp_1, sycl::info::device_type::gpu); - mock_device td_3(1, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3}); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, "My platform"); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, type_and_name{dt::gpu, "My device"}); - celerity::detail::device_config d_cfg{td_3.get_id(), tp_1.get_id()}; + celerity::detail::device_config d_cfg{1, 1}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (set by CELERITY_DEVICES: platform 1, device 1)")); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1}); + CHECK_THAT(lc.get_log(), + Catch::Matchers::ContainsSubstring("Using platform 'My platform', device 'My device' (set by CELERITY_DEVICES: platform 1, device 1)")); } - - SECTION("pick_device selects a gpu/any per node automaticaly") { + SECTION("when automatically selecting a device") { log_capture lc; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + mock_platform_factory mpf; - mock_platform tp_1(1); - mock_device td_2(0, tp_1, sycl::info::device_type::gpu); - mock_device td_3(1, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3}); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, "My platform"); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(type_and_name{dt::gpu, "My device"}, dt::gpu); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo', device 'bar' (automatically selected platform 1, device 0)")); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1}); + CHECK_THAT( + lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'My platform', device 'My device' (automatically selected platform 1, device 0)")); } - SECTION("pick_device can't find any platform with sufficient GPUs") { + SECTION("when it can't find a platform with a sufficient number of GPUs") { log_capture lc{spdlog::level::warn}; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - - mock_platform tp_1(1); - mock_device td_2(0, tp_1, sycl::info::device_type::gpu); - mock_device td_3(1, tp_1, sycl::info::device_type::gpu); - mock_device td_4(2, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4}); - - mock_platform tp_2(1); - mock_device td_5(0, tp_2, sycl::info::device_type::accelerator); - mock_device td_6(1, tp_2, sycl::info::device_type::accelerator); - mock_device td_7(2, tp_2, sycl::info::device_type::accelerator); - mock_device td_8(3, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_5, td_6, td_7, td_8}); - - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + mock_platform_factory mpf; + + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu, dt::gpu); + mp_2.create_devices(dt::accelerator, dt::accelerator, dt::accelerator, dt::accelerator); + + const size_t node_count = 4; + const size_t local_rank = 3; + + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1, mp_2}); CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 GPU devices, and CELERITY_DEVICES not set")); } - SECTION("pick_device can't find any platform with any type of sufficient device") { + SECTION("when it can't find a platform with a sufficient number of devices of any type") { log_capture lc(spdlog::level::warn); - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + mock_platform_factory mpf; + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); - mock_platform tp_1(1); - mock_device td_2(0, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2}); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu); + mp_2.create_devices(dt::accelerator, dt::accelerator, dt::accelerator); - mock_platform tp_2(2); - mock_device td_3(0, tp_2, sycl::info::device_type::accelerator); - mock_device td_4(1, tp_2, sycl::info::device_type::accelerator); - mock_device td_5(2, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_3, td_4, td_5}); + const size_t node_count = 4; + const size_t local_rank = 3; - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1, tp_2}); + auto device = pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1, mp_2}); CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices, and CELERITY_DEVICES not set")); } - SECTION("CELERITY_DEVICE is set with invalid platform id") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + SECTION("when CELERITY_DEVICES contains an invalid platform id") { + mock_platform_factory mpf; - mock_platform tp_1(3); - mock_device td_2(0, tp_1, sycl::info::device_type::gpu); - mock_device td_3(1, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3}); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu); - celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; + celerity::detail::device_config d_cfg{3, 0}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - CHECK_THROWS_WITH(pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}), + CHECK_THROWS_WITH(pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1}), "Invalid platform id 3: Only 2 platforms available"); } - SECTION("CELERITY_DEVICE is set with invalid device id") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); + SECTION("when CELERITY_DEVICES contains an invalid device id") { + mock_platform_factory mpf; - mock_platform tp_1(1); - mock_device td_2(4, tp_1, sycl::info::device_type::gpu); - mock_device td_3(5, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3}); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu); - celerity::detail::device_config d_cfg{tp_1.get_id(), td_3.get_id()}; + celerity::detail::device_config d_cfg{1, 5}; celerity::detail::config_testspy::set_mock_device_cfg(cfg, d_cfg); - CHECK_THROWS_WITH(pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{tp_0, tp_1}), + CHECK_THROWS_WITH(pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{mp_0, mp_1}), "Invalid device id 5: Only 2 devices available on platform 1"); } - SECTION("pick_device couldn't find any device") { + SECTION("when no device was selected") { CHECK_THROWS_WITH( pick_device(cfg, celerity::detail::auto_select_device{}, std::vector{}), "Automatic device selection failed: No device available"); } @@ -430,8 +364,7 @@ TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device prints expected // The following test doesn't work with ComputeCpp backend, since the == operator behaves differently #if !defined(WORKAROUND_COMPUTECPP) -TEST_CASE_METHOD(celerity::test_utils::runtime_fixture, - "runtime::init/distr_queue provides an overloaded constructor with device selector, testing sycl::device", "[distr_queue][ctor][sycl]") { +TEST_CASE_METHOD(celerity::test_utils::runtime_fixture, "pick_device supports passing a device selector function", "[device-selection]") { std::vector devices = sycl::device::get_devices(); if(devices.size() < 2) { WARN("Platforms must have 2 or more devices!"); @@ -452,256 +385,141 @@ TEST_CASE_METHOD(celerity::test_utils::runtime_fixture, } #endif -TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "runtime::init/distr_queue provides an overloaded constructor with device selector, testing mock_device", - "[device-selection][ctor][mock-host-cfg]") { +TEST_CASE("pick_device correctly selects according to device selector score", "[device-selection]") { + mock_platform_factory mpf; + + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu, dt::gpu, dt::gpu); + auto md = mp_2.create_devices(dt::accelerator, dt::accelerator, dt::accelerator, dt::accelerator)[1]; + + auto device_selector = [md](const mock_device& d) -> int { return d == md ? 2 : 1; }; + celerity::detail::config cfg(nullptr, nullptr); + auto device = pick_device(cfg, device_selector, std::vector{mp_0, mp_1, mp_2}); + CHECK(device == md); +} - SECTION("pick_device prefers a particular device over all") { - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4, td_5}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); - mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); - mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); - mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_6, td_7, td_8, td_9}); - tp_2.set_info("foo_2"); - - auto device_selector = [td_7](const mock_device& d) -> int { return d == td_7 ? 2 : 1; }; - - auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); - CHECK(device == td_7); - } +TEST_CASE("pick_device selects a unique device for each local node according to device selector score", "[device-selection]") { + log_capture lc; + mock_platform_factory mpf; - SECTION("pick_device prefers a group of devices") { - log_capture lc; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_5(4, tp_2, sycl::info::device_type::gpu); - mock_device td_6(5, tp_2, sycl::info::device_type::gpu); - mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); - mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); - mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); - mock_device td_10(9, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_5, td_6, td_7, td_8, td_9, td_10}); - tp_2.set_info("foo_2"); - - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; - celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + auto [mp_0, mp_1] = mpf.create_platforms(std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu, dt::gpu); - auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + mock_platform mp_2(2, "My platform"); + auto md = mp_2.create_devices(dt::gpu, dt::gpu, dt::accelerator, dt::accelerator, dt::accelerator, type_and_name{dt::accelerator, "My device"})[5]; - auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'foo_2', device 'bar' (device selector specified: platform 2, device 3)")); - CHECK(device == td_10); - } + const size_t node_count = 4; + const size_t local_rank = 3; - SECTION("pick_device prefers prioritised device with selector with insufficient devices") { - log_capture lc(spdlog::level::warn); - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_3(2, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_3}); - tp_2.set_info("foo_2"); - - size_t node_count = 4; - size_t local_rank = 3; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; - celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device_selector = [td_3](const mock_device& d) -> int { return d == td_3 ? 2 : 1; }; + celerity::detail::host_config h_cfg{node_count, local_rank}; + celerity::detail::config cfg(nullptr, nullptr); + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); - CHECK_THAT( - lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices that match the specified device selector")); - CHECK(device == td_3); - } + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == dt::accelerator ? 2 : 1; }; - SECTION("pick_device can choose devices across platform with warnings") { - log_capture lc(spdlog::level::warn); + auto device = pick_device(cfg, device_selector, std::vector{mp_0, mp_1, mp_2}); + CHECK_THAT( + lc.get_log(), Catch::Matchers::ContainsSubstring("Using platform 'My platform', device 'My device' (device selector specified: platform 2, device 3)")); + CHECK(device == md); +} - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4, td_5}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_6(5, tp_2, sycl::info::device_type::gpu); - mock_device td_7(6, tp_2, sycl::info::device_type::accelerator); - mock_device td_8(7, tp_2, sycl::info::device_type::accelerator); - mock_device td_9(8, tp_2, sycl::info::device_type::accelerator); - tp_2.set_devices({td_6, td_7, td_8, td_9}); - tp_2.set_info("foo_2"); - - size_t node_count = 4; - size_t local_rank = 2; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; - celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); +TEST_CASE("pick_device selects the highest scoring device for all nodes if an insufficient number of total devices is available", "[device-selection]") { + log_capture lc(spdlog::level::warn); + mock_platform_factory mpf; - auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu); + auto md = mp_2.create_devices(dt::accelerator)[0]; - auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); - INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); - CHECK(device == td_8); - } + const size_t node_count = 4; + const size_t local_rank = 3; - SECTION("pick_device can choose different types of devices with warnings") { - log_capture lc(spdlog::level::warn); + celerity::detail::host_config h_cfg{node_count, local_rank}; + celerity::detail::config cfg(nullptr, nullptr); + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + auto device_selector = [md](const mock_device& d) -> int { return d == md ? 2 : 1; }; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4, td_5}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); - mock_device td_7(6, tp_2, sycl::info::device_type::gpu); - mock_device td_8(7, tp_2, sycl::info::device_type::gpu); - mock_device td_9(8, tp_2, sycl::info::device_type::gpu); - tp_2.set_devices({td_6, td_7, td_8, td_9}); - tp_2.set_info("foo_2"); - - size_t node_count = 4; - size_t local_rank = 0; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; - celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + auto device = pick_device(cfg, device_selector, std::vector{mp_0, mp_1, mp_2}); + CHECK_THAT( + lc.get_log(), Catch::Matchers::ContainsSubstring("No suitable platform found that can provide 4 devices that match the specified device selector")); + CHECK(device == md); +} - auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; +TEST_CASE("pick_device warns when highest scoring devices span multiple platforms", "[device-selection]") { + log_capture lc(spdlog::level::warn); + mock_platform_factory mpf; - auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); - INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); - CHECK(device == td_2); - } + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::accelerator, dt::gpu, dt::gpu, dt::gpu); + mp_2.create_devices(dt::gpu, dt::accelerator, dt::accelerator, dt::accelerator)[2]; - SECTION("pick_device can choose different types of devices with insufficient devices in platforms with warnings") { - log_capture lc(spdlog::level::warn); + const size_t node_count = 4; + const size_t local_rank = GENERATE(0, 3); - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::accelerator); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4, td_5}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_6(5, tp_2, sycl::info::device_type::accelerator); - mock_device td_7(6, tp_2, sycl::info::device_type::gpu); - mock_device td_8(7, tp_2, sycl::info::device_type::gpu); - tp_2.set_devices({td_6, td_7, td_8}); - tp_2.set_info("foo_2"); - - size_t node_count = 4; - size_t local_rank = 1; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; - celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + celerity::detail::host_config h_cfg{node_count, local_rank}; + celerity::detail::config cfg(nullptr, nullptr); + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == dt::accelerator ? 2 : 1; }; - auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 2 : 1; }; + auto device = pick_device(cfg, device_selector, std::vector{mp_0, mp_1, mp_2}); + INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); - auto device = pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}); - INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); - CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); - CHECK(device == td_6); + if(local_rank == 0) { + CHECK(device.get_platform() == mp_1); + } else { + CHECK(device.get_platform() == mp_2); } } -TEST_CASE_METHOD( - celerity::test_utils::mpi_fixture, "pick_device does not consider devices with a negative selector score", "[device-selection][msg][negative]") { +TEST_CASE("pick_device warns when highest scoring devices are of different types", "[device-selection]") { + log_capture lc(spdlog::level::warn); + mock_platform_factory mpf; + + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + auto md = mp_1.create_devices(dt::accelerator, dt::gpu, dt::gpu, dt::gpu)[0]; + mp_2.create_devices(dt::accelerator, dt::gpu, dt::gpu, dt::gpu); + + const size_t node_count = 4; + const size_t local_rank = 0; + + celerity::detail::host_config h_cfg{node_count, local_rank}; + celerity::detail::config cfg(nullptr, nullptr); + celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); + + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == dt::accelerator ? 2 : 1; }; + + auto device = pick_device(cfg, device_selector, std::vector{mp_0, mp_1, mp_2}); + INFO("Platform id" << device.get_platform().get_id() << " device id " << device.get_id()); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring("Selected devices are of different type and/or do not belong to the same platform")); + CHECK(device == md); +} + +TEST_CASE_METHOD(celerity::test_utils::mpi_fixture, "pick_device does not consider devices with a negative selector score", "[device-selection]") { celerity::detail::config cfg(nullptr, nullptr); log_capture lc; + mock_platform_factory mpf; + + auto [mp_0, mp_1, mp_2] = mpf.create_platforms(std::nullopt, std::nullopt, std::nullopt); + mp_0.create_devices(dt::cpu); + mp_1.create_devices(dt::gpu, dt::gpu, dt::gpu); + mp_2.create_devices(dt::gpu); + + const size_t node_count = 4; + const size_t local_rank = 2; - mock_platform tp_0(0); - mock_device td_1(0, tp_0, sycl::info::device_type::cpu); - tp_0.set_devices({td_1}); - tp_0.set_info("foo_0"); - - mock_platform tp_1(1); - mock_device td_2(1, tp_1, sycl::info::device_type::gpu); - mock_device td_3(2, tp_1, sycl::info::device_type::gpu); - mock_device td_4(3, tp_1, sycl::info::device_type::gpu); - mock_device td_5(4, tp_1, sycl::info::device_type::gpu); - tp_1.set_devices({td_2, td_3, td_4}); - tp_1.set_info("foo_1"); - - mock_platform tp_2(2); - mock_device td_7(5, tp_2, sycl::info::device_type::gpu); - tp_2.set_devices({td_7}); - tp_2.set_info("foo_2"); - - size_t node_count = 4; - size_t local_rank = 2; - size_t local_num_cpus = 1; - - celerity::detail::host_config h_cfg{node_count, local_rank, local_num_cpus}; + celerity::detail::host_config h_cfg{node_count, local_rank}; celerity::detail::config_testspy::set_mock_host_cfg(cfg, h_cfg); - auto device_selector = [](const mock_device& d) -> int { return d.get_type() == sycl::info::device_type::accelerator ? 1 : -1; }; + auto device_selector = [](const mock_device& d) -> int { return d.get_type() == dt::accelerator ? 1 : -1; }; CHECK_THROWS_WITH( - pick_device(cfg, device_selector, std::vector{tp_0, tp_1, tp_2}), "Device selection with device selector failed: No device available"); + pick_device(cfg, device_selector, std::vector{mp_0, mp_1, mp_2}), "Device selection with device selector failed: No device available"); }