diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0855b1635..9e5f9ae9e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -74,9 +74,9 @@ target_link_libraries(all_tests PRIVATE test_main)
 set_test_target_parameters(all_tests "")
 
 # Unit benchmark executable
-add_executable(benchmarks benchmarks.cc benchmark_reporters.cc)
+add_executable(benchmarks benchmarks.cc system_benchmarks.cc benchmark_reporters.cc)
 target_link_libraries(benchmarks PRIVATE test_main)
-set_test_target_parameters(benchmarks benchmarks.cc)
+set_test_target_parameters(benchmarks benchmarks.cc system_benchmarks.cc)
 
 add_subdirectory(system)
 if(CELERITY_DETAIL_INTEGRATION_TESTING)
diff --git a/test/system_benchmarks.cc b/test/system_benchmarks.cc
new file mode 100644
index 000000000..6f9a74603
--- /dev/null
+++ b/test/system_benchmarks.cc
@@ -0,0 +1,69 @@
+#include <catch2/benchmark/catch_benchmark.hpp>
+#include <catch2/catch_template_test_macros.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
+
+#include <celerity.h>
+
+#include "test_utils.h"
+
+using namespace celerity;
+
+template <int N>
+class bench_runtime_fixture : public test_utils::runtime_fixture {};
+
+TEMPLATE_TEST_CASE_METHOD_SIG(
+    bench_runtime_fixture, "benchmark independent task pattern with N tasks", "[benchmark][system-benchmarks][indep-tasks]", ((int N), N), 100, 1000, 5000) {
+	constexpr size_t num_tasks = N;
+	constexpr size_t num_repeats = 2;
+	constexpr size_t items_per_task = 256;
+
+#ifndef NDEBUG
+	if(N > 100) { SKIP("Skipping larger-scale benchmark in debug build to save CI time"); }
+#endif
+
+	celerity::distr_queue queue;
+	celerity::buffer<size_t, 2> buffer(celerity::range<2>(items_per_task, num_tasks));
+
+	// initialize buffer
+	queue.submit([&](celerity::handler& cgh) {
+		celerity::accessor w{buffer, cgh, celerity::access::one_to_one{}, celerity::write_only, celerity::no_init};
+		cgh.parallel_for(buffer.get_range(), [=](celerity::item<2> item) { w[item] = item.get_linear_id(); });
+	});
+	queue.slow_full_sync();
+
+	size_t bench_repeats = 0;
+	BENCHMARK("task generation") {
+		for(size_t r = 0; r < num_repeats; ++r) {
+			for(size_t i = 0; i < num_tasks; ++i) {
+				queue.submit([&](celerity::handler& cgh) {
+					celerity::accessor acc{buffer, cgh,
+					    [=](celerity::chunk<1> c) { return celerity::subrange<2>(celerity::id<2>(c.offset.get(0), i), celerity::range<2>(c.range.get(0), 1)); },
+					    celerity::read_write};
+					cgh.parallel_for(celerity::range<1>(items_per_task), [=](celerity::item<1> item) { //
+						acc[item[0]][i] += 1;
+					});
+				});
+			}
+		}
+		queue.slow_full_sync();
+		bench_repeats++;
+	};
+
+	// check result
+	celerity::buffer<bool, 0> success_buffer = true;
+	queue.submit([&](celerity::handler& cgh) {
+		celerity::accessor r{buffer, cgh, celerity::access::all{}, celerity::read_only_host_task};
+		celerity::accessor succ{success_buffer, cgh, celerity::access::all{}, celerity::write_only_host_task};
+		cgh.host_task(celerity::on_master_node_tag{}, [=]() {
+			celerity::for_each_item(buffer.get_range(), [=](celerity::item<2> item) {
+				size_t expected = item.get_linear_id() + (num_repeats * bench_repeats);
+				if(r[item] != expected) {
+					fmt::print("Mismatch at {}: {} != {}\n", item.get_linear_id(), r[item], expected);
+					succ = false;
+				}
+			});
+		});
+	});
+	CHECK(*experimental::fence(queue, success_buffer).get() == true);
+}