From c2853ca403a7cb24a67d2594a577e27bbedef7e8 Mon Sep 17 00:00:00 2001 From: Peter Thoman Date: Mon, 28 Feb 2022 16:45:15 +0100 Subject: [PATCH] Added microbenchmark unit test file and benchmarks for intrusive_graph --- ci/perf/gpuc1_bench.txt | 92 +++++++++++++++++++++++++++++++++++++++++ test/CMakeLists.txt | 3 ++ test/benchmarks.cc | 59 ++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 ci/perf/gpuc1_bench.txt create mode 100644 test/benchmarks.cc diff --git a/ci/perf/gpuc1_bench.txt b/ci/perf/gpuc1_bench.txt new file mode 100644 index 000000000..fb5f3f694 --- /dev/null +++ b/ci/perf/gpuc1_bench.txt @@ -0,0 +1,92 @@ +[2022-03-02 13:59:51.114] [0] [info] Celerity runtime version 0.3.2 1e9fac9-dirty running on hipSYCL 0.9.1. PID = 333714, build type = release +[2022-03-02 13:59:52.087] [0] [info] Using platform 'CUDA', device 'NVIDIA GeForce RTX 2070' (automatically selected platform 1, device 0) + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +benchmarks is a Catch v2.13.8 host application. +Run with -? for options + +------------------------------------------------------------------------------- +benchmark intrusive graph dependency handling, N=1 +------------------------------------------------------------------------------- +../test/benchmarks.cc:51 +............................................................................... + +benchmark name samples iterations estimated + mean low mean high mean + std dev low std dev high std dev +------------------------------------------------------------------------------- +creating nodes 100 4961 1.9844 ms + 5.21801 ns 5.19983 ns 5.30536 ns + 0.176351 ns 0.00730106 ns 0.420381 ns + +creating and adding dependencies 100 522 2.4012 ms + 43.6724 ns 43.0861 ns 44.2722 ns + 3.03143 ns 2.93459 ns 3.37543 ns + +adding and removing dependencies 100 610 2.44 ms + 39.9986 ns 39.9105 ns 40.287 ns + 0.730811 ns 0.263142 ns 1.6246 ns + +checking for dependencies 100 30117 0 ns + 0.826139 ns 0.824279 ns 0.834236 ns + 0.0170003 ns 0.00271225 ns 0.0400652 ns + + +------------------------------------------------------------------------------- +benchmark intrusive graph dependency handling, N=10 +------------------------------------------------------------------------------- +../test/benchmarks.cc:54 +............................................................................... + +benchmark name samples iterations estimated + mean low mean high mean + std dev low std dev high std dev +------------------------------------------------------------------------------- +creating nodes 100 404 2.424 ms + 64.252 ns 64.0988 ns 64.8569 ns + 1.45773 ns 0.112241 ns 3.46626 ns + +creating and adding dependencies 100 42 2.4738 ms + 589.408 ns 587.877 ns 595.84 ns + 13.7171 ns 1.48863 ns 32.1303 ns + +adding and removing dependencies 100 44 2.442 ms + 572.738 ns 572.461 ns 573.511 ns + 2.19484 ns 1.00408 ns 4.73079 ns + +checking for dependencies 100 903 2.4381 ms + 27.3515 ns 27.3091 ns 27.3985 ns + 0.227501 ns 0.199505 ns 0.258621 ns + + +------------------------------------------------------------------------------- +benchmark intrusive graph dependency handling, N=100 +------------------------------------------------------------------------------- +../test/benchmarks.cc:57 +............................................................................... + +benchmark name samples iterations estimated + mean low mean high mean + std dev low std dev high std dev +------------------------------------------------------------------------------- +creating nodes 100 38 2.4396 ms + 643.098 ns 642.726 ns 643.452 ns + 1.84686 ns 1.65161 ns 2.07119 ns + +creating and adding dependencies 100 2 3.4124 ms + 17.1069 us 17.0971 us 17.1352 us + 78.0725 ns 34.6157 ns 169.666 ns + +adding and removing dependencies 100 3 3.2802 ms + 10.9672 us 10.9459 us 11.0629 us + 197.05 ns 25.6187 ns 466.055 ns + +checking for dependencies 100 5 2.6525 ms + 5.3111 us 5.29436 us 5.37616 us + 142.51 ns 13.1282 ns 330 ns + + +=============================================================================== +test cases: 3 | 3 passed +assertions: - none - + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fc34092c0..aadd67c92 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,6 +8,7 @@ include("${PROJECT_SOURCE_DIR}/vendor/Catch2/contrib/ParseAndAddCatchTests.cmake file(GLOB_RECURSE TEST_INCLUDES *.h) set(TEST_TARGETS + benchmarks runtime_tests runtime_deprecation_tests graph_generation_tests @@ -27,6 +28,8 @@ target_link_libraries( set_property(TARGET unit_test_suite PROPERTY CXX_STANDARD 17) +add_definitions("-DCATCH_CONFIG_ENABLE_BENCHMARKING") + add_celerity_to_target(TARGET unit_test_suite SOURCES unit_test_suite_celerity.cc) foreach(TEST_TARGET ${TEST_TARGETS}) diff --git a/test/benchmarks.cc b/test/benchmarks.cc new file mode 100644 index 000000000..6b9e04a44 --- /dev/null +++ b/test/benchmarks.cc @@ -0,0 +1,59 @@ +#include + +#include + +using namespace celerity::detail; + +struct bench_graph_node : intrusive_graph_node {}; + + +template +void intrusive_graph_benchmark() { + BENCHMARK("creating nodes") { + bench_graph_node nodes[N]; + return nodes[N - 1].get_pseudo_critical_path_length(); // trick the compiler + }; + + BENCHMARK("creating and adding dependencies") { + bench_graph_node n0; + bench_graph_node nodes[N]; + for(int i = 0; i < N; ++i) { + n0.add_dependency({&nodes[i], dependency_kind::TRUE_DEP}); + } + return n0.get_dependencies(); + }; + + bench_graph_node n0; + bench_graph_node nodes[N]; + BENCHMARK("adding and removing dependencies") { + for(int i = 0; i < N; ++i) { + n0.add_dependency({&nodes[i], dependency_kind::TRUE_DEP}); + } + for(int i = 0; i < N; ++i) { + n0.remove_dependency(&nodes[i]); + } + return n0.get_dependencies(); + }; + + for(int i = 0; i < N; ++i) { + n0.add_dependency({&nodes[i], dependency_kind::TRUE_DEP}); + } + BENCHMARK("checking for dependencies") { + int d = 0; + for(int i = 0; i < N; ++i) { + d += n0.has_dependency(&nodes[i]) ? 1 : 0; + } + return d; + }; +} + +// try to cover the dependency counts we'll see in practice +TEST_CASE("benchmark intrusive graph dependency handling, N=1", "[benchmark]") { + intrusive_graph_benchmark<1>(); +} +TEST_CASE("benchmark intrusive graph dependency handling, N=10", "[benchmark]") { + intrusive_graph_benchmark<10>(); +} +TEST_CASE("benchmark intrusive graph dependency handling, N=100", "[benchmark]") { + intrusive_graph_benchmark<100>(); +}