From c2853ca403a7cb24a67d2594a577e27bbedef7e8 Mon Sep 17 00:00:00 2001
From: Peter Thoman <petert@dps.uibk.ac.at>
Date: Mon, 28 Feb 2022 16:45:15 +0100
Subject: [PATCH] Added microbenchmark unit test file and benchmarks for
 intrusive_graph

---
 ci/perf/gpuc1_bench.txt | 92 +++++++++++++++++++++++++++++++++++++++++
 test/CMakeLists.txt     |  3 ++
 test/benchmarks.cc      | 59 ++++++++++++++++++++++++++
 3 files changed, 154 insertions(+)
 create mode 100644 ci/perf/gpuc1_bench.txt
 create mode 100644 test/benchmarks.cc

diff --git a/ci/perf/gpuc1_bench.txt b/ci/perf/gpuc1_bench.txt
new file mode 100644
index 000000000..fb5f3f694
--- /dev/null
+++ b/ci/perf/gpuc1_bench.txt
@@ -0,0 +1,92 @@
+[2022-03-02 13:59:51.114] [0] [info] Celerity runtime version 0.3.2 1e9fac9-dirty running on hipSYCL 0.9.1. PID = 333714, build type = release
+[2022-03-02 13:59:52.087] [0] [info] Using platform 'CUDA', device 'NVIDIA GeForce RTX 2070' (automatically selected platform 1, device 0)
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+benchmarks is a Catch v2.13.8 host application.
+Run with -? for options
+
+-------------------------------------------------------------------------------
+benchmark intrusive graph dependency handling, N=1
+-------------------------------------------------------------------------------
+../test/benchmarks.cc:51
+...............................................................................
+
+benchmark name                       samples       iterations    estimated
+                                     mean          low mean      high mean
+                                     std dev       low std dev   high std dev
+-------------------------------------------------------------------------------
+creating nodes                                 100          4961     1.9844 ms 
+                                        5.21801 ns    5.19983 ns    5.30536 ns 
+                                       0.176351 ns 0.00730106 ns   0.420381 ns 
+                                                                               
+creating and adding dependencies               100           522     2.4012 ms 
+                                        43.6724 ns    43.0861 ns    44.2722 ns 
+                                        3.03143 ns    2.93459 ns    3.37543 ns 
+                                                                               
+adding and removing dependencies               100           610       2.44 ms 
+                                        39.9986 ns    39.9105 ns     40.287 ns 
+                                       0.730811 ns   0.263142 ns     1.6246 ns 
+                                                                               
+checking for dependencies                      100         30117          0 ns 
+                                       0.826139 ns   0.824279 ns   0.834236 ns 
+                                      0.0170003 ns 0.00271225 ns  0.0400652 ns 
+                                                                               
+
+-------------------------------------------------------------------------------
+benchmark intrusive graph dependency handling, N=10
+-------------------------------------------------------------------------------
+../test/benchmarks.cc:54
+...............................................................................
+
+benchmark name                       samples       iterations    estimated
+                                     mean          low mean      high mean
+                                     std dev       low std dev   high std dev
+-------------------------------------------------------------------------------
+creating nodes                                 100           404      2.424 ms 
+                                         64.252 ns    64.0988 ns    64.8569 ns 
+                                        1.45773 ns   0.112241 ns    3.46626 ns 
+                                                                               
+creating and adding dependencies               100            42     2.4738 ms 
+                                        589.408 ns    587.877 ns     595.84 ns 
+                                        13.7171 ns    1.48863 ns    32.1303 ns 
+                                                                               
+adding and removing dependencies               100            44      2.442 ms 
+                                        572.738 ns    572.461 ns    573.511 ns 
+                                        2.19484 ns    1.00408 ns    4.73079 ns 
+                                                                               
+checking for dependencies                      100           903     2.4381 ms 
+                                        27.3515 ns    27.3091 ns    27.3985 ns 
+                                       0.227501 ns   0.199505 ns   0.258621 ns 
+                                                                               
+
+-------------------------------------------------------------------------------
+benchmark intrusive graph dependency handling, N=100
+-------------------------------------------------------------------------------
+../test/benchmarks.cc:57
+...............................................................................
+
+benchmark name                       samples       iterations    estimated
+                                     mean          low mean      high mean
+                                     std dev       low std dev   high std dev
+-------------------------------------------------------------------------------
+creating nodes                                 100            38     2.4396 ms 
+                                        643.098 ns    642.726 ns    643.452 ns 
+                                        1.84686 ns    1.65161 ns    2.07119 ns 
+                                                                               
+creating and adding dependencies               100             2     3.4124 ms 
+                                        17.1069 us    17.0971 us    17.1352 us 
+                                        78.0725 ns    34.6157 ns    169.666 ns 
+                                                                               
+adding and removing dependencies               100             3     3.2802 ms 
+                                        10.9672 us    10.9459 us    11.0629 us 
+                                         197.05 ns    25.6187 ns    466.055 ns 
+                                                                               
+checking for dependencies                      100             5     2.6525 ms 
+                                         5.3111 us    5.29436 us    5.37616 us 
+                                         142.51 ns    13.1282 ns        330 ns 
+                                                                               
+
+===============================================================================
+test cases: 3 | 3 passed
+assertions: - none -
+
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index fc34092c0..aadd67c92 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -8,6 +8,7 @@ include("${PROJECT_SOURCE_DIR}/vendor/Catch2/contrib/ParseAndAddCatchTests.cmake
 file(GLOB_RECURSE TEST_INCLUDES *.h)
 
 set(TEST_TARGETS
+  benchmarks
   runtime_tests
   runtime_deprecation_tests
   graph_generation_tests
@@ -27,6 +28,8 @@ target_link_libraries(
 
 set_property(TARGET unit_test_suite PROPERTY CXX_STANDARD 17)
 
+add_definitions("-DCATCH_CONFIG_ENABLE_BENCHMARKING") # Catch2 v2 only provides the BENCHMARK macro when this is defined
+
 add_celerity_to_target(TARGET unit_test_suite SOURCES unit_test_suite_celerity.cc)
 
 foreach(TEST_TARGET ${TEST_TARGETS})
diff --git a/test/benchmarks.cc b/test/benchmarks.cc
new file mode 100644
index 000000000..6b9e04a44
--- /dev/null
+++ b/test/benchmarks.cc
@@ -0,0 +1,59 @@
+#include <catch2/catch.hpp>
+
+#include <intrusive_graph.h>
+
+using namespace celerity::detail;
+
+struct bench_graph_node : intrusive_graph_node<bench_graph_node> {}; // minimal node type without any payload, used only by the benchmarks in this file
+
+
+template <int N> // N: number of nodes / dependencies used by each benchmark below
+void intrusive_graph_benchmark() { // runs four Catch2 benchmarks on bench_graph_node: creation, add, add+remove, lookup
+	BENCHMARK("creating nodes") { // measures constructing (and destroying) N nodes per iteration
+		bench_graph_node nodes[N];
+		return nodes[N - 1].get_pseudo_critical_path_length(); // return a value read from a node so the compiler cannot drop the array
+	};
+
+	BENCHMARK("creating and adding dependencies") { // measures node creation plus adding N dependencies to a fresh n0
+		bench_graph_node n0;
+		bench_graph_node nodes[N];
+		for(int i = 0; i < N; ++i) {
+			n0.add_dependency({&nodes[i], dependency_kind::TRUE_DEP}); // n0 depends on every element of nodes
+		}
+		return n0.get_dependencies(); // returned so the benchmark has an observable result
+	};
+
+	bench_graph_node n0; // declared outside the benchmark bodies: shared by the two benchmarks below
+	bench_graph_node nodes[N]; // likewise constructed once, so node creation is not part of the following measurements
+	BENCHMARK("adding and removing dependencies") { // measures adding N dependencies to n0 and removing them again
+		for(int i = 0; i < N; ++i) {
+			n0.add_dependency({&nodes[i], dependency_kind::TRUE_DEP});
+		}
+		for(int i = 0; i < N; ++i) {
+			n0.remove_dependency(&nodes[i]); // removed in the same order as added; presumably leaves n0 without dependencies
+		}
+		return n0.get_dependencies();
+	};
+
+	for(int i = 0; i < N; ++i) { // setup for the lookup benchmark: add all N dependencies once, outside the timed body
+		n0.add_dependency({&nodes[i], dependency_kind::TRUE_DEP});
+	}
+	BENCHMARK("checking for dependencies") { // measures N has_dependency() queries against n0
+		int d = 0; // number of queries that reported a dependency
+		for(int i = 0; i < N; ++i) {
+			d += n0.has_dependency(&nodes[i]) ? 1 : 0;
+		}
+		return d; // returning the count keeps the lookups from being optimized away
+	};
+}
+
+// instantiate the benchmark for N = 1, 10 and 100 to try to cover the dependency counts we'll see in practice
+TEST_CASE("benchmark intrusive graph dependency handling, N=1", "[benchmark]") { // single dependency
+	intrusive_graph_benchmark<1>();
+}
+TEST_CASE("benchmark intrusive graph dependency handling, N=10", "[benchmark]") { // ten dependencies
+	intrusive_graph_benchmark<10>();
+}
+TEST_CASE("benchmark intrusive graph dependency handling, N=100", "[benchmark]") { // one hundred dependencies
+	intrusive_graph_benchmark<100>();
+}