celerity · psalz · Dec 20, 2023 · Dec 20, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ Versioning](http://semver.org/spec/v2.0.0.html).
 
 ### Added
 
+- Add new environment variable `CELERITY_PRINT_GRAPHS` to control whether task and command graphs are logged (#197, #236)
 - Introduce new experimental `for_each_item` utility to iterate over a celerity range (#199)
 - Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199)
 - Add new `experimental::constrain_split` API to limit how a kernel can be split (#?)

diff --git a/README.md b/README.md
@@ -122,7 +122,7 @@ Celerity's runtime behavior:
   automatically assign a unique device to each worker on a host.
 - `CELERITY_PROFILE_KERNEL` controls whether SYCL queue profiling information
   should be queried (currently not supported when using hipSYCL).
-- `CELERITY_RECORDING` enables recording of the generated tasks and commands,
-  which allows printing dot graphs for debugging and analysis.
+- `CELERITY_PRINT_GRAPHS` controls whether task and command graphs are logged
+  at the end of execution (requires log level `info`, `debug` or `trace`).
 - `CELERITY_DRY_RUN_NODES` takes a number and simulates a run with that many nodes
   without actually executing the commands.
diff --git a/include/config.h b/include/config.h
@@ -40,7 +40,11 @@ namespace detail {
 		const std::optional<device_config>& get_device_config() const { return m_device_cfg; }
 		std::optional<bool> get_enable_device_profiling() const { return m_enable_device_profiling; }
 		bool is_dry_run() const { return m_dry_run_nodes > 0; }
-		bool is_recording() const { return m_recording; }
+		bool should_print_graphs() const { return m_should_print_graphs; }
+		bool should_record() const {
+			// Currently only graph printing requires recording, but this might change in the future.
+			return m_should_print_graphs;
+		}
 		int get_dry_run_nodes() const { return m_dry_run_nodes; }
 		std::optional<int> get_horizon_step() const { return m_horizon_step; }
 		std::optional<int> get_horizon_max_parallelism() const { return m_horizon_max_parallelism; }
@@ -50,7 +54,7 @@ namespace detail {
 		std::optional<device_config> m_device_cfg;
 		std::optional<bool> m_enable_device_profiling;
 		size_t m_dry_run_nodes = 0;
-		bool m_recording = false;
+		bool m_should_print_graphs = false;
 		std::optional<int> m_horizon_step;
 		std::optional<int> m_horizon_max_parallelism;
 	};

diff --git a/src/config.cc b/src/config.cc
@@ -62,7 +62,7 @@ namespace {
 
 size_t parse_validate_graph_print_max_verts(const std::string_view str) {
 	throw env::validation_error{"Support for CELERITY_GRAPH_PRINT_MAX_VERTS has been removed with Celerity 0.5.0.\n"
-	                            "Opt into graph recording by setting CELERITY_RECORDING."};
+	                            "Opt into graph printing by setting CELERITY_PRINT_GRAPHS=1."};
 	return 0;
 }
 
@@ -155,7 +155,7 @@ namespace detail {
 		    pref.register_variable<std::vector<size_t>>("DEVICES", [this](const std::string_view str) { return parse_validate_devices(str, m_host_cfg); });
 		const auto env_profile_kernel = pref.register_variable<bool>("PROFILE_KERNEL", parse_validate_profile_kernel);
 		const auto env_dry_run_nodes = pref.register_variable<size_t>("DRY_RUN_NODES", parse_validate_dry_run_nodes);
-		const auto env_recording = pref.register_variable<bool>("RECORDING");
+		const auto env_print_graphs = pref.register_variable<bool>("PRINT_GRAPHS");
 		constexpr int horizon_max = 1024 * 64;
 		const auto env_horizon_step = pref.register_range<int>("HORIZON_STEP", 1, horizon_max);
 		const auto env_horizon_max_para = pref.register_range<int>("HORIZON_MAX_PARALLELISM", 1, horizon_max);
@@ -201,7 +201,7 @@ namespace detail {
 			const auto has_dry_run_nodes = parsed_and_validated_envs.get(env_dry_run_nodes);
 			if(has_dry_run_nodes) { m_dry_run_nodes = *has_dry_run_nodes; }
 
-			m_recording = parsed_and_validated_envs.get_or(env_recording, false);
+			m_should_print_graphs = parsed_and_validated_envs.get_or(env_print_graphs, false);
 			m_horizon_step = parsed_and_validated_envs.get(env_horizon_step);
 			m_horizon_max_parallelism = parsed_and_validated_envs.get(env_horizon_max_para);
 

diff --git a/src/runtime.cc b/src/runtime.cc
@@ -149,7 +149,7 @@ namespace detail {
 		m_reduction_mngr = std::make_unique<reduction_manager>();
 		m_host_object_mngr = std::make_unique<host_object_manager>();
 
-		if(m_cfg->is_recording()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());
+		if(m_cfg->should_record()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());
 
 		task_manager::policy_set task_mngr_policy;
 		// Merely _declaring_ an uninitialized read is legitimate as long as the kernel does not actually perform the read at runtime - this might happen in the
@@ -164,7 +164,7 @@ namespace detail {
 		m_exec = std::make_unique<executor>(m_num_nodes, m_local_nid, *m_h_queue, *m_d_queue, *m_task_mngr, *m_buffer_mngr, *m_reduction_mngr);
 
 		m_cdag = std::make_unique<command_graph>();
-		if(m_cfg->is_recording()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());
+		if(m_cfg->should_record()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());
 
 		distributed_graph_generator::policy_set dggen_policy;
 		// Any uninitialized read that is observed on CDAG generation was already logged on task generation, unless we have a bug.
@@ -224,17 +224,17 @@ namespace detail {
 		m_d_queue->wait();
 		m_h_queue->wait();
 
-		if(spdlog::should_log(log_level::trace) && m_cfg->is_recording()) {
+		if(spdlog::should_log(log_level::info) && m_cfg->should_print_graphs()) {
 			if(m_local_nid == 0) { // It's the same across all nodes
 				assert(m_task_recorder.get() != nullptr);
 				const auto graph_str = detail::print_task_graph(*m_task_recorder);
-				CELERITY_TRACE("Task graph:\n\n{}\n", graph_str);
+				CELERITY_INFO("Task graph:\n\n{}\n", graph_str);
 			}
 			// must be called on all nodes
 			auto cmd_graph = gather_command_graph();
 			if(m_local_nid == 0) {
 				std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Avoid racing on stdout with other nodes (funneled through mpirun)
-				CELERITY_TRACE("Command graph:\n\n{}\n", cmd_graph);
+				CELERITY_INFO("Command graph:\n\n{}\n", cmd_graph);
 			}
 		}
 

diff --git a/test/print_graph_tests.cc b/test/print_graph_tests.cc
@@ -112,7 +112,7 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]")
 }
 
 TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the generated graph", "[print_graph]") {
-	env::scoped_test_environment tenv(recording_enabled_env_setting);
+	env::scoped_test_environment tenv(print_graphs_env_setting);
 
 	distr_queue q;
 	celerity::range<1> range(16);
@@ -141,8 +141,8 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the
 	}
 }
 
-TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_RECORDING is set", "[print_graph]") {
-	env::scoped_test_environment tenv(recording_enabled_env_setting);
+TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_PRINT_GRAPHS is set", "[print_graph]") {
+	env::scoped_test_environment tenv(print_graphs_env_setting);
 
 	distr_queue q;
 	celerity::range<1> range(16);

diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc
@@ -1305,7 +1305,7 @@ namespace detail {
 		    {"CELERITY_DEVICES", "1 1"},
 		    {"CELERITY_PROFILE_KERNEL", "1"},
 		    {"CELERITY_DRY_RUN_NODES", "4"},
-		    {"CELERITY_RECORDING", "true"},
+		    {"CELERITY_PRINT_GRAPHS", "true"},
 		};
 		const auto test_env = env::scoped_test_environment(env_map);
 		auto cfg = config(nullptr, nullptr);
@@ -1319,7 +1319,7 @@ namespace detail {
 		REQUIRE(has_prof.has_value());
 		CHECK((*has_prof) == true);
 		CHECK(cfg.get_dry_run_nodes() == 4);
-		CHECK(cfg.is_recording() == true);
+		CHECK(cfg.should_print_graphs() == true);
 	}
 
 	TEST_CASE_METHOD(test_utils::mpi_fixture, "config reports incorrect environment varibles", "[env-vars][config]") {

diff --git a/test/system/distr_tests.cc b/test/system/distr_tests.cc
@@ -138,7 +138,7 @@ namespace detail {
 	TEST_CASE_METHOD(
 	    test_utils::runtime_fixture, "runtime-shutdown graph printing works in the presence of a finished reduction", "[reductions][print_graph][smoke-test]") {
 #if CELERITY_FEATURE_SCALAR_REDUCTIONS
-		env::scoped_test_environment test_env(recording_enabled_env_setting);
+		env::scoped_test_environment test_env(print_graphs_env_setting);
 		// init runtime early so the distr_queue ctor doesn't override the log level set by log_capture
 		runtime::init(nullptr, nullptr);
 
@@ -263,7 +263,7 @@ namespace detail {
 	}
 
 	TEST_CASE_METHOD(test_utils::runtime_fixture, "generating same task graph on different nodes", "[task-graph]") {
-		env::scoped_test_environment tenv(recording_enabled_env_setting);
+		env::scoped_test_environment tenv(print_graphs_env_setting);
 		distr_queue q;
 		REQUIRE(runtime::get_instance().get_num_nodes() > 1);
 
@@ -374,7 +374,7 @@ namespace detail {
 	}
 
 	TEST_CASE_METHOD(test_utils::runtime_fixture, "command graph can be collected across distributed nodes", "[print_graph]") {
-		env::scoped_test_environment tenv(recording_enabled_env_setting);
+		env::scoped_test_environment tenv(print_graphs_env_setting);
 
 		int global_size = 0;
 		MPI_Comm_size(MPI_COMM_WORLD, &global_size);

diff --git a/test/test_utils.h b/test/test_utils.h
@@ -51,7 +51,7 @@
 namespace celerity {
 namespace detail {
 
-	const std::unordered_map<std::string, std::string> recording_enabled_env_setting{{"CELERITY_RECORDING", "1"}};
+	const std::unordered_map<std::string, std::string> print_graphs_env_setting{{"CELERITY_PRINT_GRAPHS", "1"}};
 
 	struct runtime_testspy {
 		static scheduler& get_schdlr(runtime& rt) { return *rt.m_schdlr; }