diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e3ae2f8f..6606fdad2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Versioning](http://semver.org/spec/v2.0.0.html). ### Added +- Add new environment variable `CELERITY_PRINT_GRAPHS` to control whether task and command graphs are printed to stdout (#?) - Introduce new experimental `for_each_item` utility to iterate over a celerity range (#199) - Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199) - Add new `experimental::constrain_split` API to limit how a kernel can be split (#?) diff --git a/README.md b/README.md index 1f01c3206..6e31c8090 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ Celerity's runtime behavior: automatically assign a unique device to each worker on a host. - `CELERITY_PROFILE_KERNEL` controls whether SYCL queue profiling information should be queried (currently not supported when using hipSYCL). -- `CELERITY_RECORDING` enables recording of the generated tasks and commands, - which allows printing dot graphs for debugging and analysis. +- `CELERITY_PRINT_GRAPHS` controls whether task and command graphs are printed to + stdout at the end of execution. - `CELERITY_DRY_RUN_NODES` takes a number and simulates a run with that many nodes without actually executing the commands. diff --git a/include/config.h b/include/config.h index c09779949..6ae53f175 100644 --- a/include/config.h +++ b/include/config.h @@ -40,7 +40,7 @@ namespace detail { const std::optional& get_device_config() const { return m_device_cfg; } std::optional get_enable_device_profiling() const { return m_enable_device_profiling; } bool is_dry_run() const { return m_dry_run_nodes > 0; } - bool is_recording() const { return m_recording; } + bool should_print_graphs() const { return m_should_print_graphs; } int get_dry_run_nodes() const { return m_dry_run_nodes; } std::optional get_horizon_step() const { return m_horizon_step; } std::optional get_horizon_max_parallelism() const { return m_horizon_max_parallelism; } @@ -50,7 +50,7 @@ namespace detail { std::optional m_device_cfg; std::optional m_enable_device_profiling; size_t m_dry_run_nodes = 0; - bool m_recording = false; + bool m_should_print_graphs = false; std::optional m_horizon_step; std::optional m_horizon_max_parallelism; }; diff --git a/src/config.cc b/src/config.cc index 16192909c..fc5ef9602 100644 --- a/src/config.cc +++ b/src/config.cc @@ -62,7 +62,7 @@ namespace { size_t parse_validate_graph_print_max_verts(const std::string_view str) { throw env::validation_error{"Support for CELERITY_GRAPH_PRINT_MAX_VERTS has been removed with Celerity 0.5.0.\n" - "Opt into graph recording by setting CELERITY_RECORDING."}; + "Opt into graph printing by setting CELERITY_PRINT_GRAPHS=1."}; return 0; } @@ -155,7 +155,7 @@ namespace detail { pref.register_variable>("DEVICES", [this](const std::string_view str) { return parse_validate_devices(str, m_host_cfg); }); const auto env_profile_kernel = pref.register_variable("PROFILE_KERNEL", parse_validate_profile_kernel); const auto env_dry_run_nodes = pref.register_variable("DRY_RUN_NODES", parse_validate_dry_run_nodes); - const auto env_recording = pref.register_variable("RECORDING"); + const auto env_print_graphs = pref.register_variable("PRINT_GRAPHS"); constexpr int horizon_max = 1024 * 64; const auto env_horizon_step = pref.register_range("HORIZON_STEP", 1, horizon_max); const auto env_horizon_max_para = pref.register_range("HORIZON_MAX_PARALLELISM", 1, horizon_max); @@ -201,7 +201,7 @@ namespace detail { const auto has_dry_run_nodes = parsed_and_validated_envs.get(env_dry_run_nodes); if(has_dry_run_nodes) { m_dry_run_nodes = *has_dry_run_nodes; } - m_recording = parsed_and_validated_envs.get_or(env_recording, false); + m_should_print_graphs = parsed_and_validated_envs.get_or(env_print_graphs, false); m_horizon_step = parsed_and_validated_envs.get(env_horizon_step); m_horizon_max_parallelism = parsed_and_validated_envs.get(env_horizon_max_para); diff --git a/src/runtime.cc b/src/runtime.cc index 8eb65aba0..8e150037b 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -149,7 +149,7 @@ namespace detail { m_reduction_mngr = std::make_unique(); m_host_object_mngr = std::make_unique(); - if(m_cfg->is_recording()) m_task_recorder = std::make_unique(m_buffer_mngr.get()); + if(m_cfg->should_print_graphs()) m_task_recorder = std::make_unique(m_buffer_mngr.get()); task_manager::policy_set task_mngr_policy; // Merely _declaring_ an uninitialized read is legitimate as long as the kernel does not actually perform the read at runtime - this might happen in the @@ -164,7 +164,7 @@ namespace detail { m_exec = std::make_unique(m_num_nodes, m_local_nid, *m_h_queue, *m_d_queue, *m_task_mngr, *m_buffer_mngr, *m_reduction_mngr); m_cdag = std::make_unique(); - if(m_cfg->is_recording()) m_command_recorder = std::make_unique(m_task_mngr.get(), m_buffer_mngr.get()); + if(m_cfg->should_print_graphs()) m_command_recorder = std::make_unique(m_task_mngr.get(), m_buffer_mngr.get()); distributed_graph_generator::policy_set dggen_policy; // Any uninitialized read that is observed on CDAG generation was already logged on task generation, unless we have a bug. @@ -224,17 +224,17 @@ namespace detail { m_d_queue->wait(); m_h_queue->wait(); - if(spdlog::should_log(log_level::trace) && m_cfg->is_recording()) { + if(m_cfg->should_print_graphs()) { if(m_local_nid == 0) { // It's the same across all nodes assert(m_task_recorder.get() != nullptr); const auto graph_str = detail::print_task_graph(*m_task_recorder); - CELERITY_TRACE("Task graph:\n\n{}\n", graph_str); + fmt::print("\nTask graph:\n\n{}\n", graph_str); } // must be called on all nodes auto cmd_graph = gather_command_graph(); if(m_local_nid == 0) { std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Avoid racing on stdout with other nodes (funneled through mpirun) - CELERITY_TRACE("Command graph:\n\n{}\n", cmd_graph); + fmt::print("\nCommand graph:\n\n{}\n", cmd_graph); } } diff --git a/test/print_graph_tests.cc b/test/print_graph_tests.cc index 5653b5610..fdef73669 100644 --- a/test/print_graph_tests.cc +++ b/test/print_graph_tests.cc @@ -112,7 +112,7 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]") } TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the generated graph", "[print_graph]") { - env::scoped_test_environment tenv(recording_enabled_env_setting); + env::scoped_test_environment tenv(print_graphs_env_setting); distr_queue q; celerity::range<1> range(16); @@ -141,8 +141,8 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the } } -TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_RECORDING is set", "[print_graph]") { - env::scoped_test_environment tenv(recording_enabled_env_setting); +TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_PRINT_GRAPHS is set", "[print_graph]") { + env::scoped_test_environment tenv(print_graphs_env_setting); distr_queue q; celerity::range<1> range(16); diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index 27ce385ef..7f908d7ea 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -1305,7 +1305,7 @@ namespace detail { {"CELERITY_DEVICES", "1 1"}, {"CELERITY_PROFILE_KERNEL", "1"}, {"CELERITY_DRY_RUN_NODES", "4"}, - {"CELERITY_RECORDING", "true"}, + {"CELERITY_PRINT_GRAPHS", "true"}, }; const auto test_env = env::scoped_test_environment(env_map); auto cfg = config(nullptr, nullptr); @@ -1319,7 +1319,7 @@ namespace detail { REQUIRE(has_prof.has_value()); CHECK((*has_prof) == true); CHECK(cfg.get_dry_run_nodes() == 4); - CHECK(cfg.is_recording() == true); + CHECK(cfg.should_print_graphs() == true); } TEST_CASE_METHOD(test_utils::mpi_fixture, "config reports incorrect environment varibles", "[env-vars][config]") { diff --git a/test/system/distr_tests.cc b/test/system/distr_tests.cc index bed2ba300..5aef2bea8 100644 --- a/test/system/distr_tests.cc +++ b/test/system/distr_tests.cc @@ -263,7 +263,7 @@ namespace detail { } TEST_CASE_METHOD(test_utils::runtime_fixture, "generating same task graph on different nodes", "[task-graph]") { - env::scoped_test_environment tenv(recording_enabled_env_setting); + env::scoped_test_environment tenv(print_graphs_env_setting); distr_queue q; REQUIRE(runtime::get_instance().get_num_nodes() > 1); @@ -374,7 +374,7 @@ namespace detail { } TEST_CASE_METHOD(test_utils::runtime_fixture, "command graph can be collected across distributed nodes", "[print_graph]") { - env::scoped_test_environment tenv(recording_enabled_env_setting); + env::scoped_test_environment tenv(print_graphs_env_setting); int global_size = 0; MPI_Comm_size(MPI_COMM_WORLD, &global_size); diff --git a/test/test_utils.h b/test/test_utils.h index f966abf02..17c3b198f 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -51,7 +51,7 @@ namespace celerity { namespace detail { - const std::unordered_map recording_enabled_env_setting{{"CELERITY_RECORDING", "1"}}; + const std::unordered_map print_graphs_env_setting{{"CELERITY_PRINT_GRAPHS", "1"}}; struct runtime_testspy { static scheduler& get_schdlr(runtime& rt) { return *rt.m_schdlr; }