Skip to content

Commit

Permalink
Introduce new CELERITY_PRINT_GRAPHS env var
Browse files Browse the repository at this point in the history
...to control whether task and command graphs are printed to stdout at
the end of execution.
  • Loading branch information
psalz committed Dec 20, 2023
1 parent e25f904 commit 2859d1d
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Versioning](http://semver.org/spec/v2.0.0.html).

### Added

- Add new environment variable `CELERITY_PRINT_GRAPHS` to control whether task and command graphs are printed to stdout (#?)
- Introduce new experimental `for_each_item` utility to iterate over a celerity range (#199)
- Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199)
- Add new `experimental::constrain_split` API to limit how a kernel can be split (#?)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ Celerity's runtime behavior:
automatically assign a unique device to each worker on a host.
- `CELERITY_PROFILE_KERNEL` controls whether SYCL queue profiling information
should be queried (currently not supported when using hipSYCL).
- `CELERITY_RECORDING` enables recording of the generated tasks and commands,
which allows printing dot graphs for debugging and analysis.
- `CELERITY_PRINT_GRAPHS` controls whether task and command graphs are printed to
stdout at the end of execution.
- `CELERITY_DRY_RUN_NODES` takes a number and simulates a run with that many nodes
without actually executing the commands.
4 changes: 2 additions & 2 deletions include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace detail {
const std::optional<device_config>& get_device_config() const { return m_device_cfg; }
std::optional<bool> get_enable_device_profiling() const { return m_enable_device_profiling; }
bool is_dry_run() const { return m_dry_run_nodes > 0; }
bool is_recording() const { return m_recording; }
bool should_print_graphs() const { return m_should_print_graphs; }
int get_dry_run_nodes() const { return m_dry_run_nodes; }
std::optional<int> get_horizon_step() const { return m_horizon_step; }
std::optional<int> get_horizon_max_parallelism() const { return m_horizon_max_parallelism; }
Expand All @@ -50,7 +50,7 @@ namespace detail {
std::optional<device_config> m_device_cfg;
std::optional<bool> m_enable_device_profiling;
size_t m_dry_run_nodes = 0;
bool m_recording = false;
bool m_should_print_graphs = false;
std::optional<int> m_horizon_step;
std::optional<int> m_horizon_max_parallelism;
};
Expand Down
6 changes: 3 additions & 3 deletions src/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ namespace {

size_t parse_validate_graph_print_max_verts(const std::string_view str) {
throw env::validation_error{"Support for CELERITY_GRAPH_PRINT_MAX_VERTS has been removed with Celerity 0.5.0.\n"
"Opt into graph recording by setting CELERITY_RECORDING."};
"Opt into graph printing by setting CELERITY_PRINT_GRAPHS=1."};
return 0;
}

Expand Down Expand Up @@ -155,7 +155,7 @@ namespace detail {
pref.register_variable<std::vector<size_t>>("DEVICES", [this](const std::string_view str) { return parse_validate_devices(str, m_host_cfg); });
const auto env_profile_kernel = pref.register_variable<bool>("PROFILE_KERNEL", parse_validate_profile_kernel);
const auto env_dry_run_nodes = pref.register_variable<size_t>("DRY_RUN_NODES", parse_validate_dry_run_nodes);
const auto env_recording = pref.register_variable<bool>("RECORDING");
const auto env_print_graphs = pref.register_variable<bool>("PRINT_GRAPHS");
constexpr int horizon_max = 1024 * 64;
const auto env_horizon_step = pref.register_range<int>("HORIZON_STEP", 1, horizon_max);
const auto env_horizon_max_para = pref.register_range<int>("HORIZON_MAX_PARALLELISM", 1, horizon_max);
Expand Down Expand Up @@ -201,7 +201,7 @@ namespace detail {
const auto has_dry_run_nodes = parsed_and_validated_envs.get(env_dry_run_nodes);
if(has_dry_run_nodes) { m_dry_run_nodes = *has_dry_run_nodes; }

m_recording = parsed_and_validated_envs.get_or(env_recording, false);
m_should_print_graphs = parsed_and_validated_envs.get_or(env_print_graphs, false);
m_horizon_step = parsed_and_validated_envs.get(env_horizon_step);
m_horizon_max_parallelism = parsed_and_validated_envs.get(env_horizon_max_para);

Expand Down
10 changes: 5 additions & 5 deletions src/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ namespace detail {
m_reduction_mngr = std::make_unique<reduction_manager>();
m_host_object_mngr = std::make_unique<host_object_manager>();

if(m_cfg->is_recording()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());
if(m_cfg->should_print_graphs()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());

task_manager::policy_set task_mngr_policy;
// Merely _declaring_ an uninitialized read is legitimate as long as the kernel does not actually perform the read at runtime - this might happen in the
Expand All @@ -164,7 +164,7 @@ namespace detail {
m_exec = std::make_unique<executor>(m_num_nodes, m_local_nid, *m_h_queue, *m_d_queue, *m_task_mngr, *m_buffer_mngr, *m_reduction_mngr);

m_cdag = std::make_unique<command_graph>();
if(m_cfg->is_recording()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());
if(m_cfg->should_print_graphs()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());

distributed_graph_generator::policy_set dggen_policy;
// Any uninitialized read that is observed on CDAG generation was already logged on task generation, unless we have a bug.
Expand Down Expand Up @@ -224,17 +224,17 @@ namespace detail {
m_d_queue->wait();
m_h_queue->wait();

if(spdlog::should_log(log_level::trace) && m_cfg->is_recording()) {
if(m_cfg->should_print_graphs()) {
if(m_local_nid == 0) { // It's the same across all nodes
assert(m_task_recorder.get() != nullptr);
const auto graph_str = detail::print_task_graph(*m_task_recorder);
CELERITY_TRACE("Task graph:\n\n{}\n", graph_str);
fmt::print("\nTask graph:\n\n{}\n", graph_str);
}
// must be called on all nodes
auto cmd_graph = gather_command_graph();
if(m_local_nid == 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Avoid racing on stdout with other nodes (funneled through mpirun)
CELERITY_TRACE("Command graph:\n\n{}\n", cmd_graph);
fmt::print("\nCommand graph:\n\n{}\n", cmd_graph);
}
}

Expand Down
6 changes: 3 additions & 3 deletions test/print_graph_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]")
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the generated graph", "[print_graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
env::scoped_test_environment tenv(print_graphs_env_setting);

distr_queue q;
celerity::range<1> range(16);
Expand Down Expand Up @@ -141,8 +141,8 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the
}
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_RECORDING is set", "[print_graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_PRINT_GRAPHS is set", "[print_graph]") {
env::scoped_test_environment tenv(print_graphs_env_setting);

distr_queue q;
celerity::range<1> range(16);
Expand Down
4 changes: 2 additions & 2 deletions test/runtime_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,7 @@ namespace detail {
{"CELERITY_DEVICES", "1 1"},
{"CELERITY_PROFILE_KERNEL", "1"},
{"CELERITY_DRY_RUN_NODES", "4"},
{"CELERITY_RECORDING", "true"},
{"CELERITY_PRINT_GRAPHS", "true"},
};
const auto test_env = env::scoped_test_environment(env_map);
auto cfg = config(nullptr, nullptr);
Expand All @@ -1319,7 +1319,7 @@ namespace detail {
REQUIRE(has_prof.has_value());
CHECK((*has_prof) == true);
CHECK(cfg.get_dry_run_nodes() == 4);
CHECK(cfg.is_recording() == true);
CHECK(cfg.should_print_graphs() == true);
}

TEST_CASE_METHOD(test_utils::mpi_fixture, "config reports incorrect environment varibles", "[env-vars][config]") {
Expand Down
4 changes: 2 additions & 2 deletions test/system/distr_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ namespace detail {
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "generating same task graph on different nodes", "[task-graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
env::scoped_test_environment tenv(print_graphs_env_setting);
distr_queue q;
REQUIRE(runtime::get_instance().get_num_nodes() > 1);

Expand Down Expand Up @@ -374,7 +374,7 @@ namespace detail {
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "command graph can be collected across distributed nodes", "[print_graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
env::scoped_test_environment tenv(print_graphs_env_setting);

int global_size = 0;
MPI_Comm_size(MPI_COMM_WORLD, &global_size);
Expand Down
2 changes: 1 addition & 1 deletion test/test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
namespace celerity {
namespace detail {

const std::unordered_map<std::string, std::string> recording_enabled_env_setting{{"CELERITY_RECORDING", "1"}};
const std::unordered_map<std::string, std::string> print_graphs_env_setting{{"CELERITY_PRINT_GRAPHS", "1"}};

struct runtime_testspy {
static scheduler& get_schdlr(runtime& rt) { return *rt.m_schdlr; }
Expand Down

0 comments on commit 2859d1d

Please sign in to comment.