Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce new CELERITY_PRINT_GRAPHS env var #236

Merged
merged 1 commit into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Versioning](http://semver.org/spec/v2.0.0.html).

### Added

- Add new environment variable `CELERITY_PRINT_GRAPHS` to control whether task and command graphs are logged (#197, #236)
- Introduce new experimental `for_each_item` utility to iterate over a celerity range (#199)
- Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199)
- Add new `experimental::constrain_split` API to limit how a kernel can be split (#?)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ Celerity's runtime behavior:
automatically assign a unique device to each worker on a host.
- `CELERITY_PROFILE_KERNEL` controls whether SYCL queue profiling information
should be queried (currently not supported when using hipSYCL).
- `CELERITY_RECORDING` enables recording of the generated tasks and commands,
which allows printing dot graphs for debugging and analysis.
- `CELERITY_PRINT_GRAPHS` controls whether task and command graphs are logged
at the end of execution (requires the log level to be `info` or a more verbose level such as `trace`).
- `CELERITY_DRY_RUN_NODES` takes a number and simulates a run with that many nodes
without actually executing the commands.
8 changes: 6 additions & 2 deletions include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ namespace detail {
// Returns the stored device configuration; the optional is empty when none is stored.
const std::optional<device_config>& get_device_config() const { return m_device_cfg; }
// Returns the stored device-profiling setting; the optional is empty when none is stored.
std::optional<bool> get_enable_device_profiling() const { return m_enable_device_profiling; }
// True when a non-zero number of dry-run nodes is configured.
bool is_dry_run() const { return m_dry_run_nodes > 0; }
// Returns the recording flag.
bool is_recording() const { return m_recording; }
// Returns the graph-printing flag.
bool should_print_graphs() const { return m_should_print_graphs; }
// Recording is currently needed only for graph printing; widen this condition if other features start to depend on it.
bool should_record() const { return m_should_print_graphs; }
// Returns the configured dry-run node count (stored as size_t, converted to int on return); 0 means no dry run.
int get_dry_run_nodes() const { return m_dry_run_nodes; }
// Returns the stored horizon step; the optional is empty when none is stored.
std::optional<int> get_horizon_step() const { return m_horizon_step; }
// Returns the stored horizon max-parallelism value; the optional is empty when none is stored.
std::optional<int> get_horizon_max_parallelism() const { return m_horizon_max_parallelism; }
Expand All @@ -50,7 +54,7 @@ namespace detail {
std::optional<device_config> m_device_cfg;
std::optional<bool> m_enable_device_profiling;
size_t m_dry_run_nodes = 0;
bool m_recording = false;
bool m_should_print_graphs = false;
std::optional<int> m_horizon_step;
std::optional<int> m_horizon_max_parallelism;
};
Expand Down
6 changes: 3 additions & 3 deletions src/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ namespace {

// Validator for the removed CELERITY_GRAPH_PRINT_MAX_VERTS variable: it unconditionally throws an
// env::validation_error carrying a migration hint, so `str` is never inspected and the trailing
// `return 0;` is never reached.
size_t parse_validate_graph_print_max_verts(const std::string_view str) {
throw env::validation_error{"Support for CELERITY_GRAPH_PRINT_MAX_VERTS has been removed with Celerity 0.5.0.\n"
"Opt into graph recording by setting CELERITY_RECORDING."};
"Opt into graph printing by setting CELERITY_PRINT_GRAPHS=1."};
return 0;
}

Expand Down Expand Up @@ -155,7 +155,7 @@ namespace detail {
pref.register_variable<std::vector<size_t>>("DEVICES", [this](const std::string_view str) { return parse_validate_devices(str, m_host_cfg); });
const auto env_profile_kernel = pref.register_variable<bool>("PROFILE_KERNEL", parse_validate_profile_kernel);
const auto env_dry_run_nodes = pref.register_variable<size_t>("DRY_RUN_NODES", parse_validate_dry_run_nodes);
const auto env_recording = pref.register_variable<bool>("RECORDING");
const auto env_print_graphs = pref.register_variable<bool>("PRINT_GRAPHS");
constexpr int horizon_max = 1024 * 64;
const auto env_horizon_step = pref.register_range<int>("HORIZON_STEP", 1, horizon_max);
const auto env_horizon_max_para = pref.register_range<int>("HORIZON_MAX_PARALLELISM", 1, horizon_max);
Expand Down Expand Up @@ -201,7 +201,7 @@ namespace detail {
const auto has_dry_run_nodes = parsed_and_validated_envs.get(env_dry_run_nodes);
if(has_dry_run_nodes) { m_dry_run_nodes = *has_dry_run_nodes; }

m_recording = parsed_and_validated_envs.get_or(env_recording, false);
m_should_print_graphs = parsed_and_validated_envs.get_or(env_print_graphs, false);
m_horizon_step = parsed_and_validated_envs.get(env_horizon_step);
m_horizon_max_parallelism = parsed_and_validated_envs.get(env_horizon_max_para);

Expand Down
10 changes: 5 additions & 5 deletions src/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ namespace detail {
m_reduction_mngr = std::make_unique<reduction_manager>();
m_host_object_mngr = std::make_unique<host_object_manager>();

if(m_cfg->is_recording()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());
if(m_cfg->should_record()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());

task_manager::policy_set task_mngr_policy;
// Merely _declaring_ an uninitialized read is legitimate as long as the kernel does not actually perform the read at runtime - this might happen in the
Expand All @@ -164,7 +164,7 @@ namespace detail {
m_exec = std::make_unique<executor>(m_num_nodes, m_local_nid, *m_h_queue, *m_d_queue, *m_task_mngr, *m_buffer_mngr, *m_reduction_mngr);

m_cdag = std::make_unique<command_graph>();
if(m_cfg->is_recording()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());
if(m_cfg->should_record()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());

distributed_graph_generator::policy_set dggen_policy;
// Any uninitialized read that is observed on CDAG generation was already logged on task generation, unless we have a bug.
Expand Down Expand Up @@ -224,17 +224,17 @@ namespace detail {
m_d_queue->wait();
m_h_queue->wait();

if(spdlog::should_log(log_level::trace) && m_cfg->is_recording()) {
if(spdlog::should_log(log_level::info) && m_cfg->should_print_graphs()) {
if(m_local_nid == 0) { // It's the same across all nodes
assert(m_task_recorder.get() != nullptr);
const auto graph_str = detail::print_task_graph(*m_task_recorder);
CELERITY_TRACE("Task graph:\n\n{}\n", graph_str);
CELERITY_INFO("Task graph:\n\n{}\n", graph_str);
}
// must be called on all nodes
auto cmd_graph = gather_command_graph();
if(m_local_nid == 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Avoid racing on stdout with other nodes (funneled through mpirun)
CELERITY_TRACE("Command graph:\n\n{}\n", cmd_graph);
CELERITY_INFO("Command graph:\n\n{}\n", cmd_graph);
}
}

Expand Down
6 changes: 3 additions & 3 deletions test/print_graph_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]")
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the generated graph", "[print_graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
env::scoped_test_environment tenv(print_graphs_env_setting);

distr_queue q;
celerity::range<1> range(16);
Expand Down Expand Up @@ -141,8 +141,8 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the
}
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_RECORDING is set", "[print_graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_PRINT_GRAPHS is set", "[print_graph]") {
env::scoped_test_environment tenv(print_graphs_env_setting);

distr_queue q;
celerity::range<1> range(16);
Expand Down
4 changes: 2 additions & 2 deletions test/runtime_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,7 @@ namespace detail {
{"CELERITY_DEVICES", "1 1"},
{"CELERITY_PROFILE_KERNEL", "1"},
{"CELERITY_DRY_RUN_NODES", "4"},
{"CELERITY_RECORDING", "true"},
{"CELERITY_PRINT_GRAPHS", "true"},
};
const auto test_env = env::scoped_test_environment(env_map);
auto cfg = config(nullptr, nullptr);
Expand All @@ -1319,7 +1319,7 @@ namespace detail {
REQUIRE(has_prof.has_value());
CHECK((*has_prof) == true);
CHECK(cfg.get_dry_run_nodes() == 4);
CHECK(cfg.is_recording() == true);
CHECK(cfg.should_print_graphs() == true);
}

TEST_CASE_METHOD(test_utils::mpi_fixture, "config reports incorrect environment varibles", "[env-vars][config]") {
Expand Down
6 changes: 3 additions & 3 deletions test/system/distr_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ namespace detail {
TEST_CASE_METHOD(
test_utils::runtime_fixture, "runtime-shutdown graph printing works in the presence of a finished reduction", "[reductions][print_graph][smoke-test]") {
#if CELERITY_FEATURE_SCALAR_REDUCTIONS
env::scoped_test_environment test_env(recording_enabled_env_setting);
env::scoped_test_environment test_env(print_graphs_env_setting);
// init runtime early so the distr_queue ctor doesn't override the log level set by log_capture
runtime::init(nullptr, nullptr);

Expand Down Expand Up @@ -263,7 +263,7 @@ namespace detail {
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "generating same task graph on different nodes", "[task-graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
env::scoped_test_environment tenv(print_graphs_env_setting);
distr_queue q;
REQUIRE(runtime::get_instance().get_num_nodes() > 1);

Expand Down Expand Up @@ -374,7 +374,7 @@ namespace detail {
}

TEST_CASE_METHOD(test_utils::runtime_fixture, "command graph can be collected across distributed nodes", "[print_graph]") {
env::scoped_test_environment tenv(recording_enabled_env_setting);
env::scoped_test_environment tenv(print_graphs_env_setting);

int global_size = 0;
MPI_Comm_size(MPI_COMM_WORLD, &global_size);
Expand Down
2 changes: 1 addition & 1 deletion test/test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
namespace celerity {
namespace detail {

const std::unordered_map<std::string, std::string> recording_enabled_env_setting{{"CELERITY_RECORDING", "1"}};
// Environment override that switches on graph printing (CELERITY_PRINT_GRAPHS=1) for a test.
// Declared `inline` so that all translation units including this header share a single map
// instead of each constructing its own internal-linkage copy.
inline const std::unordered_map<std::string, std::string> print_graphs_env_setting{{"CELERITY_PRINT_GRAPHS", "1"}};

struct runtime_testspy {
static scheduler& get_schdlr(runtime& rt) { return *rt.m_schdlr; }
Expand Down