Skip to content

Commit

Permalink
Fix daemon suicide and preserve output files
Browse files Browse the repository at this point in the history
Set the parent rank correctly so that the OOB can
identify its lifeline and cause the daemon to abort
when that lifeline dies. Fix the
`--debug-daemons-file` flag so it works, and
protect the resulting output file from being removed during cleanup.

Signed-off-by: Ralph Castain <rhc@pmix.org>
  • Loading branch information
rhc54 committed Feb 10, 2024
1 parent 618dd0a commit a87d172
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 9 deletions.
6 changes: 0 additions & 6 deletions src/mca/ess/base/ess_base_std_prted.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,12 +530,6 @@ int prte_ess_base_prted_finalize(void)
signals_set = false;
}

/* cleanup */
if (NULL != log_path) {
unlink(log_path);
}


if (NULL != prte_errmgr.finalize) {
prte_errmgr.finalize();
}
Expand Down
5 changes: 3 additions & 2 deletions src/rml/rml.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -100,11 +100,12 @@ void prte_rml_open(void)
PMIX_CONSTRUCT(&prte_rml_base.posted_recvs, pmix_list_t);
PMIX_CONSTRUCT(&prte_rml_base.unmatched_msgs, pmix_list_t);
PMIX_CONSTRUCT(&prte_rml_base.children, pmix_list_t);
prte_rml_base.lifeline = PRTE_PROC_MY_PARENT->rank;

/* compute the routing tree - only thing we need to know is the
* number of daemons in the DVM */
prte_rml_compute_routing_tree();

prte_rml_base.lifeline = PRTE_PROC_MY_PARENT->rank;
}

void prte_rml_send_callback(int status, pmix_proc_t *peer,
Expand Down
3 changes: 3 additions & 0 deletions src/tools/prte/prte.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@ int main(int argc, char *argv[])
if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS)) {
prte_debug_daemons_flag = true;
}
if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS_FILE)) {
prte_debug_daemons_file_flag = true;
}
if (pmix_cmd_line_is_taken(&results, PRTE_CLI_LEAVE_SESSION_ATTACHED)) {
prte_leave_session_attached = true;
}
Expand Down
3 changes: 3 additions & 0 deletions src/tools/prted/prted.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,9 @@ int main(int argc, char *argv[])
if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS)) {
prte_debug_daemons_flag = true;
}
if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS_FILE)) {
prte_debug_daemons_file_flag = true;
}
if (pmix_cmd_line_is_taken(&results, PRTE_CLI_LEAVE_SESSION_ATTACHED)) {
prte_leave_session_attached = true;
}
Expand Down
3 changes: 2 additions & 1 deletion src/util/session_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ static bool _check_file(const char *root, const char *path)
* - non-zero files starting with "output-"
*/
if (0 == strncmp(path, "output-", strlen("output-"))) {
fullpath = pmix_os_path(false, &fullpath, root, path, NULL);
memset(&st, 0, sizeof(struct stat));
fullpath = pmix_os_path(false, root, path, NULL);
stat(fullpath, &st);
free(fullpath);
if (0 == st.st_size) {
Expand Down

0 comments on commit a87d172

Please sign in to comment.