From 8496d41d4c2ed07596d38ae12c0634bbe3949c38 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 26 Apr 2022 09:19:38 -0600 Subject: [PATCH 01/15] Update run command to newest recommended flags --- examples/01_hello_world/fenix/CMakeLists.txt | 2 +- examples/02_send_recv/fenix/CMakeLists.txt | 2 +- examples/05_subset_create/CMakeLists.txt | 2 +- examples/06_subset_createv/CMakeLists.txt | 2 +- test/failed_spares/CMakeLists.txt | 2 +- test/issend/CMakeLists.txt | 2 +- test/issend/fenix_issend_test.c | 1 - test/no_jump/CMakeLists.txt | 2 +- test/request_cancelled/CMakeLists.txt | 2 +- 9 files changed, 8 insertions(+), 9 deletions(-) diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt index 88c175d..2dad662 100644 --- a/examples/01_hello_world/fenix/CMakeLists.txt +++ b/examples/01_hello_world/fenix/CMakeLists.txt @@ -16,5 +16,5 @@ if(BUILD_TESTING) add_executable(fenix_hello_world-debug fenix_hello_world.c) target_link_libraries(fenix_hello_world-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME hello_world - COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -n 3 fenix_hello_world-debug "1") + COMMAND mpirun --with-ft mpi -n 3 fenix_hello_world-debug "1") endif() diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt index d440e2e..aa5dc65 100644 --- a/examples/02_send_recv/fenix/CMakeLists.txt +++ b/examples/02_send_recv/fenix/CMakeLists.txt @@ -16,7 +16,7 @@ if(BUILD_TESTING) add_executable(fenix_ring-debug fenix_ring.c) target_link_libraries(fenix_ring-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME ring - COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -np 5 fenix_ring-debug 1 2) + COMMAND mpirun --with-ft mpi -np 5 fenix_ring-debug 1 2) set_tests_properties(ring PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/05_subset_create/CMakeLists.txt b/examples/05_subset_create/CMakeLists.txt index 0e24f29..bf2da45 100644 --- a/examples/05_subset_create/CMakeLists.txt +++ b/examples/05_subset_create/CMakeLists.txt @@ -16,7 +16,7 @@ if(BUILD_TESTING) add_executable(fenix_subset_create-debug subset_create.c) target_link_libraries(fenix_subset_create-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME subset_create - COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -np 5 fenix_subset_create-debug 1) + COMMAND mpirun --with-ft mpi -np 5 fenix_subset_create-debug 1) set_tests_properties(subset_create PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/06_subset_createv/CMakeLists.txt b/examples/06_subset_createv/CMakeLists.txt index 41eb16b..3a935a7 100644 --- a/examples/06_subset_createv/CMakeLists.txt +++ b/examples/06_subset_createv/CMakeLists.txt @@ -16,7 +16,7 @@ if(BUILD_TESTING) add_executable(fenix_subset_createv-debug subset_createv.c) target_link_libraries(fenix_subset_createv-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME subset_createv - COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -np 5 fenix_subset_createv-debug 1) + COMMAND mpirun --with-ft mpi -np 5 fenix_subset_createv-debug 1) set_tests_properties(subset_createv PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/test/failed_spares/CMakeLists.txt b/test/failed_spares/CMakeLists.txt index be231ab..96827f3 100644 --- a/test/failed_spares/CMakeLists.txt +++ b/test/failed_spares/CMakeLists.txt @@ -12,4 +12,4 @@ add_executable(fenix_failed_spares fenix_failed_spares.c) target_link_libraries(fenix_failed_spares fenix ${MPI_C_LIBRARIES}) 
add_test(NAME failed_spares - COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -n 6 fenix_failed_spares 3 1 3 4 ) + COMMAND mpirun --with-ft mpi -n 6 fenix_failed_spares 3 1 3 4 ) diff --git a/test/issend/CMakeLists.txt b/test/issend/CMakeLists.txt index 0566e67..c4f6918 100644 --- a/test/issend/CMakeLists.txt +++ b/test/issend/CMakeLists.txt @@ -12,4 +12,4 @@ set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_issend_test fenix_issend_test.c) target_link_libraries(fenix_issend_test fenix ${MPI_C_LIBRARIES}) -add_test(NAME issend COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -np 5 fenix_issend_test "1") +add_test(NAME issend COMMAND mpirun --with-ft mpi -np 5 fenix_issend_test "1") diff --git a/test/issend/fenix_issend_test.c b/test/issend/fenix_issend_test.c index 7e45e5c..0159297 100644 --- a/test/issend/fenix_issend_test.c +++ b/test/issend/fenix_issend_test.c @@ -66,7 +66,6 @@ const int kKillID = 1; int main(int argc, char **argv) { -#warning "It's a good idea to complain when not enough parameters! Should add this code to other examples too." if (argc < 2) { printf("Usage: %s <# spare ranks> \n", *argv); exit(0); diff --git a/test/no_jump/CMakeLists.txt b/test/no_jump/CMakeLists.txt index 2549189..b3258dd 100644 --- a/test/no_jump/CMakeLists.txt +++ b/test/no_jump/CMakeLists.txt @@ -12,4 +12,4 @@ set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_no_jump_test fenix_no_jump_test.c) target_link_libraries(fenix_no_jump_test fenix ${MPI_C_LIBRARIES}) -add_test(NAME no_jump COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -np 5 fenix_no_jump_test "1") +add_test(NAME no_jump COMMAND mpirun --with-ft mpi -np 5 fenix_no_jump_test "1") diff --git a/test/request_cancelled/CMakeLists.txt b/test/request_cancelled/CMakeLists.txt index e7d7cd8..a59af59 100644 --- a/test/request_cancelled/CMakeLists.txt +++ b/test/request_cancelled/CMakeLists.txt @@ -12,4 +12,4 @@ set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_request_cancelled_test fenix_req_cancelled_test.c) target_link_libraries(fenix_request_cancelled_test fenix ${MPI_C_LIBRARIES}) -add_test(NAME request_cancelled COMMAND mpirun --enable-recovery --omca mpi_ft_enable true -np 5 fenix_request_cancelled_test "1") +add_test(NAME request_cancelled COMMAND mpirun --with-ft mpi -np 5 fenix_request_cancelled_test "1") From d254da2124e1168ab9965170f6020d75e98a2137 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 26 Apr 2022 11:20:58 -0400 Subject: [PATCH 02/15] Merge from main repo (#5) * Travis fixes (#55) Fix some travis/testing issues. Travis now pulls from ULFM master branch when it needs to rebuild ULFM. Travis has an environment variable enabling oversubscription during the tests, instead of having that on all platforms when running make test Tests that involve failure have their timeouts individually set to 1, so tests don't take 10+ seconds each w/ the default timeout of 10s Simplified travis scripts (no more .travis_helpers directory) * Revert "Travis fixes (#55)" (#56) Reverting un-reviewer PR, it was meant to be in my fork. This reverts commit a41fd3b53ea717c12eb3cfa2372fa3d137044fd8. 
* Update README.md * Merge updates for HCLIB (#57) * Add ability to query which processes failed * Add support for MPI_Test * Add support for testing pre-failure requests * Fix bug when ERR_PROC_FAILED/ERR_REVOKED discovered in MPI_Test * Fix MPI_Wait w/ cancelled requests * Add missing file to commit * Fix bug with MPI_STATUS_IGNORE * Fix another bug with MPI_Test * Add no-jump recovery option * Travis fixes (#2) Fix some travis/testing issues. Travis now pulls from ULFM master branch when it needs to rebuild ULFM. Travis has an environment variable enabling oversubscription during the tests, instead of having that on all platforms when running make test Tests that involve failure have their timeouts individually set to 1, so tests don't take 10+ seconds each w/ the default timeout of 10s Simplified travis scripts (no more .travis_helpers directory) * First pass at removing the request store New function, "Fenix_test_cancelled" for checking if pre-failure requests completed or were cancelled. One thing to try finding a solution for: If a failure was found during an MPI_Test, that request has already been removed from MPI internals and replaced w/ MPI_REQUEST_NULL. Fenix_test_cancelled will report that this req was completed * Implement custom errhandler This includes removing the option for comm_replace - users now must provide a comm pointer to fenix_init and cannot rely on fenix to automatically replace their input comm with the resilient comm. * Fenix comms are stack-allocated now, instead of malloced * Cleanup redundant set_errhandler calls * Fix data recovery bug * Add usage instructions to all examples/tests * Add support for MPI_Issend and MPI_Ssend (#3) Merge in Issend test Co-authored-by: mwhitlo@sandia.gov Co-authored-by: sriraj Co-authored-by: Keita Teranishi Co-authored-by: mwhitlo@sandia.gov Co-authored-by: sriraj --- README.md | 2 +- examples/02_send_recv/fenix/CMakeLists.txt | 2 +- src/fenix_process_recovery.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 27741c2..f2a2fa3 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ These instructions assume you are in your home directory. // WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY // OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// Authors Marc Gamell, Matthew Whitlock, Eric Valenzuela, Keita Teranishi, Manish Parashar // and Michael Heroux // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt index aa5dc65..365440a 100644 --- a/examples/02_send_recv/fenix/CMakeLists.txt +++ b/examples/02_send_recv/fenix/CMakeLists.txt @@ -19,4 +19,4 @@ if(BUILD_TESTING) COMMAND mpirun --with-ft mpi -np 5 fenix_ring-debug 1 2) set_tests_properties(ring PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") -endif() +endif() \ No newline at end of file diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.c index 269821e..5609326 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.c @@ -155,8 +155,8 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha } } else { - /* No support. Setting it to print errs */ - fenix.print_unhandled = 1; + /* No support. 
Setting it to silent */ + fenix.print_unhandled = 0; } } } @@ -267,7 +267,7 @@ int __fenix_create_new_world() char errstr[MPI_MAX_ERROR_STRING]; MPI_Error_string(ret, errstr, &len); debug_print("MPI_Comm_split: %s\n", errstr); - } + } } return ret; @@ -432,7 +432,7 @@ int __fenix_repair_ranks() /* Assign new rank for reordering */ if (current_rank >= active_ranks) { // reorder ranks int rank_offset = ((world_size - 1) - current_rank); - + for(int fail_i = 0; fail_i < fenix.fail_world_size; fail_i++){ if(fenix.fail_world[fail_i] > current_rank) rank_offset--; } @@ -524,7 +524,7 @@ int __fenix_repair_ranks() if (current_rank >= active_ranks) { // reorder ranks int rank_offset = ((world_size - 1) - current_rank); - + for(int fail_i = 0; fail_i < fenix.fail_world_size; fail_i++){ if(fenix.fail_world[fail_i] > current_rank) rank_offset--; } From 750aeac3d8efdfba355c0cd617aa48d23de18b3f Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 26 Apr 2022 10:46:42 -0600 Subject: [PATCH 03/15] Update instructions to latest ULFM/OpenMPI recommended version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f2a2fa3..09efb60 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ These instructions assume you are in your home directory. * For example: ` git clone
` 2. Create a build directory. * For example: ` mkdir -p ~/build/fenix/ && cd ~/build/fenix/ ` -3. Specify the MPI C compiler to use. [ULFM2 Open MPI](https://bitbucket.org/icldistcomp/ulfm2) is the required version. +3. Specify the MPI C compiler to use. [Open MPI 5+](https://github.com/open-mpi/ompi/tree/v5.0.x) is the required version. * To manually indicate which compiler `cmake` should use, set the `MPICC` variable to point to it. * For example: ` export MPICC=~/install/mpi-ulfm/bin/mpicc ` * If the `MPICC` environment variable is not there, `cmake` will try to guess where the MPI implementation is. To help, make sure you include the installation directory of MPI in your `PATH`. From 553d1a6e8f580e4e69d8fe25d377fb3af5213e92 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 26 Apr 2022 11:15:34 -0600 Subject: [PATCH 04/15] Repair files from revert --- examples/01_hello_world/fenix/CMakeLists.txt | 2 +- include/fenix.h | 3 + include/fenix_comm_list.h | 2 +- include/fenix_data_group.h | 4 + include/fenix_data_member.h | 2 +- include/fenix_data_packet.h | 2 +- include/fenix_data_recovery.h | 3 +- include/fenix_f.h | 2 +- include/fenix_opt.h | 2 +- include/fenix_process_recovery.h | 2 +- include/fenix_process_recovery_global.h | 2 +- include/fenix_util.h | 4 +- src/fenix.c | 4 + src/fenix_callbacks.c | 2 +- src/fenix_comm_list.c | 2 +- src/fenix_data_policy.c | 3 +- src/fenix_data_policy_in_memory_raid.c | 222 +++++++++++++++---- src/fenix_data_recovery.c | 74 ++++++- src/fenix_mpi_override.c | 2 +- src/fenix_opt.c | 2 +- src/fenix_process_recovery.c | 15 +- src/fenix_util.c | 4 +- src/globals.c | 2 +- 23 files changed, 294 insertions(+), 68 deletions(-) diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt index 10c4c0b..2dad662 100644 --- a/examples/01_hello_world/fenix/CMakeLists.txt +++ b/examples/01_hello_world/fenix/CMakeLists.txt @@ -12,7 +12,7 @@ add_executable(fenix_hello_world fenix_hello_world.c) target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES}) if(BUILD_TESTING) - set(CMAKE_BUILD_TYPE Debug) + #set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_hello_world-debug fenix_hello_world.c) target_link_libraries(fenix_hello_world-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME hello_world diff --git a/include/fenix.h b/include/fenix.h index 7a1e382..4d7ca67 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -181,6 +181,9 @@ int Fenix_Data_barrier(int group_id); int Fenix_Data_member_restore(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* found_data); +int Fenix_Data_member_lrestore(int group_id, int member_id, void *target_buffer, + int max_count, int time_stamp, Fenix_Data_subset* found_data); + int Fenix_Data_member_restore_from_rank(int member_id, void *data, int max_count, int time_stamp, int group_id, int source_rank); diff --git a/include/fenix_comm_list.h b/include/fenix_comm_list.h index 1bd6210..c84f5c7 100644 --- a/include/fenix_comm_list.h +++ b/include/fenix_comm_list.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_data_group.h b/include/fenix_data_group.h index cb37e25..67cb079 100644 --- a/include/fenix_data_group.h +++ b/include/fenix_data_group.h @@ -101,6 +101,10 @@ typedef struct __fenix_group_vtbl { void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found); + int (*member_lrestore)(fenix_group_t* group, int member_id, + void* target_buffer, int max_count, int time_stamp, + Fenix_Data_subset* data_found); + int (*member_restore_from_rank)(fenix_group_t* group, int member_id, void* target_buffer, int max_count, int time_stamp, int source_rank); diff --git a/include/fenix_data_member.h b/include/fenix_data_member.h index 6be2196..b37c652 100644 --- a/include/fenix_data_member.h +++ b/include/fenix_data_member.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_data_packet.h b/include/fenix_data_packet.h index 018e9bc..372f58a 100644 --- a/include/fenix_data_packet.h +++ b/include/fenix_data_packet.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_data_recovery.h b/include/fenix_data_recovery.h index 79d9553..856dbe5 100644 --- a/include/fenix_data_recovery.h +++ b/include/fenix_data_recovery.h @@ -107,8 +107,6 @@ typedef struct __data_entry_packet { } fenix_data_entry_packet_t; -int store_counter; - int __fenix_group_create(int, MPI_Comm, int, int, int, void*, int*); int __fenix_group_get_redundancy_policy(int, int*, int*, int*); int __fenix_member_create(int, int, void *, int, MPI_Datatype); @@ -122,6 +120,7 @@ int __fenix_data_commit(int, int *); int __fenix_data_commit_barrier(int, int *); int __fenix_data_barrier(int); int __fenix_member_restore(int, int, void *, int, int, Fenix_Data_subset*); +int __fenix_member_lrestore(int, int, void *, int, int, Fenix_Data_subset*); int __fenix_member_restore_from_rank(int, int, void *, int, int, int); int __fenix_get_number_of_members(int, int *); int __fenix_get_member_at_position(int, int *, int); diff --git a/include/fenix_f.h b/include/fenix_f.h index 69b84a6..a8f06c0 100644 --- a/include/fenix_f.h +++ b/include/fenix_f.h @@ -45,7 +45,7 @@ !// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. !// !// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -!// and Michael Heroux +!// Michael Heroux, and Matthew Whitlock !// !// Questions? Contact Keita Teranishi (knteran@sandia.gov) and !// Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_opt.h b/include/fenix_opt.h index 521f885..b032b02 100644 --- a/include/fenix_opt.h +++ b/include/fenix_opt.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_process_recovery.h b/include/fenix_process_recovery.h index 90f2075..bb9d63a 100644 --- a/include/fenix_process_recovery.h +++ b/include/fenix_process_recovery.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_process_recovery_global.h b/include/fenix_process_recovery_global.h index 4b7d0b5..bd06cc8 100644 --- a/include/fenix_process_recovery_global.h +++ b/include/fenix_process_recovery_global.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/include/fenix_util.h b/include/fenix_util.h index 1a99ca1..8f76275 100644 --- a/include/fenix_util.h +++ b/include/fenix_util.h @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -75,7 +75,7 @@ #include #include -char *logname; +extern char *logname; #define LDEBUG(f...) {LLIND("debug",f);} #define LLIND(t,f...) {fprintf(stderr,"%s - %s (%i): %s: \n",logname,__PRETTY_FUNCTION__,getpid(),t); fprintf(stderr,f);} diff --git a/src/fenix.c b/src/fenix.c index 3590297..93f29f9 100644 --- a/src/fenix.c +++ b/src/fenix.c @@ -130,6 +130,10 @@ int Fenix_Data_member_restore(int group_id, int member_id, void *target_buffer, return __fenix_member_restore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); } +int Fenix_Data_member_lrestore(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found) { + return __fenix_member_lrestore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); +} + int Fenix_Data_member_resore_from_rank(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, int source_rank) { return 0; } diff --git a/src/fenix_callbacks.c b/src/fenix_callbacks.c index f693080..885058d 100644 --- a/src/fenix_callbacks.c +++ b/src/fenix_callbacks.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/src/fenix_comm_list.c b/src/fenix_comm_list.c index f9fe0cf..d1b56d2 100644 --- a/src/fenix_comm_list.c +++ b/src/fenix_comm_list.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/src/fenix_data_policy.c b/src/fenix_data_policy.c index b368223..603aff1 100644 --- a/src/fenix_data_policy.c +++ b/src/fenix_data_policy.c @@ -44,7 +44,8 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Matthew Whitlock +// Authors Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// and Matthew Whitloc // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/src/fenix_data_policy_in_memory_raid.c b/src/fenix_data_policy_in_memory_raid.c index d9af83f..40b265d 100644 --- a/src/fenix_data_policy_in_memory_raid.c +++ b/src/fenix_data_policy_in_memory_raid.c @@ -88,6 +88,9 @@ int __imr_barrier(fenix_group_t* group); int __imr_member_restore(fenix_group_t* group, int member_id, void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found); +int __imr_member_lrestore(fenix_group_t* group, int member_id, + void* target_buffer, int max_count, int time_stamp, + Fenix_Data_subset* data_found); int __imr_member_restore_from_rank(fenix_group_t* group, int member_id, void* target_buffer, int max_count, int time_stamp, int source_rank); @@ -138,6 +141,7 @@ void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm com new_group->base.vtbl.snapshot_delete = *__imr_snapshot_delete; new_group->base.vtbl.barrier = *__imr_barrier; new_group->base.vtbl.member_restore = *__imr_member_restore; + new_group->base.vtbl.member_lrestore = *__imr_member_lrestore; new_group->base.vtbl.member_restore_from_rank = *__imr_member_restore_from_rank; new_group->base.vtbl.member_get_attribute = *__imr_member_get_attribute; new_group->base.vtbl.member_set_attribute = *__imr_member_set_attribute; @@ -154,16 +158,82 @@ void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm com MPI_Comm_rank(comm, &my_rank); if(new_group->raid_mode == 1){ + //Set up the person who's data I am storing as partner 0 + //Set up the person who is storing my data as partner 1 new_group->partners = (int*) malloc(sizeof(int) * 2); + + //odd-sized groups take some extra handling. + bool isOdd = ((comm_size%2) != 0); + + int remaining_size = comm_size; + if(isOdd) remaining_size -= 3; + + //We want to form groups of rank_separation*2 to pair within + int n_full_groups = remaining_size / (new_group->rank_separation*2); + + //We don't always get what we want though, one group may need to be smaller. + int mini_group_size = (remaining_size - n_full_groups*new_group->rank_separation*2)/2; - //Set up the person who's data I am storing - //We need to add comm size to the value since otherwise we might be modding a negative number, - // which is implementation-dependent behavior. 
- new_group->partners[0] = (comm_size + my_rank - new_group->rank_separation)%comm_size; - //Set up the person who is storing my data - new_group->partners[1] = (my_rank + new_group->rank_separation)%comm_size; - + int start_rank = mini_group_size + (isOdd?1:0); + int mid_rank = comm_size/2; //Only used when isOdd + + int end_mini_group_start = comm_size-mini_group_size-(isOdd?1:0); + int start_mini_group_start = (isOdd?1:0); + bool in_start_mini=false, in_end_mini=false; + + if(my_rank >= start_mini_group_start && my_rank < start_mini_group_start+mini_group_size){ + in_start_mini = true; + } else if(my_rank >= end_mini_group_start && my_rank < comm_size-(isOdd?1:0)){ + in_end_mini = true; + } + + //Allocate the "normal" ranks + if(my_rank >= start_rank && my_rank < end_mini_group_start && (!isOdd || my_rank != mid_rank)){ + //"effective" rank for determining which group I'm in and if I look forward or backward for a partner. + int e_rank = my_rank - start_rank; + if(isOdd && my_rank > mid_rank) --e_rank; //We skip the middle rank when isOdd + + int my_partner; + if(((e_rank/new_group->rank_separation)%2) == 0){ + //Look forward for partner. + my_partner = my_rank + new_group->rank_separation; + if(isOdd && my_rank < mid_rank && my_partner >= mid_rank) ++my_partner; + } else { + my_partner = my_rank - new_group->rank_separation; + if(isOdd && my_rank > mid_rank && my_partner <= mid_rank) --my_partner; + } + + new_group->partners[0] = my_partner; + new_group->partners[1] = my_partner; + } else if(in_start_mini) { + int e_rank = my_rank - start_mini_group_start; + int partner = end_mini_group_start + e_rank; + new_group->partners[0] = partner; + new_group->partners[1] = partner; + } else if(in_end_mini) { + int e_rank = my_rank - end_mini_group_start; + int partner = start_mini_group_start + e_rank; + new_group->partners[0] = partner; + new_group->partners[1] = partner; + } else { //Only things left are the three ranks that must be paired to handle odd-sized comms + if(my_rank == 0){ + new_group->partners[0] = comm_size-1; + new_group->partners[1] = mid_rank; + } else if(my_rank == mid_rank){ + new_group->partners[0] = 0; + new_group->partners[1] = comm_size-1; + } else if(my_rank == comm_size-1){ + new_group->partners[0] = mid_rank; + new_group->partners[1] = 0; + } else { + fprintf(stderr, "FENIX_IMR Fatal error: Rank <%d> no partner assigned, this is a bug in IMR!\n", my_rank); + *flag = FENIX_ERROR_GROUP_CREATE; + return; + } + } + + } else if(new_group->raid_mode == 5){ new_group->set_size = policy_vals[2]; new_group->partners = (int*) malloc(sizeof(int) * new_group->set_size); @@ -642,7 +712,7 @@ int __imr_member_restore(fenix_group_t* g, int member_id, int my_data_found, partner_data_found; //We need to know if both partners found their data. - //First send to partner 1 and recv from partner 0, then flip. + //First send to partner 0 and recv from partner 1, then flip. MPI_Sendrecv(&found_member, 1, MPI_INT, group->partners[0], PARTNER_STATUS_TAG, &my_data_found, 1, MPI_INT, group->partners[1], PARTNER_STATUS_TAG, group->base.comm, NULL); @@ -650,53 +720,63 @@ int __imr_member_restore(fenix_group_t* g, int member_id, &partner_data_found, 1, MPI_INT, group->partners[0], PARTNER_STATUS_TAG, group->base.comm, NULL); - if(found_member && partner_data_found){ + if(found_member && partner_data_found && my_data_found){ //I have my data, and the person who's data I am backing up has theirs. We're good to go. 
retval = FENIX_SUCCESS; - } else if (!found_member && !my_data_found) { - //I lost my data, and my partner 1 doesn't have a copy for me to restore from. - debug_print("ERROR Fenix_Data_member_restore: member_id <%d> does not exist at <%d> or partner <%d>\n", - member_id, group->base.current_rank, group->partners[0]); + } else if (!found_member && (!my_data_found || !partner_data_found)){ + //I lost my data, and my partner doesn't have a copy for me to restore from. + debug_print("ERROR Fenix_Data_member_restore: member_id <%d> does not exist at <%d> or partner(s) <%d> <%d>\n", + member_id, group->base.current_rank, group->partners[0], group->partners[1]); retval = FENIX_ERROR_INVALID_MEMBERID; - } else if(found_member && !partner_data_found){ - //My partner needs info on this member. This policy does nothing special w/ extra input params, so + } else if(found_member){ + //My partner(s) need info on this member. This policy does nothing special w/ extra input params, so //I can just send the basic member metadata. - __fenix_data_member_send_metadata(group->base.groupid, member_id, group->partners[0]); + if(!partner_data_found) + __fenix_data_member_send_metadata(group->base.groupid, member_id, group->partners[0]); //Now my partner will need all of the entries. First they'll need to know how many snapshots //to expect. - MPI_Send((void*) &(group->num_snapshots), 1, MPI_INT, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); + if(!partner_data_found) + MPI_Send((void*) &(group->num_snapshots), 1, MPI_INT, group->partners[0], + RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); //They also need the timestamps for each snapshot, as well as the value for the next. - MPI_Send((void*)mentry->timestamp, group->num_snapshots+1, MPI_INT, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); + if(!partner_data_found) + MPI_Send((void*)mentry->timestamp, group->num_snapshots+1, MPI_INT, group->partners[0], + RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ //send data region info next - __fenix_data_subset_send(mentry->data_regions + snapshot, group->partners[0], - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->base.comm); + if(!partner_data_found) + __fenix_data_subset_send(mentry->data_regions + snapshot, group->partners[0], + __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->base.comm); - //send my data, to maintain resiliency on my data size_t size; - void* toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, - mentry->data[snapshot], member_data.datatype_size, member_data.current_count, - &size); - MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); + void* toSend; + //send my data, to maintain resiliency on my data + if(!my_data_found){ + toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, + mentry->data[snapshot], member_data.datatype_size, member_data.current_count, + &size); + MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[1], + RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); + free(toSend); + } //send their data - toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, - ((char*)mentry->data[snapshot]) + member_data.datatype_size*member_data.current_count, - member_data.datatype_size, member_data.current_count, &size); - 
MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); + if(!partner_data_found){ + toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, + ((char*)mentry->data[snapshot]) + member_data.datatype_size*member_data.current_count, + member_data.datatype_size, member_data.current_count, &size); + MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[0], + RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); + free(toSend); + } - free(toSend); } - } else if(!found_member && partner_data_found) { + } else if(!found_member) { //I need info on this member. fenix_member_entry_packet_t packet; __fenix_data_member_recv_metadata(group->base.groupid, group->partners[1], &packet); @@ -731,13 +811,13 @@ int __imr_member_restore(fenix_group_t* g, int member_id, if(recv_size > 0){ void* recv_buf = malloc(member_data.datatype_size * recv_size); //first recieve their data, so store in the resiliency section. - MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[1], + MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[0], RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); __fenix_data_subset_deserialize(mentry->data_regions + snapshot, recv_buf, ((char*)mentry->data[snapshot]) + member_data.current_count*member_data.datatype_size, member_data.current_count, member_data.datatype_size); - //first recieve their data, so store in the resiliency section. + //Now receive my data. MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[1], RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); __fenix_data_subset_deserialize(mentry->data_regions + snapshot, recv_buf, @@ -749,7 +829,7 @@ int __imr_member_restore(fenix_group_t* g, int member_id, } - recovery_locally_possible = found_member || my_data_found; + recovery_locally_possible = found_member || (my_data_found && partner_data_found); } else if (group->raid_mode == 5){ int* set_results = malloc(sizeof(int) * group->set_size); @@ -912,7 +992,7 @@ int __imr_member_restore(fenix_group_t* g, int member_id, __fenix_data_subset_init(1, data_found); //Don't try to restore if we weren't able to get the relevant data. - if(recovery_locally_possible){ + if(recovery_locally_possible && target_buffer != NULL){ data_found->specifier = __FENIX_SUBSET_EMPTY; int oldest_snapshot; @@ -956,6 +1036,70 @@ int __imr_member_restore(fenix_group_t* g, int member_id, return retval; } +int __imr_member_lrestore(fenix_group_t* g, int member_id, + void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found){ + int retval = -1; + + fenix_imr_group_t* group = (fenix_imr_group_t*)g; + + fenix_imr_mentry_t* mentry; + //find_mentry returns the error status. We found the member (and corresponding data) if there are no errors. 
+ int found_member = !(__imr_find_mentry(group, member_id, &mentry)); + + if(!found_member){ + return FENIX_ERROR_INVALID_MEMBERID; + } + + int member_data_index = __fenix_search_memberid(group->base.member, member_id); + fenix_member_entry_t member_data = group->base.member->member_entry[member_data_index]; + + + + int return_found_data; + if(data_found == NULL){ + data_found = (Fenix_Data_subset*) malloc(sizeof(Fenix_Data_subset)); + return_found_data = 0; + } else { + return_found_data = 1; + } + __fenix_data_subset_init(1, data_found); + + data_found->specifier = __FENIX_SUBSET_EMPTY; + + + int oldest_snapshot; + for(oldest_snapshot = (mentry->current_head - 1); oldest_snapshot >= 0; oldest_snapshot--){ + __fenix_data_subset_merge_inplace(data_found, mentry->data_regions + oldest_snapshot); + + if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ + //The snapshots have formed a full set of data, not need to add older snapshots. + break; + } + } + + //If there isn't a full set of data, don't try to pull from nonexistent snapshot. + if(oldest_snapshot == -1){ + oldest_snapshot = 0; + } + + for(int i = oldest_snapshot; i < mentry->current_head; i++){ + __fenix_data_subset_copy_data(&mentry->data_regions[i], target_buffer, + mentry->data[i], member_data.datatype_size, member_data.current_count); + } + + if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ + retval = FENIX_SUCCESS; + } else { + retval = FENIX_WARNING_PARTIAL_RESTORE; + } + + //Dont forget to clear the commit buffer + mentry->data_regions[mentry->current_head].specifier = __FENIX_SUBSET_EMPTY; + + return retval; + +} + int __imr_member_restore_from_rank(fenix_group_t* group, int member_id, void* target_buffer, int max_count, int time_stamp, diff --git a/src/fenix_data_recovery.c b/src/fenix_data_recovery.c index 7c1c706..da87c30 100644 --- a/src/fenix_data_recovery.c +++ b/src/fenix_data_recovery.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Michael Heroux, and Matthew Whitloc +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -63,6 +63,7 @@ #include "fenix_util.h" #include "fenix_ext.h" +#include /** * @brief create new group or recover group data for lost processes @@ -583,12 +584,41 @@ int __fenix_data_commit_barrier(int groupid, int *timestamp) { retval = FENIX_ERROR_INVALID_GROUPID; } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); + + + //We want to make sure there aren't any revocations and also do a barrier. + //Start by disabling Fenix error handling so we don't generate any new revokations here. + int old_failure_handling = fenix.ignore_errs; + fenix.ignore_errs = 1; + + //We'll use comm_agree as a resilient barrier, which should also give time for + //any revocations to propogate + int tmp_throwaway = 1; + MPIX_Comm_agree(group->comm, &tmp_throwaway); + //Now use iprobe to check for revocations. 
+ MPI_Status status; + int ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, group->comm, + &tmp_throwaway, &status); + + fenix.ignore_errs = old_failure_handling; + + + if(ret != MPI_ERR_REVOKED){ + retval = group->vtbl.commit(group); + } - retval = group->vtbl.commit(group); - - int min_timestamp; - MPI_Allreduce( &(group->timestamp), &min_timestamp, 1, MPI_INT, MPI_MIN, group->comm ); + //Now that we've (hopefully) commited, we want to handle any errors we've + //learned about w.r.t failures or revocations. No reason to put handling those off. + if(ret != MPI_SUCCESS){ + retval = ret; + //Just re-calling should have Fenix handle things according to whatever method + //has been assigned. + MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, group->comm, + &tmp_throwaway, &status); + } + + if (timestamp != NULL) { *timestamp = group->timestamp; } @@ -631,6 +661,40 @@ int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, return retval; } +/** + * @brief + * @param group_id + * @param member_id + * @param data + * @param max_count + * @param time_stamp + */ +int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, Fenix_Data_subset* data_found) { + + int retval = FENIX_SUCCESS; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); + int member_index = -1; + + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); + + + if (fenix.options.verbose == 25) { + verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, + member_index); + } + + if (group_index == -1) { + debug_print("ERROR Fenix_Data_member_lrestore: group_id <%d> does not exist\n", + groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + retval = group->vtbl.member_lrestore(group, memberid, data, maxcount, timestamp, data_found); + } + return retval; +} + /** * @brief * @param group_id diff --git a/src/fenix_mpi_override.c b/src/fenix_mpi_override.c index a3592a7..3761348 100644 --- a/src/fenix_mpi_override.c +++ b/src/fenix_mpi_override.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/src/fenix_opt.c b/src/fenix_opt.c index 03e07f2..8d5bfcb 100644 --- a/src/fenix_opt.c +++ b/src/fenix_opt.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.c index 313de82..5609326 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, and Michael Heroux +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -67,6 +67,8 @@ #include #include +#include + int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, char ***argv, int spare_ranks, int spawn, @@ -407,7 +409,6 @@ int __fenix_repair_ranks() if(fenix.role != FENIX_ROLE_INITIAL_RANK){ free(fenix.fail_world); } - fenix.fail_world = (int *) s_malloc(fenix.fail_world_size * sizeof(int)); fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, fenix.fail_world_size); @@ -530,8 +531,8 @@ int __fenix_repair_ranks() if (rank_offset < fenix.fail_world_size) { if (fenix.options.verbose == 2) { - verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", - current_rank, fenix.fail_world[rank_offset]); + verbose_print("reorder ranks; current_rank: %d -> new_rank: %d (offset %d)\n", + current_rank, fenix.fail_world[rank_offset], rank_offset); } current_rank = fenix.fail_world[rank_offset]; } @@ -602,9 +603,11 @@ int* __fenix_get_fail_ranks(int *survivor_world, int survivor_world_size, int fa { qsort(survivor_world, survivor_world_size, sizeof(int), __fenix_comparator); int failed_pos = 0; + int *fail_ranks = calloc(fail_world_size, sizeof(int)); + int i; - for (i = 0; i < survivor_world_size; i++) { + for (i = 0; i < survivor_world_size + fail_world_size; i++) { if (__fenix_binary_search(survivor_world, survivor_world_size, i) != 1) { if (fenix.options.verbose == 14) { verbose_print("fail_rank: %d, fail_ranks[%d]: %d\n", i, failed_pos, @@ -753,6 +756,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) } switch (ret) { + case MPI_ERR_PROC_FAILED_PENDING: case MPI_ERR_PROC_FAILED: MPIX_Comm_revoke(fenix.world); MPIX_Comm_revoke(fenix.new_world); @@ -788,6 +792,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) #endif } + fenix.role = FENIX_ROLE_SURVIVOR_RANK; if(!fenix.finalized) { switch(fenix.resume_mode) { diff --git a/src/fenix_util.c b/src/fenix_util.c index 3b40933..b56d237 100644 --- a/src/fenix_util.c +++ b/src/fenix_util.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -58,6 +58,8 @@ #include "fenix_process_recovery.h" #include "fenix_util.h" +char* logname; + /** * @brief * @param invec diff --git a/src/globals.c b/src/globals.c index e834a97..e812a08 100644 --- a/src/globals.c +++ b/src/globals.c @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) From 446564368b93fe9f933cc447b65eaff7a440394e Mon Sep 17 00:00:00 2001 From: "mwhitlo@sandia.gov" Date: Tue, 31 May 2022 13:09:24 -0700 Subject: [PATCH 05/15] Fix recovery bugs for poorly-timed failures Mostly related to communicator state management --- include/fenix_ext.h | 6 +- src/fenix_data_group.c | 2 +- src/fenix_data_member.c | 4 +- src/fenix_process_recovery.c | 178 ++++++++++++++++++----------------- 4 files changed, 98 insertions(+), 92 deletions(-) diff --git a/include/fenix_ext.h b/include/fenix_ext.h index 785a108..fd4b1a6 100644 --- a/include/fenix_ext.h +++ b/include/fenix_ext.h @@ -90,9 +90,13 @@ typedef struct { //fenix_communicator_list_t* communicator_list; // singly linked list for Fenix resilient communicators fenix_debug_opt_t options; // This is reserved to store the user options - MPI_Comm world; // Duplicate of the MPI communicator provided by user + MPI_Comm *world; // Duplicate of the MPI communicator provided by user MPI_Comm new_world; // Global MPI communicator identical to g_world but without spare ranks MPI_Comm *user_world; // MPI communicator with repaired ranks + //Manage state of the comms. Necessary when failures happen rapidly, mussing up state + int new_world_exists, user_world_exists; + + MPI_Op agree_op; // This is reserved for the global agreement call for Fenix data recovery API diff --git a/src/fenix_data_group.c b/src/fenix_data_group.c index 7fec469..ad453aa 100644 --- a/src/fenix_data_group.c +++ b/src/fenix_data_group.c @@ -77,7 +77,7 @@ fenix_data_recovery_t * __fenix_data_recovery_init() { if (fenix.options.verbose == 41) { verbose_print("c-rank: %d, role: %d, g-count: %zu, g-size: %zu\n", - __fenix_get_current_rank(fenix.world), fenix.role, data_recovery->count, + __fenix_get_current_rank(fenix.new_world), fenix.role, data_recovery->count, data_recovery->total_size); } diff --git a/src/fenix_data_member.c b/src/fenix_data_member.c index 5cf604a..a780276 100644 --- a/src/fenix_data_member.c +++ b/src/fenix_data_member.c @@ -75,7 +75,7 @@ fenix_member_t *__fenix_data_member_init() { if (fenix.options.verbose == 42) { verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.world), fenix.role, member->count, + __fenix_get_current_rank(fenix.new_world), fenix.role, member->count, member->total_size); } @@ -88,7 +88,7 @@ fenix_member_t *__fenix_data_member_init() { if (fenix.options.verbose == 42) { verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, mentry->memberid, mentry->state); } } diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.c index 5609326..3287c08 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.c @@ -82,9 +82,10 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.user_world = new_comm; MPI_Comm_create_errhandler(__fenix_test_MPI, &fenix.mpi_errhandler); - - MPI_Comm_dup(comm, &fenix.world); - PMPI_Comm_set_errhandler(fenix.world, fenix.mpi_errhandler); + + fenix.world = malloc(sizeof(MPI_Comm)); + MPI_Comm_dup(comm, fenix.world); + PMPI_Comm_set_errhandler(*fenix.world, fenix.mpi_errhandler); fenix.finalized = 0; fenix.spare_ranks = spare_ranks; @@ -123,13 +124,13 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.resume_mode = __FENIX_RESUME_AT_INIT; 
if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, value: %s\n", - __fenix_get_current_rank(fenix.world), fenix.role, value); + __fenix_get_current_rank(*fenix.world), fenix.role, value); } } else if (strcmp(value, "NO_JUMP") == 0) { fenix.resume_mode = __FENIX_RESUME_NO_JUMP; if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, value: %s\n", - __fenix_get_current_rank(fenix.world), fenix.role, value); + __fenix_get_current_rank(*fenix.world), fenix.role, value); } } else { @@ -145,13 +146,13 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.print_unhandled = 0; if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, UNHANDLED_MODE: %s\n", - __fenix_get_current_rank(fenix.world), fenix.role, value); + __fenix_get_current_rank(*fenix.world), fenix.role, value); } } else if (strcmp(value, "NO_JUMP") == 0) { fenix.print_unhandled = 1; if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, UNHANDLED_MODE: %s\n", - __fenix_get_current_rank(fenix.world), fenix.role, value); + __fenix_get_current_rank(*fenix.world), fenix.role, value); } } else { @@ -188,7 +189,7 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.num_inital_ranks = __fenix_get_world_size(fenix.new_world); if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, + __fenix_get_current_rank(*fenix.world), fenix.role, fenix.num_inital_ranks); } @@ -197,7 +198,7 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, + __fenix_get_current_rank(*fenix.world), fenix.role, fenix.num_inital_ranks); } } @@ -209,19 +210,21 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha int a; int myrank; MPI_Status mpi_status; - ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, fenix.world, + fenix.ignore_errs = 1; + ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, &mpi_status); // listen for a failure + fenix.ignore_errs = 0; if (ret == MPI_SUCCESS) { if (fenix.options.verbose == 0) { verbose_print("Finalize the program; rank: %d, role: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role); + __fenix_get_current_rank(*fenix.world), fenix.role); } __fenix_finalize_spare(); } else { fenix.repair_result = __fenix_repair_ranks(); if (fenix.options.verbose == 0) { verbose_print("spare rank exiting from MPI_Recv - repair ranks; rank: %d, role: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role); + __fenix_get_current_rank(*fenix.world), fenix.role); } } fenix.role = FENIX_ROLE_RECOVERED_RANK; @@ -235,7 +238,7 @@ int __fenix_create_new_world() int ret; if ( __fenix_spare_rank() == 1) { - int current_rank = __fenix_get_current_rank(fenix.world); + int current_rank = __fenix_get_current_rank(*fenix.world); /*************************************************************************/ /** MPI_UNDEFINED makes the new communicator "undefined" at spare ranks **/ @@ -244,29 +247,32 @@ int __fenix_create_new_world() /*************************************************************************/ if (fenix.options.verbose == 1) { - verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(fenix.world), + verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.world), 
fenix.role); } - ret = PMPI_Comm_split(fenix.world, MPI_UNDEFINED, current_rank, + ret = PMPI_Comm_split(*fenix.world, MPI_UNDEFINED, current_rank, &fenix.new_world); if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_split: %d\n", ret); } + fenix.new_world_exists = 0; //Should already be this } else { - int current_rank = __fenix_get_current_rank(fenix.world); + int current_rank = __fenix_get_current_rank(*fenix.world); if (fenix.options.verbose == 1) { - verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(fenix.world), + verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.world), fenix.role); } - ret = PMPI_Comm_split(fenix.world, 0, current_rank, &fenix.new_world); + ret = PMPI_Comm_split(*fenix.world, 0, current_rank, &fenix.new_world); + fenix.new_world_exists = 1; if (ret != MPI_SUCCESS){ - int len; - char errstr[MPI_MAX_ERROR_STRING]; - MPI_Error_string(ret, errstr, &len); - debug_print("MPI_Comm_split: %s\n", errstr); + // int len; + // char errstr[MPI_MAX_ERROR_STRING]; + // MPI_Error_string(ret, errstr, &len); + // debug_print("MPI_Comm_split err %d: %s\n", ret, errstr); + fenix.new_world_exists = 0; } } @@ -278,7 +284,7 @@ int __fenix_repair_ranks() /*********************************************************/ /* Do not forget comm_free for broken communicators */ /*********************************************************/ - + fenix.ignore_errs = 1; int ret; int survived_flag; @@ -292,11 +298,11 @@ int __fenix_repair_ranks() int repair_success = 0; int num_try = 0; int flag_g_world_freed = 0; - MPI_Comm world_without_failures; + MPI_Comm* world_without_failures = malloc(sizeof(MPI_Comm)); while (!repair_success) { repair_success = 1; - ret = MPIX_Comm_shrink(fenix.world, &world_without_failures); + ret = MPIX_Comm_shrink(*fenix.world, world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_shrink. 
repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; @@ -307,23 +313,25 @@ int __fenix_repair_ranks() /* Free up the storage for active process communicator */ /*********************************************************/ if ( __fenix_spare_rank() != 1) { - PMPI_Comm_free(&fenix.new_world); - PMPI_Comm_free(fenix.user_world); + if(fenix.new_world_exists) PMPI_Comm_free(&fenix.new_world); + if(fenix.user_world_exists) PMPI_Comm_free(fenix.user_world); + fenix.user_world_exists = 0; + fenix.new_world_exists = 0; } /*********************************************************/ /* Need closer look above */ /*********************************************************/ /* current_rank means the global MPI rank before failure */ - current_rank = __fenix_get_current_rank(fenix.world); - survivor_world_size = __fenix_get_world_size(world_without_failures); - world_size = __fenix_get_world_size(fenix.world); + current_rank = __fenix_get_current_rank(*fenix.world); + survivor_world_size = __fenix_get_world_size(*world_without_failures); + world_size = __fenix_get_world_size(*fenix.world); fenix.fail_world_size = world_size - survivor_world_size; if (fenix.options.verbose == 2) { verbose_print( "current_rank: %d, role: %d, world_size: %d, fail_world_size: %d, survivor_world_size: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, world_size, + __fenix_get_current_rank(*fenix.world), fenix.role, world_size, fenix.fail_world_size, survivor_world_size); } @@ -333,7 +341,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print( "current_rank: %d, role: %d, spare_ranks: %d, fail_world_size: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, fenix.spare_ranks, + __fenix_get_current_rank(*fenix.world), fenix.role, fenix.spare_ranks, fenix.fail_world_size); } @@ -354,13 +362,13 @@ int __fenix_repair_ranks() survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, - world_without_failures); + *world_without_failures); if (fenix.options.verbose == 2) { int index; for (index = 0; index < survivor_world_size; index++) { verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, index, + __fenix_get_current_rank(*fenix.world), fenix.role, index, survivor_world[index]); } } @@ -369,9 +377,9 @@ int __fenix_repair_ranks() if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); + MPIX_Comm_revoke(*world_without_failures); } - MPI_Comm_free(&world_without_failures); + MPI_Comm_free(world_without_failures); free(survivor_world); goto END_LOOP; } @@ -382,15 +390,15 @@ int __fenix_repair_ranks() } ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, - MPI_INT, MPI_SUM, world_without_failures); + MPI_INT, MPI_SUM, *world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. 
repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); + MPIX_Comm_revoke(*world_without_failures); } - MPI_Comm_free(&world_without_failures); + MPI_Comm_free(world_without_failures); free(survivor_world); goto END_LOOP; } @@ -402,7 +410,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, + __fenix_get_current_rank(*fenix.world), fenix.role, fenix.num_recovered_ranks); } @@ -425,7 +433,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, + __fenix_get_current_rank(*fenix.world), fenix.role, active_ranks); } @@ -467,14 +475,14 @@ int __fenix_repair_ranks() survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, - world_without_failures); + *world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); + MPIX_Comm_revoke(*world_without_failures); } - MPI_Comm_free(&world_without_failures); + MPI_Comm_free(world_without_failures); free(survivor_world); goto END_LOOP; } @@ -485,14 +493,14 @@ int __fenix_repair_ranks() } ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, - MPI_INT, MPI_SUM, world_without_failures); + MPI_INT, MPI_SUM, *world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret != MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); + MPIX_Comm_revoke(*world_without_failures); } - MPI_Comm_free(&world_without_failures); + MPI_Comm_free(world_without_failures); free(survivor_world); goto END_LOOP; } @@ -519,7 +527,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, active_ranks); + __fenix_get_current_rank(*fenix.world), fenix.role, active_ranks); } if (current_rank >= active_ranks) { // reorder ranks @@ -544,7 +552,7 @@ int __fenix_repair_ranks() fenix.spare_ranks = fenix.spare_ranks - fenix.fail_world_size; if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, spare_ranks: %d\n", - __fenix_get_current_rank(fenix.world), fenix.role, + __fenix_get_current_rank(*fenix.world), fenix.role, fenix.spare_ranks); } } @@ -553,33 +561,26 @@ int __fenix_repair_ranks() /* Done with the global communicator */ /*********************************************************/ - if (!flag_g_world_freed) { - ret = PMPI_Comm_free(&fenix.world); - if (ret != MPI_SUCCESS) { flag_g_world_freed = 1; } - } - ret = PMPI_Comm_split(world_without_failures, 0, current_rank, &fenix.world); + MPI_Comm* swap = fenix.world; + fenix.world = world_without_failures; + world_without_failures = swap; - /* if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_split. 
repair_ranks\n"); } */ - if (ret != MPI_SUCCESS) { - repair_success = 0; - if (ret != MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); - } - MPI_Comm_free(&world_without_failures); - goto END_LOOP; - } - ret = PMPI_Comm_free(&world_without_failures); + ret = PMPI_Comm_free(world_without_failures); /* As of 8/8/2016 */ /* Need special treatment for error handling */ - __fenix_create_new_world(); + ret = __fenix_create_new_world(); + if(ret != MPI_SUCCESS){ + repair_success = 0; + goto END_LOOP; + } - ret = PMPI_Barrier(fenix.world); + ret = PMPI_Barrier(*fenix.world); /* if (ret != MPI_SUCCESS) { debug_print("MPI_Barrier. repair_ranks\n"); } */ if (ret != MPI_SUCCESS) { repair_success = 0; if (ret != MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(fenix.world); + MPIX_Comm_revoke(*fenix.world); } } @@ -591,11 +592,13 @@ int __fenix_repair_ranks() /*******************************************************/ /* - if (__fenix_get_current_rank(fenix.world) == FENIX_ROOT) { + if (__fenix_get_current_rank(*fenix.world) == FENIX_ROOT) { LDEBUG("Fenix: communicators repaired\n"); } */ } + free(world_without_failures); + fenix.ignore_errs=0; return rt_code; } @@ -622,8 +625,8 @@ int* __fenix_get_fail_ranks(int *survivor_world, int survivor_world_size, int fa int __fenix_spare_rank() { int result = -1; - int current_rank = __fenix_get_current_rank(fenix.world); - int new_world_size = __fenix_get_world_size(fenix.world) - fenix.spare_ranks; + int current_rank = __fenix_get_current_rank(*fenix.world); + int new_world_size = __fenix_get_world_size(*fenix.world) - fenix.spare_ranks; if (current_rank >= new_world_size) { if (fenix.options.verbose == 6) { verbose_print("current_rank: %d, new_world_size: %d\n", current_rank, new_world_size); @@ -644,6 +647,7 @@ void __fenix_postinit(int *error) PMPI_Barrier(fenix.new_world); PMPI_Comm_dup(fenix.new_world, fenix.user_world); + fenix.user_world_exists = 1; if (fenix.repair_result != 0) { *error = fenix.repair_result; @@ -673,23 +677,20 @@ void __fenix_finalize() // after recovery. fenix.finalized = 1; - //We don't want to handle failures in here as normally, we just want to continue trying to finalize. - fenix.ignore_errs = 1; - int ret = MPI_Barrier( fenix.new_world ); if (ret != MPI_SUCCESS) { __fenix_finalize(); return; } - if (__fenix_get_current_rank(fenix.world) == 0) { + if (__fenix_get_current_rank(*fenix.world) == 0) { int spare_rank; - MPI_Comm_size(fenix.world, &spare_rank); + MPI_Comm_size(*fenix.world, &spare_rank); spare_rank--; int a; int i; for (i = 0; i < fenix.spare_ranks; i++) { - int ret = MPI_Send(&a, 1, MPI_INT, spare_rank, 1, fenix.world); + int ret = MPI_Send(&a, 1, MPI_INT, spare_rank, 1, *fenix.world); if (ret != MPI_SUCCESS) { __fenix_finalize(); return; @@ -698,16 +699,17 @@ void __fenix_finalize() } } - ret = MPI_Barrier(fenix.world); + ret = MPI_Barrier(*fenix.world); if (ret != MPI_SUCCESS) { __fenix_finalize(); return; } MPI_Op_free( &fenix.agree_op ); - MPI_Comm_set_errhandler( fenix.world, MPI_ERRORS_ARE_FATAL ); - MPI_Comm_free( &fenix.world ); - MPI_Comm_free( &fenix.new_world ); + MPI_Comm_set_errhandler( *fenix.world, MPI_ERRORS_ARE_FATAL ); + MPI_Comm_free( fenix.world ); + free(fenix.world); + if(fenix.new_world_exists) MPI_Comm_free( &fenix.new_world ); //It should, but just in case. Won't update because trying to free it again ought to generate an error anyway. 
if(fenix.role != FENIX_ROLE_INITIAL_RANK){ free(fenix.fail_world); @@ -725,12 +727,12 @@ void __fenix_finalize() void __fenix_finalize_spare() { fenix.fenix_init_flag = 0; - int ret = PMPI_Barrier(fenix.world); + int ret = PMPI_Barrier(*fenix.world); if (ret != MPI_SUCCESS) { debug_print("MPI_Barrier: %d\n", ret); } MPI_Op_free(&fenix.agree_op); - MPI_Comm_set_errhandler(fenix.world, MPI_ERRORS_ARE_FATAL); - MPI_Comm_free(&fenix.world); + MPI_Comm_set_errhandler(*fenix.world, MPI_ERRORS_ARE_FATAL); + MPI_Comm_free(fenix.world); /* Free callbacks */ __fenix_callback_destroy( fenix.callback_list ); @@ -741,7 +743,7 @@ void __fenix_finalize_spare() fenix.fenix_init_flag = 0; /* Future version do not close MPI. Jump to where Fenix_Finalize is called. */ - MPI_Finalize(); + //MPI_Finalize(); exit(0); } @@ -758,10 +760,10 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) switch (ret) { case MPI_ERR_PROC_FAILED_PENDING: case MPI_ERR_PROC_FAILED: - MPIX_Comm_revoke(fenix.world); + MPIX_Comm_revoke(*fenix.world); MPIX_Comm_revoke(fenix.new_world); - - MPIX_Comm_revoke(*fenix.user_world); + + if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); __fenix_comm_list_destroy(); @@ -785,7 +787,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) return; break; #ifdef MPICH - MPIX_Comm_revoke(fenix.world); + MPIX_Comm_revoke(*fenix.world); MPIX_Comm_revoke(fenix.new_world); //MPIX_Comm_revoke(*fenix.user_world); fenix.repair_result = __fenix_repair_ranks(); From eb10cca9f4bdf129ff13c7136c47bd0f1693dbdc Mon Sep 17 00:00:00 2001 From: "mwhitlo@sandia.gov" Date: Tue, 31 May 2022 14:49:44 -0700 Subject: [PATCH 06/15] Fix recovery rank placement issues --- src/fenix_process_recovery.c | 119 ++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.c index 3287c08..6f28815 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.c @@ -233,12 +233,26 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha return fenix.role; } -int __fenix_create_new_world() +int __fenix_spare_rank_within(MPI_Comm refcomm) +{ + int result = -1; + int current_rank = __fenix_get_current_rank(refcomm); + int new_world_size = __fenix_get_world_size(refcomm) - fenix.spare_ranks; + if (current_rank >= new_world_size) { + if (fenix.options.verbose == 6) { + verbose_print("current_rank: %d, new_world_size: %d\n", current_rank, new_world_size); + } + result = 1; + } + return result; +} + +int __fenix_create_new_world_from(MPI_Comm from_comm) { int ret; - if ( __fenix_spare_rank() == 1) { - int current_rank = __fenix_get_current_rank(*fenix.world); + if ( __fenix_spare_rank_within(from_comm) == 1) { + int current_rank = __fenix_get_current_rank(from_comm); /*************************************************************************/ /** MPI_UNDEFINED makes the new communicator "undefined" at spare ranks **/ @@ -247,31 +261,31 @@ int __fenix_create_new_world() /*************************************************************************/ if (fenix.options.verbose == 1) { - verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.world), + verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(from_comm), fenix.role); } - ret = PMPI_Comm_split(*fenix.world, MPI_UNDEFINED, current_rank, + ret = PMPI_Comm_split(from_comm, MPI_UNDEFINED, current_rank, &fenix.new_world); - if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_split: %d\n", ret); } + //if (ret != 
MPI_SUCCESS) { debug_print("MPI_Comm_split: %d\n", ret); } fenix.new_world_exists = 0; //Should already be this } else { - int current_rank = __fenix_get_current_rank(*fenix.world); + int current_rank = __fenix_get_current_rank(from_comm); if (fenix.options.verbose == 1) { - verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.world), + verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(from_comm), fenix.role); } - ret = PMPI_Comm_split(*fenix.world, 0, current_rank, &fenix.new_world); + ret = PMPI_Comm_split(from_comm, 0, current_rank, &fenix.new_world); fenix.new_world_exists = 1; if (ret != MPI_SUCCESS){ - // int len; - // char errstr[MPI_MAX_ERROR_STRING]; - // MPI_Error_string(ret, errstr, &len); - // debug_print("MPI_Comm_split err %d: %s\n", ret, errstr); + //int len; + //char errstr[MPI_MAX_ERROR_STRING]; + //MPI_Error_string(ret, errstr, &len); + //debug_print("MPI_Comm_split err %d: %s\n", ret, errstr); fenix.new_world_exists = 0; } @@ -279,6 +293,10 @@ int __fenix_create_new_world() return ret; } +int __fenix_create_new_world(){ + return __fenix_create_new_world_from(*fenix.world); +} + int __fenix_repair_ranks() { /*********************************************************/ @@ -298,11 +316,11 @@ int __fenix_repair_ranks() int repair_success = 0; int num_try = 0; int flag_g_world_freed = 0; - MPI_Comm* world_without_failures = malloc(sizeof(MPI_Comm)); + MPI_Comm world_without_failures, fixed_world; while (!repair_success) { repair_success = 1; - ret = MPIX_Comm_shrink(*fenix.world, world_without_failures); + ret = MPIX_Comm_shrink(*fenix.world, &world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_shrink. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; @@ -324,7 +342,7 @@ int __fenix_repair_ranks() /* current_rank means the global MPI rank before failure */ current_rank = __fenix_get_current_rank(*fenix.world); - survivor_world_size = __fenix_get_world_size(*world_without_failures); + survivor_world_size = __fenix_get_world_size(world_without_failures); world_size = __fenix_get_world_size(*fenix.world); fenix.fail_world_size = world_size - survivor_world_size; @@ -362,7 +380,7 @@ int __fenix_repair_ranks() survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, - *world_without_failures); + world_without_failures); if (fenix.options.verbose == 2) { int index; @@ -377,9 +395,9 @@ int __fenix_repair_ranks() if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(*world_without_failures); + MPIX_Comm_revoke(world_without_failures); } - MPI_Comm_free(world_without_failures); + MPI_Comm_free(&world_without_failures); free(survivor_world); goto END_LOOP; } @@ -390,15 +408,15 @@ int __fenix_repair_ranks() } ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, - MPI_INT, MPI_SUM, *world_without_failures); + MPI_INT, MPI_SUM, world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. 
repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(*world_without_failures); + MPIX_Comm_revoke(world_without_failures); } - MPI_Comm_free(world_without_failures); + MPI_Comm_free(&world_without_failures); free(survivor_world); goto END_LOOP; } @@ -469,20 +487,19 @@ int __fenix_repair_ranks() } } else { - int active_ranks; survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, - *world_without_failures); + world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(*world_without_failures); + MPIX_Comm_revoke(world_without_failures); } - MPI_Comm_free(world_without_failures); + MPI_Comm_free(&world_without_failures); free(survivor_world); goto END_LOOP; } @@ -493,18 +510,19 @@ int __fenix_repair_ranks() } ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, - MPI_INT, MPI_SUM, *world_without_failures); + MPI_INT, MPI_SUM, world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret != MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(*world_without_failures); + MPIX_Comm_revoke(world_without_failures); } - MPI_Comm_free(world_without_failures); + MPI_Comm_free(&world_without_failures); free(survivor_world); goto END_LOOP; } + fenix.num_inital_ranks = 0; fenix.num_recovered_ranks = fenix.fail_world_size; @@ -561,27 +579,35 @@ int __fenix_repair_ranks() /* Done with the global communicator */ /*********************************************************/ - MPI_Comm* swap = fenix.world; - fenix.world = world_without_failures; - world_without_failures = swap; + ret = PMPI_Comm_split(world_without_failures, 0, current_rank, &fixed_world); + + if (ret != MPI_SUCCESS) { + repair_success = 0; + if (ret != MPI_ERR_PROC_FAILED) { + MPIX_Comm_revoke(world_without_failures); + } + MPI_Comm_free(&world_without_failures); + goto END_LOOP; + } - ret = PMPI_Comm_free(world_without_failures); + MPI_Comm_free(&world_without_failures); /* As of 8/8/2016 */ /* Need special treatment for error handling */ - ret = __fenix_create_new_world(); + ret = __fenix_create_new_world_from(fixed_world); if(ret != MPI_SUCCESS){ repair_success = 0; + MPIX_Comm_revoke(fixed_world); + MPI_Comm_free(&fixed_world); goto END_LOOP; } - ret = PMPI_Barrier(*fenix.world); + ret = PMPI_Barrier(fixed_world); /* if (ret != MPI_SUCCESS) { debug_print("MPI_Barrier. 
repair_ranks\n"); } */ if (ret != MPI_SUCCESS) { repair_success = 0; - if (ret != MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(*fenix.world); - } + MPIX_Comm_revoke(fixed_world); + MPI_Comm_free(&fixed_world); } END_LOOP: @@ -597,7 +623,8 @@ int __fenix_repair_ranks() } */ } - free(world_without_failures); + + *fenix.world = fixed_world; fenix.ignore_errs=0; return rt_code; } @@ -622,18 +649,8 @@ int* __fenix_get_fail_ranks(int *survivor_world, int survivor_world_size, int fa return fail_ranks; } -int __fenix_spare_rank() -{ - int result = -1; - int current_rank = __fenix_get_current_rank(*fenix.world); - int new_world_size = __fenix_get_world_size(*fenix.world) - fenix.spare_ranks; - if (current_rank >= new_world_size) { - if (fenix.options.verbose == 6) { - verbose_print("current_rank: %d, new_world_size: %d\n", current_rank, new_world_size); - } - result = 1; - } - return result; +int __fenix_spare_rank(){ + return __fenix_spare_rank_within(*fenix.world); } void __fenix_postinit(int *error) From 49ad6fa8c4cf2cb50e7e6cae0cb38258c037844a Mon Sep 17 00:00:00 2001 From: "mwhitlo@sandia.gov" Date: Tue, 31 May 2022 15:47:50 -0700 Subject: [PATCH 07/15] Improve promises about error reporting Before, unfortunately placed errors could "overwrite" the reporting info of prior errors without that info ever making it to the user. Now, we guarantee that info at least makes it to the user's first recovery callback. IE, users will guaranteed see a role of FENIX_ROLE_INITIAL_RANK or FENIX_ROLE_RECOVERED_RANK for a process prior to seeing FENIX_ROLE_SURVIVOR_RANK. --- src/fenix_process_recovery.c | 46 +++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.c index 6f28815..1bdeac6 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.c @@ -230,6 +230,10 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.role = FENIX_ROLE_RECOVERED_RANK; } + + if(fenix.role != FENIX_ROLE_RECOVERED_RANK) MPI_Comm_dup(fenix.new_world, fenix.user_world); + fenix.user_world_exists = 1; + return fenix.role; } @@ -318,6 +322,11 @@ int __fenix_repair_ranks() int flag_g_world_freed = 0; MPI_Comm world_without_failures, fixed_world; + + /* current_rank means the global MPI rank before failure */ + current_rank = __fenix_get_current_rank(*fenix.world); + world_size = __fenix_get_world_size(*fenix.world); + while (!repair_success) { repair_success = 1; ret = MPIX_Comm_shrink(*fenix.world, &world_without_failures); @@ -340,16 +349,13 @@ int __fenix_repair_ranks() /* Need closer look above */ /*********************************************************/ - /* current_rank means the global MPI rank before failure */ - current_rank = __fenix_get_current_rank(*fenix.world); survivor_world_size = __fenix_get_world_size(world_without_failures); - world_size = __fenix_get_world_size(*fenix.world); fenix.fail_world_size = world_size - survivor_world_size; if (fenix.options.verbose == 2) { verbose_print( "current_rank: %d, role: %d, world_size: %d, fail_world_size: %d, survivor_world_size: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, world_size, + current_rank, fenix.role, world_size, fenix.fail_world_size, survivor_world_size); } @@ -359,7 +365,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print( "current_rank: %d, role: %d, spare_ranks: %d, fail_world_size: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, fenix.spare_ranks, + 
current_rank, fenix.role, fenix.spare_ranks, fenix.fail_world_size); } @@ -386,7 +392,7 @@ int __fenix_repair_ranks() int index; for (index = 0; index < survivor_world_size; index++) { verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, index, + current_rank, fenix.role, index, survivor_world[index]); } } @@ -428,7 +434,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + current_rank, fenix.role, fenix.num_recovered_ranks); } @@ -451,7 +457,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + current_rank, fenix.role, active_ranks); } @@ -545,7 +551,7 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, active_ranks); + current_rank, fenix.role, active_ranks); } if (current_rank >= active_ranks) { // reorder ranks @@ -570,7 +576,7 @@ int __fenix_repair_ranks() fenix.spare_ranks = fenix.spare_ranks - fenix.fail_world_size; if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, spare_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + current_rank, fenix.role, fenix.spare_ranks); } } @@ -592,8 +598,6 @@ int __fenix_repair_ranks() MPI_Comm_free(&world_without_failures); - /* As of 8/8/2016 */ - /* Need special treatment for error handling */ ret = __fenix_create_new_world_from(fixed_world); if(ret != MPI_SUCCESS){ repair_success = 0; @@ -602,12 +606,24 @@ int __fenix_repair_ranks() goto END_LOOP; } + if(__fenix_spare_rank_within(fixed_world) == -1){ + ret = MPI_Comm_dup(fenix.new_world, fenix.user_world); + if (ret != MPI_SUCCESS){ + repair_success = 0; + MPIX_Comm_revoke(fixed_world); + MPI_Comm_free(&fixed_world); + goto END_LOOP; + } + fenix.user_world_exists = 1; + } + ret = PMPI_Barrier(fixed_world); /* if (ret != MPI_SUCCESS) { debug_print("MPI_Barrier. 
repair_ranks\n"); } */ if (ret != MPI_SUCCESS) { repair_success = 0; MPIX_Comm_revoke(fixed_world); MPI_Comm_free(&fixed_world); + goto END_LOOP; } END_LOOP: @@ -618,7 +634,7 @@ int __fenix_repair_ranks() /*******************************************************/ /* - if (__fenix_get_current_rank(*fenix.world) == FENIX_ROOT) { + if (current_rank == FENIX_ROOT) { LDEBUG("Fenix: communicators repaired\n"); } */ @@ -661,10 +677,6 @@ void __fenix_postinit(int *error) // fenix.role); //} - PMPI_Barrier(fenix.new_world); - - PMPI_Comm_dup(fenix.new_world, fenix.user_world); - fenix.user_world_exists = 1; if (fenix.repair_result != 0) { *error = fenix.repair_result; From efe476ca16a9495444e46693644b4174ee8514b4 Mon Sep 17 00:00:00 2001 From: Matthew Date: Tue, 25 Oct 2022 15:44:23 -0400 Subject: [PATCH 08/15] Fix a bug related to inconsistent state during commit_barrier --- include/fenix.h | 4 ++++ src/fenix_data_recovery.c | 40 +++++++++++++++--------------------- src/fenix_process_recovery.c | 14 +++++++++++-- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/include/fenix.h b/include/fenix.h index 4d7ca67..1a283bf 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -104,6 +104,10 @@ extern "C" { #define FENIX_DATA_SNAPSHOT_ALL 16 #define FENIX_DATA_SUBSET_CREATED 2 +#define FENIX_ERRHANDLER_LOC 1 +#define FENIX_DATA_COMMIT_BARRIER_LOC 2 + + #define FENIX_DATA_POLICY_IN_MEMORY_RAID 13 typedef enum { diff --git a/src/fenix_data_recovery.c b/src/fenix_data_recovery.c index da87c30..9ddc8ef 100644 --- a/src/fenix_data_recovery.c +++ b/src/fenix_data_recovery.c @@ -586,39 +586,33 @@ int __fenix_data_commit_barrier(int groupid, int *timestamp) { fenix_group_t *group = (fenix.data_recovery->group[group_index]); - //We want to make sure there aren't any revocations and also do a barrier. - //Start by disabling Fenix error handling so we don't generate any new revokations here. + //We want to make sure there aren't any failed MPI operations (IE unfinished stores) + //But we don't want to fail to commit if a failure has happened since a successful store. int old_failure_handling = fenix.ignore_errs; fenix.ignore_errs = 1; - //We'll use comm_agree as a resilient barrier, which should also give time for - //any revocations to propogate - int tmp_throwaway = 1; - MPIX_Comm_agree(group->comm, &tmp_throwaway); - //Now use iprobe to check for revocations. - MPI_Status status; - int ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, group->comm, - &tmp_throwaway, &status); + int can_commit = 0; - fenix.ignore_errs = old_failure_handling; + //We'll use comm_agree as a resilient barrier + //Our error handler also enters an agree, with a unique location bit set. + //So if we aren't all here, we've hit an error already. + int location = FENIX_DATA_COMMIT_BARRIER_LOC; + int ret = MPIX_Comm_agree(*fenix.user_world, &location); + if(location == FENIX_DATA_COMMIT_BARRIER_LOC) can_commit = 1; + + fenix.ignore_errs = old_failure_handling; - if(ret != MPI_ERR_REVOKED){ + if(can_commit == 1){ retval = group->vtbl.commit(group); } - - //Now that we've (hopefully) commited, we want to handle any errors we've - //learned about w.r.t failures or revocations. No reason to put handling those off. - if(ret != MPI_SUCCESS){ - retval = ret; - //Just re-calling should have Fenix handle things according to whatever method - //has been assigned. 
- MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, group->comm, - &tmp_throwaway, &status); + if(can_commit != 1 || ret != MPI_SUCCESS) { + //A rank failure has happened, lets trigger error handling if enabled. + int throwaway = 1; + MPI_Allreduce(MPI_IN_PLACE, &throwaway, 1, MPI_INT, MPI_SUM, *fenix.user_world); } - - + if (timestamp != NULL) { *timestamp = group->timestamp; } diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.c index 1bdeac6..77080b0 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.c @@ -326,8 +326,19 @@ int __fenix_repair_ranks() /* current_rank means the global MPI rank before failure */ current_rank = __fenix_get_current_rank(*fenix.world); world_size = __fenix_get_world_size(*fenix.world); + + //Double check that every process is here, not in some local error handling elsewhere. + //Assume that other locations will converge here. + if(__fenix_spare_rank() != 1){ + int location = FENIX_ERRHANDLER_LOC; + do { + location = FENIX_ERRHANDLER_LOC; + MPIX_Comm_agree(*fenix.user_world, &location); + } while(location != FENIX_ERRHANDLER_LOC); + } while (!repair_success) { + repair_success = 1; ret = MPIX_Comm_shrink(*fenix.world, &world_without_failures); //if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_shrink. repair_ranks\n"); } @@ -772,13 +783,12 @@ void __fenix_finalize_spare() fenix.fenix_init_flag = 0; /* Future version do not close MPI. Jump to where Fenix_Finalize is called. */ - //MPI_Finalize(); + MPI_Finalize(); exit(0); } void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) { - int ret_repair; int index; int ret = *pret; From d7472e28085287495c8a9a8b2d1fc9b8f1dd790b Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 14 Feb 2023 09:27:28 -0500 Subject: [PATCH 09/15] Implement Github actions for testing --- .github/Dockerfile | 29 ++++++ .github/docker-compose.yml | 81 +++++++++++++++++ .github/workflows/ci_checks.yaml | 34 ++++++++ CMakeLists.txt | 92 ++++---------------- examples/01_hello_world/fenix/CMakeLists.txt | 5 +- examples/02_send_recv/fenix/CMakeLists.txt | 5 +- examples/05_subset_create/CMakeLists.txt | 5 +- examples/06_subset_createv/CMakeLists.txt | 5 +- examples/CMakeLists.txt | 6 ++ src/CMakeLists.txt | 28 +----- test/CMakeLists.txt | 6 ++ test/failed_spares/CMakeLists.txt | 6 +- test/issend/CMakeLists.txt | 5 +- test/no_jump/CMakeLists.txt | 5 +- test/request_cancelled/CMakeLists.txt | 5 +- test/request_tracking/CMakeLists.txt | 5 +- test/subset_internal/CMakeLists.txt | 1 - test/subset_merging/CMakeLists.txt | 1 - 18 files changed, 190 insertions(+), 134 deletions(-) create mode 100644 .github/Dockerfile create mode 100644 .github/docker-compose.yml create mode 100644 .github/workflows/ci_checks.yaml create mode 100644 examples/CMakeLists.txt create mode 100644 test/CMakeLists.txt diff --git a/.github/Dockerfile b/.github/Dockerfile new file mode 100644 index 0000000..e3205fb --- /dev/null +++ b/.github/Dockerfile @@ -0,0 +1,29 @@ +#Built for testing, not designed for application use. + +FROM ubuntu:20.04 +#="open-mpi/ompi" for github.com/open-mpi/ompi +ARG OPENMPI_REPO="open-mpi/ompi" +#="tags" or ="heads", for tag or branch name +ARG OPENMPI_VERS_PREFIX="tags" +#="v5.0.0rc10" or ="v5.0.x", ie tag name or branch name. 
+ARG OPENMPI_VERS="v5.0.0rc10" +run echo Using https://github.com/${OPENMPI_REPO}/git/refs/${OPENMPI_VERS_PREFIX}/${OPENMPI_VERS} + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential python3 m4 autoconf automake libtool flex git zlib1g-dev + +#Add files listing latest commit for this branch/tag, which invalidates the clone +#when a change has been pushed. +ADD https://api.github.com/repos/${OPENMPI_REPO}/git/refs/${OPENMPI_VERS_PREFIX}/${OPENMPI_VERS} commit_info +RUN git clone --recursive --branch ${OPENMPI_VERS} --depth 1 https://github.com/${OPENMPI_REPO}.git ompi_src && \ + mkdir ompi_build ompi_install && cd ompi_src && export AUTOMAKE_JOBS=8 && ./autogen.pl && cd ../ompi_build && ../ompi_src/configure --prefix=/ompi_install --disable-man-pages --with-ft=ulfm && make install -j8 && cd .. + + +#New build stage, tosses out src/build trees from openmpi +FROM ubuntu:20.04 +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential cmake ssh zlib1g-dev +COPY . ./fenix_src +COPY --from=0 ompi_install/ /ompi_install/ +ENV PATH="$PATH:/ompi_install/bin" +RUN mkdir fenix_build fenix_install && cd fenix_build && cmake ../fenix_src -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/ompi_install/bin/mpicc \ + -DBUILD_EXAMPLES=ON -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX=../fenix_install -DMPIEXEC_PREFLAGS="--allow-run-as-root;--map-by;:OVERSUBSCRIBE" && make install -j8 +CMD ["sh", "-c", "cd fenix_build && ctest --verbose"] diff --git a/.github/docker-compose.yml b/.github/docker-compose.yml new file mode 100644 index 0000000..46088b5 --- /dev/null +++ b/.github/docker-compose.yml @@ -0,0 +1,81 @@ +version: "3.9" + +x-fenix: &fenix + build: &fenix-build + context: ./ + dockerfile: .github/Dockerfile + args: + OPENMPI_REPO: open-mpi/ompi + OPENMPI_VERS_PREFIX: tags + OPENMPI_VERS: v5.0.0rc10 + #Caches should be manually scoped, or they'll conflict. 
+ x-bake: + cache-from: + - type=gha,scope=default + cache-to: + - type=gha,scope=default,mode=max + +services: + fenix_ompi_5rc10: + <<: *fenix + image: "fenix:ompi_5rc10" + build: + <<: *fenix-build + x-bake: + cache-from: + - type=gha,scope=ompi_5rc10 + cache-to: + - type=gha,scope=ompi_5rc10,mode=max + + fenix_ompi_5: + <<: *fenix + image: "fenix:ompi_5" + build: + <<: *fenix-build + args: + - OPENMPI_VERS_PREFIX=heads + - OPENMPI_VERS=v5.0.x + x-bake: + cache-from: + - type=gha,scope=ompi_5 + cache-to: + - type=gha,scope=ompi_5,mode=max + + fenix_ompi_main: + <<: *fenix + image: "fenix:ompi_main" + build: + <<: *fenix-build + args: + - OPENMPI_VERS_PREFIX=heads + - OPENMPI_VERS=main + x-bake: + cache-from: + - type=gha,scope=ompi_main + cache-to: + - type=gha,scope=ompi_main,mode=max + + fenix_icldisco_latest: + <<: *fenix + image: "fenix:icldisco_latest" + build: + <<: *fenix-build + args: + - OPENMPI_REPO=icldisco/ompi + - OPENMPI_VERS_PREFIX=heads + - OPENMPI_VERS=ulfm/latest + x-bake: + cache-from: + - type=gha,scope=icldisco_latest + cache-to: + - type=gha,scope=icldisco_latest,mode=max + + #fenix_icldisco_experimental: + # <<: *fenix + # image: fenix/icldisco + # build: + # <<: *fenix-build + # args: + # - OPENMPI_REPO=icldisco/ompi + # - OPENMPI_VERS_PREFIX=heads + # - OPENMPI_VERS=ulfm/experimental diff --git a/.github/workflows/ci_checks.yaml b/.github/workflows/ci_checks.yaml new file mode 100644 index 0000000..e4671a4 --- /dev/null +++ b/.github/workflows/ci_checks.yaml @@ -0,0 +1,34 @@ +name: Build & Test + +on: + push: + pull_request_target: + types: + - opened + - synchronized + - edited + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: docker/setup-buildx-action@v2 + - name: Build + uses: docker/bake-action@master + with: + files: | + .github/docker-compose.yml + load: true + - name: Test open-mpi v5.0.0rc10 + if: success() || failure() + run: docker run fenix:ompi_5rc10 + - name: Test open-mpi v5.0.x + if: success() || failure() + run: docker run fenix:ompi_5 + - name: Test open-mpi main + if: success() || failure() + run: docker run fenix:ompi_main + - name: Test icldisco latest + if: success() || failure() + run: docker run fenix:icldisco_latest diff --git a/CMakeLists.txt b/CMakeLists.txt index b866e11..9b0b1a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ # directory. # -cmake_minimum_required(VERSION 3.0.2) +cmake_minimum_required(VERSION 3.10.2) project(Fenix C) # The version number. @@ -16,56 +16,33 @@ set(FENIX_VERSION_MAJOR 1) set(FENIX_VERSION_MINOR 0) option(BUILD_EXAMPLES "Builds example programs from the examples directory" OFF) -option(BUILD_TESTING "Builds tests and test modes of files" ON) +option(BUILD_TESTING "Builds tests and test modes of files" OFF) +#Solves an issue with some system environments putting their MPI headers before +#the headers CMake includes. 
+option(CRAYPE_INC_FIX "Adds detected MPI headers directly to this project" ON) -# Set empty string for shared linking (we use static library only at this moment) -set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) +find_package(MPI REQUIRED) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) - -#set(CMAKE_BUILD_TYPE Release) -set(CMAKE_BUILD_TYPE Debug) -#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -O0 -ggdb") +add_subdirectory(src) -#ENABLE_TESTING -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) -#include(testref/TestAgainstReference) -configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/include/fenix-config.h.in - ${CMAKE_CURRENT_BINARY_DIR}/include/fenix-config.h @ONLY -) +include(CTest) +list(APPEND MPIEXEC_PREFLAGS "--with-ft;mpi") +if(BUILD_EXAMPLES) + add_subdirectory(examples) +endif() -#Check for MPICC definition, if not try to find MPI -if(NOT "a$ENV{MPICC}" STREQUAL "a") - #set(CMAKE_C_COMPILER ${MPI_C_COMPILER} CACHE STRING "The compiler CMake should use - often set to mpicc" FORCE) - set(MPI_C_COMPILER $ENV{MPICC}) - set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) - - message("[fenix] MPICC has been passed: $ENV{MPICC}") -else() - message("[fenix] MPICC was not passed, searching for MPI") - find_package(MPI REQUIRED) - if(${MPI_C_FOUND}) - message("[fenix] Found MPICC: ${MPI_C_COMPILER}") - else() - message( FATAL_ERROR "[fenix] MPI not found :( Aborting!") - endif() +if(BUILD_TESTING) + add_subdirectory(test) endif() -#Helper function for linking with MPI only if needed -function(linkMPI TOLINK) - #We only want to try to find MPI outrselves if it wasn't provided in MPICC by user - if("a$ENV{MPICC}" STREQUAL "a") - #find_package(MPI REQUIRED) - target_link_libraries(${TOLINK} MPI::MPI_C) - endif() -endfunction(linkMPI) -add_subdirectory(src) +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/fenix-config.h.in + ${CMAKE_CURRENT_BINARY_DIR}/include/fenix-config.h @ONLY +) include(CMakePackageConfigHelpers) configure_package_config_file(fenixConfig.cmake.in @@ -80,36 +57,3 @@ install( ${CMAKE_CURRENT_BINARY_DIR}/fenixConfigVersion.cmake DESTINATION cmake ) - - -include(CTest) - -if(BUILD_EXAMPLES) - add_subdirectory(examples/01_hello_world/fenix) - add_subdirectory(examples/01_hello_world/mpi) - add_subdirectory(examples/02_send_recv/fenix) - add_subdirectory(examples/02_send_recv/mpi) - add_subdirectory(examples/03_reduce/fenix) - #add_subdirectory(examples/03_reduce/mpi) - add_subdirectory(examples/04_Isend_Irecv/fenix) - add_subdirectory(examples/04_Isend_Irecv/mpi) - add_subdirectory(examples/05_subset_create) - add_subdirectory(examples/06_subset_createv) - -elseif(BUILD_TESTING) - #Some examples are useful tests as well. 
- add_subdirectory(examples/01_hello_world/fenix) - add_subdirectory(examples/02_send_recv/fenix) - add_subdirectory(examples/03_reduce/fenix) - add_subdirectory(examples/05_subset_create) - add_subdirectory(examples/06_subset_createv) -endif() - -if(BUILD_TESTING) - add_subdirectory(test/subset_internal) - add_subdirectory(test/subset_merging) - add_subdirectory(test/request_tracking) - add_subdirectory(test/request_cancelled) - add_subdirectory(test/no_jump) - add_subdirectory(test/issend) -endif() diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt index 2dad662..6a344f4 100644 --- a/examples/01_hello_world/fenix/CMakeLists.txt +++ b/examples/01_hello_world/fenix/CMakeLists.txt @@ -12,9 +12,6 @@ add_executable(fenix_hello_world fenix_hello_world.c) target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES}) if(BUILD_TESTING) - #set(CMAKE_BUILD_TYPE Debug) - add_executable(fenix_hello_world-debug fenix_hello_world.c) - target_link_libraries(fenix_hello_world-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME hello_world - COMMAND mpirun --with-ft mpi -n 3 fenix_hello_world-debug "1") + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} fenix_hello_world ${MPIEXEC_POSTFLAGS} "1") endif() diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt index aa5dc65..bf40679 100644 --- a/examples/02_send_recv/fenix/CMakeLists.txt +++ b/examples/02_send_recv/fenix/CMakeLists.txt @@ -12,11 +12,8 @@ add_executable(fenix_ring fenix_ring.c) target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES} m ) if(BUILD_TESTING) - set(CMAKE_BUILD_TYPE Debug) - add_executable(fenix_ring-debug fenix_ring.c) - target_link_libraries(fenix_ring-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME ring - COMMAND mpirun --with-ft mpi -np 5 fenix_ring-debug 1 2) + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_ring ${MPIEXEC_POSTFLAGS} 1 2) set_tests_properties(ring PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/05_subset_create/CMakeLists.txt b/examples/05_subset_create/CMakeLists.txt index bf2da45..7f1efcd 100644 --- a/examples/05_subset_create/CMakeLists.txt +++ b/examples/05_subset_create/CMakeLists.txt @@ -12,11 +12,8 @@ add_executable(subset_create subset_create.c) target_link_libraries(subset_create fenix ${MPI_C_LIBRARIES}) if(BUILD_TESTING) - set(CMAKE_BUILD_TYPE Debug) - add_executable(fenix_subset_create-debug subset_create.c) - target_link_libraries(fenix_subset_create-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME subset_create - COMMAND mpirun --with-ft mpi -np 5 fenix_subset_create-debug 1) + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} subset_create ${MPIEXEC_POSTFLAGS} 1) set_tests_properties(subset_create PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/06_subset_createv/CMakeLists.txt b/examples/06_subset_createv/CMakeLists.txt index 3a935a7..c242648 100644 --- a/examples/06_subset_createv/CMakeLists.txt +++ b/examples/06_subset_createv/CMakeLists.txt @@ -12,11 +12,8 @@ add_executable(subset_createv subset_createv.c) target_link_libraries(subset_createv fenix ${MPI_C_LIBRARIES}) if(BUILD_TESTING) - set(CMAKE_BUILD_TYPE Debug) - add_executable(fenix_subset_createv-debug subset_createv.c) - target_link_libraries(fenix_subset_createv-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME subset_createv - COMMAND mpirun --with-ft mpi -np 5 fenix_subset_createv-debug 1) + COMMAND 
${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} subset_createv ${MPIEXEC_POSTFLAGS} 1) set_tests_properties(subset_createv PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..b1f7321 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(01_hello_world/fenix) +add_subdirectory(02_send_recv/fenix) +add_subdirectory(03_reduce/fenix) +add_subdirectory(04_Isend_Irecv/fenix) +add_subdirectory(05_subset_create) +add_subdirectory(06_subset_createv) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7d413a1..096b76a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,12 +11,6 @@ configure_file (${CMAKE_SOURCE_DIR}/include/fenix-config.h.in "${CMAKE_CURRENT_BINARY_DIR}/fenix-config.h" @ONLY) -#configure_file(${CMAKE_SOURCE_DIR}/include/fenix.h -# "${CMAKE_BINARY_DIR}/include/fenix.h" COPYONLY) - -#configure_file(${CMAKE_SOURCE_DIR}/include/fenix_process_recovery.h -# "${CMAKE_BINARY_DIR}/include/fenix_process_recovery.h" COPYONLY) - #include_directories(${CMAKE_CURRENT_BINARY_DIR}) FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h) @@ -39,25 +33,7 @@ globals.c add_library( fenix STATIC ${Fenix_SOURCES}) -#if("a$ENV{MPICC}" STREQUAL "a") -# message("[fenix] MPICC (MPI compiler) environment variable is not defined. Trying to find MPI compiler...") -# find_package(MPI REQUIRED) -# target_link_libraries(fenix MPI::MPI_C) -#else() -# message("[fenix] MPICC has been passed: $ENV{MPICC}") -# set(MPI_C_COMPILER $ENV{MPICC}) -# SET(CMAKE_C_COMPILER ${MPI_C_COMPILER}) -#endif() - -linkMPI(fenix) - -target_link_libraries(fenix ${MPI_C_LIBRARIES}) -if(MPI_COMPILE_FLAGS) - set_target_properties(fenix PROPERTIES COMPILE_FLAGS "${MPI_COMPILE_FLAGS}") -endif() -if(MPI_LINK_FLAGS) - set_target_properties(fenix PROPERTIES LINK_FLAGS "${MPI_LINK_FLAGS}") -endif() +target_link_libraries(fenix MPI::MPI_C) target_include_directories(fenix PUBLIC @@ -76,5 +52,3 @@ install(EXPORT fenix FILE fenixTargets.cmake DESTINATION cmake) install(FILES ${Fenix_HEADERS} DESTINATION include) - -#target_link_libraries( mpi ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..9ee9fbe --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(subset_internal) +add_subdirectory(subset_merging) +add_subdirectory(request_tracking) +add_subdirectory(request_cancelled) +add_subdirectory(no_jump) +add_subdirectory(issend) diff --git a/test/failed_spares/CMakeLists.txt b/test/failed_spares/CMakeLists.txt index 96827f3..8fd95b3 100644 --- a/test/failed_spares/CMakeLists.txt +++ b/test/failed_spares/CMakeLists.txt @@ -8,8 +8,8 @@ # directory. # -#set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_failed_spares fenix_failed_spares.c) -target_link_libraries(fenix_failed_spares fenix ${MPI_C_LIBRARIES}) +target_link_libraries(fenix_failed_spares fenix MPI::MPI_C) + add_test(NAME failed_spares - COMMAND mpirun --with-ft mpi -n 6 fenix_failed_spares 3 1 3 4 ) + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_failed_spares ${MPIEXEC_POSTFLAGS} 3 1 3 4 ) diff --git a/test/issend/CMakeLists.txt b/test/issend/CMakeLists.txt index c4f6918..f141d40 100644 --- a/test/issend/CMakeLists.txt +++ b/test/issend/CMakeLists.txt @@ -8,8 +8,7 @@ # directory. 
# -set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_issend_test fenix_issend_test.c) -target_link_libraries(fenix_issend_test fenix ${MPI_C_LIBRARIES}) +target_link_libraries(fenix_issend_test fenix MPI::MPI_C) -add_test(NAME issend COMMAND mpirun --with-ft mpi -np 5 fenix_issend_test "1") +add_test(NAME issend COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_issend_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/no_jump/CMakeLists.txt b/test/no_jump/CMakeLists.txt index b3258dd..dfc9311 100644 --- a/test/no_jump/CMakeLists.txt +++ b/test/no_jump/CMakeLists.txt @@ -8,8 +8,7 @@ # directory. # -set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_no_jump_test fenix_no_jump_test.c) -target_link_libraries(fenix_no_jump_test fenix ${MPI_C_LIBRARIES}) +target_link_libraries(fenix_no_jump_test fenix MPI::MPI_C) -add_test(NAME no_jump COMMAND mpirun --with-ft mpi -np 5 fenix_no_jump_test "1") +add_test(NAME no_jump COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_no_jump_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/request_cancelled/CMakeLists.txt b/test/request_cancelled/CMakeLists.txt index a59af59..97dd331 100644 --- a/test/request_cancelled/CMakeLists.txt +++ b/test/request_cancelled/CMakeLists.txt @@ -8,8 +8,7 @@ # directory. # -set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_request_cancelled_test fenix_req_cancelled_test.c) -target_link_libraries(fenix_request_cancelled_test fenix ${MPI_C_LIBRARIES}) +target_link_libraries(fenix_request_cancelled_test fenix MPI::MPI_C) -add_test(NAME request_cancelled COMMAND mpirun --with-ft mpi -np 5 fenix_request_cancelled_test "1") +add_test(NAME request_cancelled COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_request_cancelled_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/request_tracking/CMakeLists.txt b/test/request_tracking/CMakeLists.txt index c8269b2..8d008ed 100644 --- a/test/request_tracking/CMakeLists.txt +++ b/test/request_tracking/CMakeLists.txt @@ -8,9 +8,8 @@ # directory. # -set (CMAKE_BUILD_TYPE Debug) add_executable(fenix_request_tracking_test fenix_request_tracking_test.c) -target_link_libraries(fenix_request_tracking_test fenix ${MPI_C_LIBRARIES}) +target_link_libraries(fenix_request_tracking_test fenix MPI::MPI_C) add_test(NAME request_tracking - COMMAND mpirun -np 3 fenix_request_tracking_test) + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} fenix_request_tracking_test ${MPIEXEC_POSTFLAGS}) diff --git a/test/subset_internal/CMakeLists.txt b/test/subset_internal/CMakeLists.txt index 24b6190..4dcfc28 100644 --- a/test/subset_internal/CMakeLists.txt +++ b/test/subset_internal/CMakeLists.txt @@ -7,7 +7,6 @@ # For more information, see the LICENSE file in the top Fenix # directory. # -set (CMAKE_BUILD_TYPE Debug) add_executable(fenix_subset_internal_test fenix_subset_internal_test.c) target_link_libraries(fenix_subset_internal_test fenix) diff --git a/test/subset_merging/CMakeLists.txt b/test/subset_merging/CMakeLists.txt index c6d5e46..603686e 100644 --- a/test/subset_merging/CMakeLists.txt +++ b/test/subset_merging/CMakeLists.txt @@ -8,7 +8,6 @@ # directory. 
# -set(CMAKE_BUILD_TYPE Debug) add_executable(fenix_subset_merging_test fenix_subset_merging_test.c) target_link_libraries(fenix_subset_merging_test fenix) From 43452ed4586f1f0e99d38daae104f81d6015db92 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Sun, 19 Feb 2023 23:38:21 -0500 Subject: [PATCH 10/15] Test Bugfixes + don't try to pass MPI_Datatype between ranks MPI_Datatype is vendor-dependent and we aren't allowed to assume anything about it. Right now, ompi implements as a pointer and we segfault on recovery sometimes. Fix unran test, add timeout parameter Remove ompi version expected to fail from tests --- .github/Dockerfile | 2 +- .github/docker-compose.yml | 20 ++++++++++---------- .github/workflows/ci_checks.yaml | 3 --- .gitignore | 1 + include/fenix_data_member.h | 4 +--- include/fenix_data_recovery.h | 3 +-- src/fenix.c | 2 +- src/fenix_data_member.c | 9 ++------- src/fenix_data_policy_in_memory_raid.c | 12 +++++++----- src/fenix_data_recovery.c | 7 ++----- test/CMakeLists.txt | 1 + 11 files changed, 27 insertions(+), 37 deletions(-) diff --git a/.github/Dockerfile b/.github/Dockerfile index e3205fb..dd6f49a 100644 --- a/.github/Dockerfile +++ b/.github/Dockerfile @@ -26,4 +26,4 @@ COPY --from=0 ompi_install/ /ompi_install/ ENV PATH="$PATH:/ompi_install/bin" RUN mkdir fenix_build fenix_install && cd fenix_build && cmake ../fenix_src -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/ompi_install/bin/mpicc \ -DBUILD_EXAMPLES=ON -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX=../fenix_install -DMPIEXEC_PREFLAGS="--allow-run-as-root;--map-by;:OVERSUBSCRIBE" && make install -j8 -CMD ["sh", "-c", "cd fenix_build && ctest --verbose"] +CMD ["sh", "-c", "cd fenix_build && ctest --verbose --timeout 60"] diff --git a/.github/docker-compose.yml b/.github/docker-compose.yml index 46088b5..b29e083 100644 --- a/.github/docker-compose.yml +++ b/.github/docker-compose.yml @@ -16,16 +16,16 @@ x-fenix: &fenix - type=gha,scope=default,mode=max services: - fenix_ompi_5rc10: - <<: *fenix - image: "fenix:ompi_5rc10" - build: - <<: *fenix-build - x-bake: - cache-from: - - type=gha,scope=ompi_5rc10 - cache-to: - - type=gha,scope=ompi_5rc10,mode=max + #fenix_ompi_5rc10: + # <<: *fenix + # image: "fenix:ompi_5rc10" + # build: + # <<: *fenix-build + # x-bake: + # cache-from: + # - type=gha,scope=ompi_5rc10 + # cache-to: + # - type=gha,scope=ompi_5rc10,mode=max fenix_ompi_5: <<: *fenix diff --git a/.github/workflows/ci_checks.yaml b/.github/workflows/ci_checks.yaml index e4671a4..ebeeef8 100644 --- a/.github/workflows/ci_checks.yaml +++ b/.github/workflows/ci_checks.yaml @@ -20,9 +20,6 @@ jobs: files: | .github/docker-compose.yml load: true - - name: Test open-mpi v5.0.0rc10 - if: success() || failure() - run: docker run fenix:ompi_5rc10 - name: Test open-mpi v5.0.x if: success() || failure() run: docker run fenix:ompi_5 diff --git a/.gitignore b/.gitignore index 20f1a05..83fc3ce 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ examples/05_subset_create/subset_create examples/06_subset_createv/subset_createv test/request_tracking/fenix_request_tracking_test test/request_tracking/fenix_request_tracking_test_nofenix +build/ # Other *~ diff --git a/include/fenix_data_member.h b/include/fenix_data_member.h index b37c652..391142b 100644 --- a/include/fenix_data_member.h +++ b/include/fenix_data_member.h @@ -67,7 +67,6 @@ typedef struct __fenix_member_entry { int memberid; enum states state; void *user_data; - MPI_Datatype current_datatype; int datatype_size; int current_count; } fenix_member_entry_t; @@ 
-80,7 +79,6 @@ typedef struct __fenix_member { typedef struct __member_entry_packet { int memberid; - MPI_Datatype current_datatype; int datatype_size; int current_count; } fenix_member_entry_packet_t; @@ -92,7 +90,7 @@ void __fenix_ensure_member_capacity( fenix_member_t *m ); void __fenix_ensure_version_capacity_from_member( fenix_member_t *m ); fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, - int memberid, void* data, int count, MPI_Datatype datatype); + int memberid, void* data, int count, int datatype_size); int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank); int __fenix_data_member_recv_metadata(int groupid, int src_rank, diff --git a/include/fenix_data_recovery.h b/include/fenix_data_recovery.h index 856dbe5..4580cb9 100644 --- a/include/fenix_data_recovery.h +++ b/include/fenix_data_recovery.h @@ -101,7 +101,6 @@ typedef struct __data_entry_packet { - MPI_Datatype datatype; int count; int datatype_size; } fenix_data_entry_packet_t; @@ -109,7 +108,7 @@ typedef struct __data_entry_packet { int __fenix_group_create(int, MPI_Comm, int, int, int, void*, int*); int __fenix_group_get_redundancy_policy(int, int*, int*, int*); -int __fenix_member_create(int, int, void *, int, MPI_Datatype); +int __fenix_member_create(int, int, void *, int, int); int __fenix_data_wait(Fenix_Request); int __fenix_data_test(Fenix_Request, int *); int __fenix_member_store(int, int, Fenix_Data_subset); diff --git a/src/fenix.c b/src/fenix.c index 93f29f9..6be875f 100644 --- a/src/fenix.c +++ b/src/fenix.c @@ -83,7 +83,7 @@ int Fenix_Data_group_create( int group_id, MPI_Comm comm, int start_time_stamp, } int Fenix_Data_member_create( int group_id, int member_id, void *buffer, int count, MPI_Datatype datatype ) { - return __fenix_member_create(group_id, member_id, buffer, count, datatype); + return __fenix_member_create(group_id, member_id, buffer, count, __fenix_get_size(datatype)); } int Fenix_Data_group_get_redundancy_policy( int group_id, int* policy_name, void *policy_value, int *flag ) { diff --git a/src/fenix_data_member.c b/src/fenix_data_member.c index a780276..3d9d60d 100644 --- a/src/fenix_data_member.c +++ b/src/fenix_data_member.c @@ -141,7 +141,7 @@ int __fenix_find_next_member_position(fenix_member_t *member) { } fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, - int memberid, void* data, int count, MPI_Datatype datatype){ + int memberid, void* data, int count, int datatype_size){ int member_index = __fenix_find_next_member_position(member); fenix_member_entry_t* mentry = member->member_entry + member_index; @@ -150,11 +150,7 @@ fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, mentry->state = OCCUPIED; mentry->user_data = data; mentry->current_count = count; - mentry->current_datatype = datatype; - - int dsize; - MPI_Type_size(datatype, &dsize); - mentry->datatype_size = dsize; + mentry->datatype_size = datatype_size; member->count++; @@ -222,7 +218,6 @@ int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank){ fenix_member_entry_packet_t packet; packet.memberid = mentry.memberid; - packet.current_datatype = mentry.current_datatype; packet.datatype_size = mentry.datatype_size; packet.current_count = mentry.current_count; diff --git a/src/fenix_data_policy_in_memory_raid.c b/src/fenix_data_policy_in_memory_raid.c index 40b265d..19341e2 100644 --- a/src/fenix_data_policy_in_memory_raid.c +++ b/src/fenix_data_policy_in_memory_raid.c @@ -703,8 +703,11 @@ int 
__imr_member_restore(fenix_group_t* g, int member_id, //find_mentry returns the error status. We found the member (and corresponding data) if there are no errors. int found_member = !(__imr_find_mentry(group, member_id, &mentry)); - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - fenix_member_entry_t member_data = group->base.member->member_entry[member_data_index]; + fenix_member_entry_t member_data; + if(found_member){ + int member_data_index = __fenix_search_memberid(group->base.member, member_id); + member_data = group->base.member->member_entry[member_data_index]; + } int recovery_locally_possible; @@ -783,12 +786,11 @@ int __imr_member_restore(fenix_group_t* g, int member_id, //We remake the new member just like the user would. __fenix_member_create(group->base.groupid, packet.memberid, NULL, packet.current_count, - packet.current_datatype); + packet.datatype_size); __imr_find_mentry(group, member_id, &mentry); int member_data_index = __fenix_search_memberid(group->base.member, member_id); member_data = group->base.member->member_entry[member_data_index]; - MPI_Recv((void*)&(group->num_snapshots), 1, MPI_INT, group->partners[1], RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); @@ -886,7 +888,7 @@ int __imr_member_restore(fenix_group_t* g, int member_id, //We remake the new member just like the user would. __fenix_member_create(group->base.groupid, packet.memberid, NULL, packet.current_count, - packet.current_datatype); + packet.datatype_size); __imr_find_mentry(group, member_id, &mentry); int member_data_index = __fenix_search_memberid(group->base.member, member_id); diff --git a/src/fenix_data_recovery.c b/src/fenix_data_recovery.c index 9ddc8ef..6c74f35 100644 --- a/src/fenix_data_recovery.c +++ b/src/fenix_data_recovery.c @@ -190,8 +190,7 @@ int __fenix_group_get_redundancy_policy(int groupid, int* policy_name, int* poli * @param count * @param data_type */ -int __fenix_member_create(int groupid, int memberid, void *data, int count, MPI_Datatype datatype ) { - +int __fenix_member_create(int groupid, int memberid, void *data, int count, int datatype_size ) { int retval = -1; int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); int member_index = -1; @@ -219,9 +218,8 @@ int __fenix_member_create(int groupid, int memberid, void *data, int count, MPI_ //First, we'll make a fenix-core member entry, then pass that info to //the specific data policy. 
- int member_index = __fenix_find_next_member_position(member); fenix_member_entry_t* mentry; - mentry = __fenix_data_member_add_entry(member, memberid, data, count, datatype); + mentry = __fenix_data_member_add_entry(member, memberid, data, count, datatype_size); //Pass the info along to the policy retval = group->vtbl.member_create(group, mentry); @@ -924,7 +922,6 @@ int __fenix_member_set_attribute(int groupid, int memberid, int attributename, retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; } - mentry->current_datatype = *((MPI_Datatype *)(attributevalue)); mentry->datatype_size = my_datatype_size; retval = FENIX_SUCCESS; break; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9ee9fbe..c4f2e92 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(request_tracking) add_subdirectory(request_cancelled) add_subdirectory(no_jump) add_subdirectory(issend) +add_subdirectory(failed_spares) From 347aaa0ed566c920720de737554e9352cfe746ba Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 21 Mar 2023 09:03:58 -0600 Subject: [PATCH 11/15] Implement MPI system include fix --- CMakeLists.txt | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b0b1a8..a49b781 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,18 +15,45 @@ project(Fenix C) set(FENIX_VERSION_MAJOR 1) set(FENIX_VERSION_MINOR 0) -option(BUILD_EXAMPLES "Builds example programs from the examples directory" OFF) -option(BUILD_TESTING "Builds tests and test modes of files" OFF) +option(BUILD_EXAMPLES "Builds example programs from the examples directory" OFF) +option(BUILD_TESTING "Builds tests and test modes of files" OFF) #Solves an issue with some system environments putting their MPI headers before -#the headers CMake includes. -option(CRAYPE_INC_FIX "Adds detected MPI headers directly to this project" ON) +#the headers CMake includes. Forces non-system MPI headers when incorrect headers +#detected in include path. +option(SYSTEM_INC_FIX "Attempts to force overriding any system MPI headers" ON) + find_package(MPI REQUIRED) -add_subdirectory(src) +if(${SYSTEM_INC_FIX}) + include(CheckIncludeFile) + set(CMAKE_REQUIRED_QUIET ON) + check_include_file("mpi.h" MPI_HEADER_CRASH) + set(CMAKE_REQUIRED_QUIET OFF) + + if(${MPI_HEADER_CRASH}) + message(WARNING "Detected system MPI headers, attempting to force use of ${MPI_C_INCLUDE_DIRS}. 
Set SYSTEM_INC_FIX=OFF to stop this behavior.")
+
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.25")
+      set_target_properties(MPI::MPI_C PROPERTIES SYSTEM "FALSE")
+    else()
+      foreach(MPI_DIR ${MPI_C_INCLUDE_DIRS})
+        if(NOT ("${CMAKE_C_FLAGS}" MATCHES ".* -I${MPI_DIR}.*"))
+          message(STATUS ${MPI_DIR})
+          set(CMAKE_C_FLAGS " -I${MPI_DIR} ${CMAKE_C_FLAGS}" CACHE INTERNAL "Flags to append to build commands for C.")
+        endif()
+      endforeach()
+      set_target_properties(MPI::MPI_C PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
+    endif()
+  endif()
+
+endif()
+
+add_subdirectory(src)
+
 include(CTest)
 list(APPEND MPIEXEC_PREFLAGS "--with-ft;mpi")
 
@@ -39,6 +66,7 @@ if(BUILD_TESTING)
 endif()
 
+
 configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fenix-config.h.in
     ${CMAKE_CURRENT_BINARY_DIR}/include/fenix-config.h @ONLY

From d6e33e303e1d3df971a8f650a0ed3f14ba3de820 Mon Sep 17 00:00:00 2001
From: Matthew Whitlock 
Date: Mon, 17 Apr 2023 12:16:53 -0700
Subject: [PATCH 12/15] Improved system include fixes, removed reference to
 (in this branch) unimplemented feature, remove travis, cmake variable naming
 conventions

---
 .github/Dockerfile                           |  2 +-
 .gitignore                                   |  1 +
 .travis.yml                                  | 62 --------------------
 CMakeLists.txt                               | 47 ++++++++++++---
 examples/01_hello_world/fenix/CMakeLists.txt |  2 +-
 examples/02_send_recv/fenix/CMakeLists.txt   |  2 +-
 examples/05_subset_create/CMakeLists.txt     |  2 +-
 examples/06_subset_createv/CMakeLists.txt    |  2 +-
 8 files changed, 46 insertions(+), 74 deletions(-)
 delete mode 100644 .travis.yml

diff --git a/.github/Dockerfile b/.github/Dockerfile
index dd6f49a..75e9f40 100644
--- a/.github/Dockerfile
+++ b/.github/Dockerfile
@@ -25,5 +25,5 @@ COPY . ./fenix_src
 COPY --from=0 ompi_install/ /ompi_install/
 ENV PATH="$PATH:/ompi_install/bin"
 RUN mkdir fenix_build fenix_install && cd fenix_build && cmake ../fenix_src -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/ompi_install/bin/mpicc \
-    -DBUILD_EXAMPLES=ON -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX=../fenix_install -DMPIEXEC_PREFLAGS="--allow-run-as-root;--map-by;:OVERSUBSCRIBE" && make install -j8
+    -DFENIX_EXAMPLES=ON -DFENIX_TESTS=ON -DCMAKE_INSTALL_PREFIX=../fenix_install -DMPIEXEC_PREFLAGS="--allow-run-as-root;--map-by;:OVERSUBSCRIBE" && make install -j8
 CMD ["sh", "-c", "cd fenix_build && ctest --verbose --timeout 60"]

diff --git a/.gitignore b/.gitignore
index 83fc3ce..3e3dd51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,6 +40,7 @@ examples/06_subset_createv/subset_createv
 test/request_tracking/fenix_request_tracking_test
 test/request_tracking/fenix_request_tracking_test_nofenix
 build/
+install/
 
 # Other
 *~

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index e292727..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-language: c
-addons:
-  apt:
-    packages:
-      - cmake
-      - autoconf
-      - automake
-      - libtool
-      - valgrind
-cache:
-  directories:
-    - ulfm-install
-before_install:
-  - echo "Configuring ULFM"
-  - if [ -f ulfm-install/lib/libmpi.so ]; then
-      echo "libmpi.so found -- nothing to build.";
-      cd ulfm-install;
-    else
-      ROOT=`pwd`;
-      mkdir ulfm-install;
-      echo "Downloading ULFM from repo";
-      git clone --recursive https://bitbucket.org/icldistcomp/ulfm2.git ulfm-src/;
-      echo " - Configuring and building ULFM.";
-      cd ulfm-src;
-      echo " - Running autogen.pl";
-      ./autogen.pl >../ulfm-install/ulfm_build_output.txt 2>&1;
-      echo " - Running configure";
-      ./configure --prefix=$ROOT/ulfm-install >>../ulfm-install/ulfm_build_output.txt 2>&1;
-      echo " - Running make";
-      make -j4 >>../ulfm-install/ulfm_build_output.txt 2>&1;
-      echo " - Running make install";
-      make install >>../ulfm-install/ulfm_build_output.txt 2>&1;
-      echo " - Finished installing ULFM";
-      cd ../ulfm-install/;
-    fi
-
-  #Expect that any changes to the above still puts me in the install's home dir
-  - export MPI_HOME=`pwd`
-  - export PATH=$MPI_HOME/bin/:$PATH
-  - export LD_LIBRARY_PATH=$MPI_HOME/lib:$LD_LIBRARY_PATH
-  - export DYLD_LIBRARY_PATH=$MPI_HOME/lib:$DYLD_LIBRARY_PATH
-  - export MANPATH=$MPI_HOME/share/man:$MANPATH
-
-  - export MPICC="`which mpicc`"
-  - export MPICXX="`which mpic++`"
-
-  #Allow oversubscription for tests, since we're potentially single core
-  - export OMPI_MCA_rmaps_base_oversubscribe=1
-
-  - tail -n50 ./ulfm_build_output.txt
-  - cd ../ #End back at root
-install:
-  - mkdir build && cd build
-  - cmake ../ -DBUILD_TESTING=ON && make -j4 VERBOSE=1
-script:
-  - make test
-after_success:
-  - echo "Success, printing run logs:"
-  - cat Testing/Temporary/LastTest.log
-after_failure:
-  - echo "Failure occured, printing run logs:"
-  - cat Testing/Temporary/LastTest.log

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b0b1a8..0464f44 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,30 +15,63 @@ project(Fenix C)
 set(FENIX_VERSION_MAJOR 1)
 set(FENIX_VERSION_MINOR 0)
 
-option(BUILD_EXAMPLES "Builds example programs from the examples directory" OFF)
-option(BUILD_TESTING "Builds tests and test modes of files" OFF)
+option(FENIX_EXAMPLES    "Builds example programs from the examples directory" OFF)
+option(FENIX_TESTS       "Builds tests and test modes of files"                OFF)
 
 #Solves an issue with some system environments putting their MPI headers before
-#the headers CMake includes.
-option(CRAYPE_INC_FIX "Adds detected MPI headers directly to this project" ON)
+#the headers CMake includes. Forces non-system MPI headers when incorrect headers
+#detected in include path.
+option(SYSTEM_INC_FIX "Attempts to force overriding any system MPI headers" ON)
+
 
 find_package(MPI REQUIRED)
 
-add_subdirectory(src)
+#If we're using mpicc, we don't need to worry about the includes.
+if("${CMAKE_C_COMPILER}" MATCHES ".*/?mpicc")
+  set(SYSTEM_INC_FIX OFF)
+endif()
+
+if(${SYSTEM_INC_FIX})
+  include(CheckIncludeFile)
+  set(CMAKE_REQUIRED_QUIET ON)
+  check_include_file("mpi.h" MPI_HEADER_CRASH)
+  set(CMAKE_REQUIRED_QUIET OFF)
+
+  if(${MPI_HEADER_CRASH})
+    message(WARNING "Detected system MPI headers, attempting to force use of ${MPI_C_INCLUDE_DIRS}. Set SYSTEM_INC_FIX=OFF to stop this behavior.")
+
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.25")
+      set_target_properties(MPI::MPI_C PROPERTIES SYSTEM "FALSE")
+    else()
+      foreach(MPI_DIR ${MPI_C_INCLUDE_DIRS})
+        if(NOT ("${CMAKE_C_FLAGS}" MATCHES ".* -I${MPI_DIR}.*"))
+          message(STATUS ${MPI_DIR})
+          set(CMAKE_C_FLAGS " -I${MPI_DIR} ${CMAKE_C_FLAGS}" CACHE INTERNAL "Flags to append to build commands for C.")
+        endif()
+      endforeach()
+      set_target_properties(MPI::MPI_C PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
+    endif()
+  endif()
+endif() #SYSTEM_INC_FIX
+
+
+
+add_subdirectory(src)
 
 include(CTest)
 list(APPEND MPIEXEC_PREFLAGS "--with-ft;mpi")
 
-if(BUILD_EXAMPLES)
+if(FENIX_EXAMPLES)
     add_subdirectory(examples)
 endif()
 
-if(BUILD_TESTING)
+if(FENIX_TESTS)
     add_subdirectory(test)
 endif()
 
+
 configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fenix-config.h.in
     ${CMAKE_CURRENT_BINARY_DIR}/include/fenix-config.h @ONLY
diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt
index 6a344f4..a474f82 100644
--- a/examples/01_hello_world/fenix/CMakeLists.txt
+++ b/examples/01_hello_world/fenix/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(fenix_hello_world fenix_hello_world.c)
 target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES})
 
-if(BUILD_TESTING)
+if(FENIX_TESTS)
    add_test(NAME hello_world 
      COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} fenix_hello_world ${MPIEXEC_POSTFLAGS} "1")
 endif()
diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt
index bf40679..0c81d03 100644
--- a/examples/02_send_recv/fenix/CMakeLists.txt
+++ b/examples/02_send_recv/fenix/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(fenix_ring fenix_ring.c)
 target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES} m )
 
-if(BUILD_TESTING)
+if(FENIX_TESTS)
    add_test(NAME ring 
      COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_ring ${MPIEXEC_POSTFLAGS} 1 2)
    set_tests_properties(ring PROPERTIES
diff --git a/examples/05_subset_create/CMakeLists.txt b/examples/05_subset_create/CMakeLists.txt
index 7f1efcd..fccb552 100644
--- a/examples/05_subset_create/CMakeLists.txt
+++ b/examples/05_subset_create/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(subset_create subset_create.c)
 target_link_libraries(subset_create fenix ${MPI_C_LIBRARIES})
 
-if(BUILD_TESTING)
+if(FENIX_TESTS)
    add_test(NAME subset_create 
      COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} subset_create ${MPIEXEC_POSTFLAGS} 1)
    set_tests_properties(subset_create PROPERTIES
diff --git a/examples/06_subset_createv/CMakeLists.txt b/examples/06_subset_createv/CMakeLists.txt
index c242648..6a7b356 100644
--- a/examples/06_subset_createv/CMakeLists.txt
+++ b/examples/06_subset_createv/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(subset_createv subset_createv.c)
 target_link_libraries(subset_createv fenix ${MPI_C_LIBRARIES})
 
-if(BUILD_TESTING)
+if(FENIX_TESTS)
    add_test(NAME subset_createv 
      COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} subset_createv ${MPIEXEC_POSTFLAGS} 1)
    set_tests_properties(subset_createv PROPERTIES

From 087f57ebf3ccbfd51208f9c89b4195ef3f1ac733 Mon Sep 17 00:00:00 2001
From: Matthew Whitlock 
Date: Mon, 17 Apr 2023 12:35:17 -0700
Subject: [PATCH 13/15] Update install directions, another cmake variable
 naming convention fix

---
 CMakeLists.txt |  6 +++---
 README.md      | 18 +++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a6ffe07..dd0e3f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,17 +21,17 @@ option(FENIX_TESTS       "Builds tests and test modes of files" O
 #Solves an issue with some system environments putting their MPI headers before
 #the headers CMake includes. Forces non-system MPI headers when incorrect headers
 #detected in include path.
-option(SYSTEM_INC_FIX "Attempts to force overriding any system MPI headers" ON)
+option(FENIX_SYSTEM_INC_FIX  "Attempts to force overriding any system MPI headers" ON)
 
 
 find_package(MPI REQUIRED)
 
 #If we're using mpicc, we don't need to worry about the includes.
 if("${CMAKE_C_COMPILER}" MATCHES ".*/?mpicc")
-  set(SYSTEM_INC_FIX OFF)
+  set(FENIX_SYSTEM_INC_FIX OFF)
 endif()
 
-if(${SYSTEM_INC_FIX})
+if(${FENIX_SYSTEM_INC_FIX})
   include(CheckIncludeFile)
   set(CMAKE_REQUIRED_QUIET ON)
   check_include_file("mpi.h" MPI_HEADER_CRASH)
diff --git a/README.md b/README.md
index 09efb60..20ab69a 100644
--- a/README.md
+++ b/README.md
@@ -17,17 +17,17 @@ These instructions assume you are in your home directory.
 1. Checkout Fenix sources
-   * For example: ` git clone `
+   * For example: ` git clone && cd fenix`
 2. Create a build directory.
-   * For example: ` mkdir -p ~/build/fenix/ && cd ~/build/fenix/ `
+   * For example: ` mkdir build && cd build `
 3. Specify the MPI C compiler to use. [Open MPI 5+](https://github.com/open-mpi/ompi/tree/v5.0.x) is the required version.
-   * To manually indicate which compiler `cmake` should use, set the `MPICC` variable to point to it.
-      * For example: ` export MPICC=~/install/mpi-ulfm/bin/mpicc `
-   * If the `MPICC` environment variable is not there, `cmake` will try to guess where the MPI implementation is. To help, make sure you include the installation directory of MPI in your `PATH`.
-      * For example: ` export PATH=~/install/mpi-ulfm/bin:$PATH `
-4. Run ` cmake ` and ` make `
-   * For example: ` cmake ~/Fenix && make `
-5. For best compatibility with other cmake projects, run ` make install ` and add the install directory to your CMAKE\_PREFIX\_PATH
+   * Check out the CMake documentation for the best information on how to do this, but in general:
+      * Set the CC environment variable to the correct `mpicc`,
+      * Invoke cmake with `-DCMAKE_C_COMPILER=mpicc`,
+      * Add the mpi install directory to CMAKE_PREFIX_PATH (see CMakeLists.txt FENIX_SYSTEM_INC_FIX option).
+   * If you experience segmentation faults during simple MPI function calls, it is likely you have mixed up 
+4. Run ` cmake ../ -DCMAKE_INSTALL_PREFIX=... && make install`
+5. Optionally, add the install prefix to your CMAKE\_PREFIX\_PATHS environment variable, to enable `find_package(fenix)` in your other projects.
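
As a rough sketch of the install flow described by the updated README (the repository URL, directory names, and install prefix below are placeholders, not taken from the patch):

    # Hypothetical paths; substitute your own checkout location and install prefix.
    git clone <fenix-repository-url> fenix && cd fenix
    mkdir build && cd build
    # Point CMake at the ULFM-capable Open MPI compiler wrapper.
    cmake ../ -DCMAKE_C_COMPILER=mpicc -DCMAKE_INSTALL_PREFIX=$HOME/fenix-install
    make install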

From 81e73705d428a2e8cb582eca00e208e7d0de318a Mon Sep 17 00:00:00 2001
From: Matthew Whitlock 
Date: Mon, 8 May 2023 13:37:08 -0700
Subject: [PATCH 14/15] Revert to BUILD_TESTING; make inc fix optionally
 transitive

---
 CMakeLists.txt                               | 58 +++++++-------------
 cmake/fenixConfig.cmake.in                   | 13 +++++
 cmake/systemMPIOverride.cmake                | 51 +++++++++++++++++
 examples/01_hello_world/fenix/CMakeLists.txt |  2 +-
 examples/02_send_recv/fenix/CMakeLists.txt   |  2 +-
 examples/05_subset_create/CMakeLists.txt     |  2 +-
 examples/06_subset_createv/CMakeLists.txt    |  2 +-
 fenixConfig.cmake.in                         |  5 --
 src/CMakeLists.txt                           |  2 +-
 9 files changed, 89 insertions(+), 48 deletions(-)
 create mode 100644 cmake/fenixConfig.cmake.in
 create mode 100644 cmake/systemMPIOverride.cmake
 delete mode 100644 fenixConfig.cmake.in

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd0e3f7..7b8b20c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,57 +15,34 @@ project(Fenix C)
 set(FENIX_VERSION_MAJOR 1)
 set(FENIX_VERSION_MINOR 0)
 
-option(FENIX_EXAMPLES    "Builds example programs from the examples directory" OFF)
-option(FENIX_TESTS       "Builds tests and test modes of files"                OFF)
+option(BUILD_EXAMPLES    "Builds example programs from the examples directory" OFF)
+option(BUILD_TESTING     "Builds tests and test modes of files" ON)
+
 
 #Solves an issue with some system environments putting their MPI headers before
 #the headers CMake includes. Forces non-system MPI headers when incorrect headers
 #detected in include path.
-option(FENIX_SYSTEM_INC_FIX  "Attempts to force overriding any system MPI headers" ON)
-
+option(FENIX_SYSTEM_INC_FIX    "Attempts to force overriding any system MPI headers" ON)
+option(FENIX_PROPAGATE_INC_FIX "Attempt overriding system MPI headers in linking projects" ON)
 
 find_package(MPI REQUIRED)
 
-#If we're using mpicc, we don't need to worry about the includes.
-if("${CMAKE_C_COMPILER}" MATCHES ".*/?mpicc")
-  set(FENIX_SYSTEM_INC_FIX OFF)
-endif()
-
 if(${FENIX_SYSTEM_INC_FIX})
-  include(CheckIncludeFile)
-  set(CMAKE_REQUIRED_QUIET ON)
-  check_include_file("mpi.h" MPI_HEADER_CRASH)
-  set(CMAKE_REQUIRED_QUIET OFF)
-
-  if(${MPI_HEADER_CRASH})
-    message(WARNING "Detected system MPI headers, attempting to force use of ${MPI_C_INCLUDE_DIRS}. Set SYSTEM_INC_FIX=OFF to stop this behavior.")
-
-    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.25")
-      set_target_properties(MPI::MPI_C PROPERTIES SYSTEM "FALSE")
-    else()
-      foreach(MPI_DIR ${MPI_C_INCLUDE_DIRS})
-        if(NOT ("${CMAKE_C_FLAGS}" MATCHES ".* -I${MPI_DIR}.*"))
-          message(STATUS ${MPI_DIR})
-          set(CMAKE_C_FLAGS " -I${MPI_DIR} ${CMAKE_C_FLAGS}" CACHE INTERNAL "Flags to append to build commands for C.")
-        endif()
-      endforeach()
-      set_target_properties(MPI::MPI_C PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
-    endif()
-  endif()
-
-endif() #SYSTEM_INC_FIX
+  include(cmake/systemMPIOverride.cmake)
+endif()
 
 
 add_subdirectory(src)
 
+
 include(CTest)
 list(APPEND MPIEXEC_PREFLAGS "--with-ft;mpi")
 
-if(FENIX_EXAMPLES)
+if(BUILD_EXAMPLES)
     add_subdirectory(examples)
 endif()
 
-if(FENIX_TESTS)
+if(BUILD_TESTING)
     add_subdirectory(test)
 endif()
 
@@ -75,17 +52,22 @@ configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fenix-config.h.in
     ${CMAKE_CURRENT_BINARY_DIR}/include/fenix-config.h @ONLY
 )
+configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/systemMPIOverride.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/cmake/systemMPIOverride.cmake COPYONLY
+)
+
 
 include(CMakePackageConfigHelpers)
-configure_package_config_file(fenixConfig.cmake.in
-                              ${CMAKE_CURRENT_BINARY_DIR}/fenixConfig.cmake
+configure_package_config_file(cmake/fenixConfig.cmake.in
+                              ${CMAKE_CURRENT_BINARY_DIR}/cmake/fenixConfig.cmake
                               INSTALL_DESTINATION cmake)
-write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/fenixConfigVersion.cmake
+write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/cmake/fenixConfigVersion.cmake
                                  VERSION "${FENIX_VERSION_MAJOR}.${FENIX_VERSION_MINOR}"
                                  COMPATIBILITY SameMajorVersion)
 install(
     FILES
-    ${CMAKE_CURRENT_BINARY_DIR}/fenixConfig.cmake
-    ${CMAKE_CURRENT_BINARY_DIR}/fenixConfigVersion.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/cmake/fenixConfig.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/cmake/systemMPIOverride.cmake
     DESTINATION cmake
 )
diff --git a/cmake/fenixConfig.cmake.in b/cmake/fenixConfig.cmake.in
new file mode 100644
index 0000000..464e150
--- /dev/null
+++ b/cmake/fenixConfig.cmake.in
@@ -0,0 +1,13 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+
+include("${CMAKE_CURRENT_LIST_DIR}/fenixTargets.cmake")
+
+set(FENIX_SYSTEM_INC_FIX @FENIX_SYSTEM_INC_FIX@)
+if(${FENIX_SYSTEM_INC_FIX})
+  option(FENIX_PROPAGATE_INC_FIX "Attempt overriding system MPI headers in linking projects" @FENIX_PROPAGATE_INC_FIX@)
+  if(${FENIX_PROPAGATE_INC_FIX})
+    include("${CMAKE_CURRENT_LIST_DIR}/systemMPIOverride.cmake")
+  endif()
+endif()
diff --git a/cmake/systemMPIOverride.cmake b/cmake/systemMPIOverride.cmake
new file mode 100644
index 0000000..95b2619
--- /dev/null
+++ b/cmake/systemMPIOverride.cmake
@@ -0,0 +1,51 @@
+#If we're using mpicc, we don't need to worry about the includes.
+if("${CMAKE_C_COMPILER}" MATCHES ".*/?mpic")
+    return()
+endif()
+
+include(CheckIncludeFile)
+set(CMAKE_REQUIRED_QUIET ON)
+check_include_file("mpi.h" MPI_HEADER_CLASH)
+set(CMAKE_REQUIRED_QUIET OFF)
+
+if(${MPI_HEADER_CLASH})
+  if(TARGET fenix)
+    message(WARNING "Fenix detected system MPI headers, attempting to force use of ${MPI_C_INCLUDE_DIRS}. Disable FENIX_PROPAGATE_INC_FIX to stop this behavior.")
+  else()
+    message(WARNING "Detected system MPI headers, attempting to force use of ${MPI_C_INCLUDE_DIRS}. Disable FENIX_SYSTEM_INC_FIX to stop this behavior.")
+  endif()
+
+  if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.25")
+
+    if(TARGET MPI::MPI_C)
+      set_target_properties(MPI::MPI_C PROPERTIES SYSTEM "FALSE")
+    endif()
+    if(TARGET MPI::MPI_CXX)
+      set_target_properties(MPI::MPI_CXX PROPERTIES SYSTEM "FALSE")
+    endif()
+
+  else()
+
+    if(TARGET MPI::MPI_C)
+      set_property(DIRECTORY ${CMAKE_SOURCE_DIR} APPEND PROPERTY INCLUDE_DIRECTORIES "${MPI_C_INCLUDE_DIRS}")
+    endif()
+    if(TARGET MPI::MPI_CXX)
+      set_property(DIRECTORY ${CMAKE_SOURCE_DIR} APPEND PROPERTY INCLUDE_DIRECTORIES "${MPI_CXX_INCLUDE_DIRS}")
+    endif()
+
+    if(TARGET fenix)
+      get_target_property(FENIX_INCLUDES fenix INTERFACE_INCLUDE_DIRECTORIES)
+      list(REMOVE_ITEM FENIX_INCLUDES ${MPI_C_INCLUDE_DIRS})
+      list(REMOVE_ITEM FENIX_INCLUDES ${MPI_CXX_INCLUDE_DIRS})
+      set_target_properties(fenix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FENIX_INCLUDES}")
+    endif()
+    
+    if(TARGET MPI::MPI_C)
+      set_target_properties(MPI::MPI_C PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
+    endif()
+    if(TARGET MPI::MPI_CXX)
+      set_target_properties(MPI::MPI_CXX PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
+    endif()
+
+  endif()
+endif()
diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt
index a474f82..6a344f4 100644
--- a/examples/01_hello_world/fenix/CMakeLists.txt
+++ b/examples/01_hello_world/fenix/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(fenix_hello_world fenix_hello_world.c)
 target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES})
 
-if(FENIX_TESTS)
+if(BUILD_TESTING)
    add_test(NAME hello_world 
       COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} fenix_hello_world ${MPIEXEC_POSTFLAGS} "1")
 endif()
diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt
index 0c81d03..bf40679 100644
--- a/examples/02_send_recv/fenix/CMakeLists.txt
+++ b/examples/02_send_recv/fenix/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(fenix_ring fenix_ring.c)
 target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES} m )
 
-if(FENIX_TESTS)
+if(BUILD_TESTING)
    add_test(NAME ring 
       COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_ring ${MPIEXEC_POSTFLAGS} 1 2)
    set_tests_properties(ring PROPERTIES
diff --git a/examples/05_subset_create/CMakeLists.txt b/examples/05_subset_create/CMakeLists.txt
index fccb552..7f1efcd 100644
--- a/examples/05_subset_create/CMakeLists.txt
+++ b/examples/05_subset_create/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(subset_create subset_create.c)
 target_link_libraries(subset_create fenix ${MPI_C_LIBRARIES})
 
-if(FENIX_TESTS) 
+if(BUILD_TESTING) 
    add_test(NAME subset_create 
       COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} subset_create ${MPIEXEC_POSTFLAGS} 1)
    set_tests_properties(subset_create PROPERTIES
diff --git a/examples/06_subset_createv/CMakeLists.txt b/examples/06_subset_createv/CMakeLists.txt
index 6a7b356..c242648 100644
--- a/examples/06_subset_createv/CMakeLists.txt
+++ b/examples/06_subset_createv/CMakeLists.txt
@@ -11,7 +11,7 @@
 add_executable(subset_createv subset_createv.c)
 target_link_libraries(subset_createv fenix ${MPI_C_LIBRARIES})
 
-if(FENIX_TESTS) 
+if(BUILD_TESTING) 
    add_test(NAME subset_createv 
       COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} subset_createv ${MPIEXEC_POSTFLAGS} 1)
    set_tests_properties(subset_createv PROPERTIES
diff --git a/fenixConfig.cmake.in b/fenixConfig.cmake.in
deleted file mode 100644
index 6f59550..0000000
--- a/fenixConfig.cmake.in
+++ /dev/null
@@ -1,5 +0,0 @@
-@PACKAGE_INIT@
-
-include(CMakeFindDependencyMacro)
-
-include("${CMAKE_CURRENT_LIST_DIR}/fenixTargets.cmake")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 096b76a..7c823fd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -33,7 +33,7 @@ globals.c
 
 add_library( fenix STATIC ${Fenix_SOURCES})
 
-target_link_libraries(fenix MPI::MPI_C)
+target_link_libraries(fenix PUBLIC MPI::MPI_C)
 
 target_include_directories(fenix 
     PUBLIC  

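For reference, a minimal sketch of configuring and running the test suite after this patch, assuming an ULFM-enabled Open MPI `mpicc` is on the PATH; the oversubscription preflag mirrors the Dockerfile above and is only needed when there are fewer cores than ranks:

    # BUILD_EXAMPLES/BUILD_TESTING are the option names restored by this patch;
    # the top-level CMakeLists appends "--with-ft;mpi" to MPIEXEC_PREFLAGS itself.
    cmake ../fenix -DCMAKE_C_COMPILER=mpicc \
          -DBUILD_EXAMPLES=ON -DBUILD_TESTING=ON \
          -DMPIEXEC_PREFLAGS="--map-by;:OVERSUBSCRIBE"
    make -j8
    ctest --verbose --timeout 60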
From bdb409e0f6132136681f533d71c4593e175c34ff Mon Sep 17 00:00:00 2001
From: Matthew Whitlock 
Date: Thu, 12 Oct 2023 16:31:18 -0400
Subject: [PATCH 15/15] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2cb81c1..b7f4c97 100644
--- a/README.md
+++ b/README.md
@@ -23,8 +23,8 @@ These instructions assume you are in your home directory.
    * Check out the CMake documentation for the best information on how to do this, but in general:
       * Set the CC environment variable to the correct `mpicc`,
       * Invoke cmake with `-DCMAKE_C_COMPILER=mpicc`,
-      * Add the mpi install directory to CMAKE_PREFIX_PATH (see CMakeLists.txt FENIX_SYSTEM_INC_FIX option).
-   * If you experience segmentation faults during simple MPI function calls, it is likely you have mixed up 
+      * Add the mpi install directory to CMAKE_PREFIX_PATH.
+   * If you experience segmentation faults during simple MPI function calls, this is often caused by accidentally building against multiple versions of MPI. See the FENIX_SYSTEM_INC_FIX CMake option for a potential fix.
 4. Run ` cmake ../ -DCMAKE_INSTALL_PREFIX=... && make install`
 5. Optionally, add the install prefix to your CMAKE\_PREFIX\_PATHS environment variable, to enable `find_package(fenix)` in your other projects.
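
A small sketch of the configure-time knobs this advice points to; the compiler path below is a placeholder:

    # Preferred: build with the ULFM Open MPI wrapper so only one MPI is ever seen.
    cmake ../fenix -DCMAKE_C_COMPILER=/path/to/ulfm-ompi/bin/mpicc
    # If the header override misbehaves, it can be disabled at configure time.
    cmake ../fenix -DFENIX_SYSTEM_INC_FIX=OFF
    # Projects consuming an installed Fenix can opt out of the propagated fix.
    cmake . -DFENIX_PROPAGATE_INC_FIX=OFF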