Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v5.0.x] backport bugfixes created during mtt bug bash #11821

Merged
merged 5 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ompi/communicator/comm_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ static void ompi_comm_construct(ompi_communicator_t* comm)
/* A keyhash will be created if/when an attribute is cached on
this communicator */
comm->c_keyhash = NULL;

comm->errhandler_type = OMPI_ERRHANDLER_TYPE_COMM;
comm->error_handler = &ompi_mpi_errors_are_fatal.eh;
#ifdef OMPI_WANT_PERUSE
comm->c_peruse_handles = NULL;
Expand Down
1 change: 1 addition & 0 deletions ompi/communicator/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ struct ompi_communicator_t {
that the OMPI_ERRHDL_* macros can find it, regardless of whether
it's a comm, window, or file. */
ompi_errhandler_t *error_handler;
ompi_errhandler_type_t errhandler_type;

/* Hooks for PML to hang things */
struct mca_pml_comm_t *c_pml_comm;
Expand Down
7 changes: 6 additions & 1 deletion ompi/debuggers/predefined_gap_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ int main(int argc, char **argv) {
#else
GAP_CHECK("error_handler", test_comm, error_handler, c_f_to_c_index, 1);
#endif
GAP_CHECK("errhandler_type", test_comm, errhandler_type, error_handler, 1);
GAP_CHECK("c_pml_comm", test_comm, c_pml_comm, errhandler_type, 1);
GAP_CHECK("c_coll", test_comm, c_coll, c_pml_comm, 1);

/* Test Predefined group sizes */
Expand Down Expand Up @@ -125,7 +127,8 @@ int main(int argc, char **argv) {
GAP_CHECK("w_keyhash", test_win, w_keyhash, w_flags, 1);
GAP_CHECK("w_f_to_c_index", test_win, w_f_to_c_index, w_keyhash, 1);
GAP_CHECK("error_handler", test_win, error_handler, w_f_to_c_index, 1);

GAP_CHECK("errhandler_type", test_win, errhandler_type, error_handler, 1);
GAP_CHECK("w_osc_module", test_win, w_osc_module, errhandler_type, 1);
/* Test Predefined info sizes */
printf("=============================================\n");
printf("ompi_predefined_info_t = %lu bytes\n", sizeof(ompi_predefined_info_t));
Expand All @@ -145,6 +148,8 @@ int main(int argc, char **argv) {
GAP_CHECK("f_flags", test_file, f_flags, f_amode, 1);
GAP_CHECK("f_f_to_c_index", test_file, f_f_to_c_index, f_flags, 1);
GAP_CHECK("error_handler", test_file, error_handler, f_f_to_c_index, 1);
GAP_CHECK("errhandler_type", test_file, errhandler_type, error_handler, 1);
GAP_CHECK("f_io_version", test_file, f_io_version, errhandler_type, 1);
GAP_CHECK("f_io_selected_component", test_file, f_io_selected_component, f_io_version, 1);
GAP_CHECK("f_io_selected_module", test_file, f_io_selected_module, f_io_selected_component, 1);
GAP_CHECK("f_io_selected_data", test_file, f_io_selected_data, f_io_selected_module, 1);
Expand Down
8 changes: 4 additions & 4 deletions ompi/errhandler/errhandler.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,10 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo
OMPI_NAME_PRINT(&ompi_proc->super.proc_name),
ompi_comm_print_cid(comm),
proc_rank,
(OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->error_handler->eh_mpi_object_type ? "P" :
(OMPI_ERRHANDLER_TYPE_COMM == comm->error_handler->eh_mpi_object_type ? "C" :
(OMPI_ERRHANDLER_TYPE_WIN == comm->error_handler->eh_mpi_object_type ? "W" :
(OMPI_ERRHANDLER_TYPE_FILE == comm->error_handler->eh_mpi_object_type ? "F" : "U") ) ) )
(OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->errhandler_type ? "P" :
(OMPI_ERRHANDLER_TYPE_COMM == comm->errhandler_type ? "C" :
(OMPI_ERRHANDLER_TYPE_WIN == comm->errhandler_type ? "W" :
(OMPI_ERRHANDLER_TYPE_FILE == comm->errhandler_type ? "F" : "U") ) ) )
));
}

Expand Down
6 changes: 3 additions & 3 deletions ompi/errhandler/errhandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ extern opal_atomic_int32_t ompi_instance_count;
#define OMPI_ERRHANDLER_INVOKE(mpi_object, err_code, message) \
ompi_errhandler_invoke((mpi_object)->error_handler, \
(mpi_object), \
(int)(mpi_object)->error_handler->eh_mpi_object_type, \
(int)(mpi_object)->errhandler_type, \
ompi_errcode_get_mpi_code(err_code), \
(message));

Expand Down Expand Up @@ -269,7 +269,7 @@ extern opal_atomic_int32_t ompi_instance_count;
int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \
ompi_errhandler_invoke((mpi_object)->error_handler, \
(mpi_object), \
(int) (mpi_object)->error_handler->eh_mpi_object_type, \
(int) (mpi_object)->errhandler_type, \
(__mpi_err_code), \
(message)); \
return (__mpi_err_code); \
Expand Down Expand Up @@ -307,7 +307,7 @@ extern opal_atomic_int32_t ompi_instance_count;
int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \
ompi_errhandler_invoke((mpi_object)->error_handler, \
(mpi_object), \
(int)(mpi_object)->error_handler->eh_mpi_object_type, \
(int)(mpi_object)->errhandler_type, \
(__mpi_err_code), \
(message)); \
return (__mpi_err_code); \
Expand Down
6 changes: 3 additions & 3 deletions ompi/errhandler/errhandler_invoke.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,19 +202,19 @@ int ompi_errhandler_request_invoke(int count,
case OMPI_REQUEST_COLL:
return ompi_errhandler_invoke(mpi_object.comm->error_handler,
mpi_object.comm,
mpi_object.comm->error_handler->eh_mpi_object_type,
mpi_object.comm->errhandler_type,
ec, message);
break;
case OMPI_REQUEST_IO:
return ompi_errhandler_invoke(mpi_object.file->error_handler,
mpi_object.file,
mpi_object.file->error_handler->eh_mpi_object_type,
mpi_object.file->errhandler_type,
ec, message);
break;
case OMPI_REQUEST_WIN:
return ompi_errhandler_invoke(mpi_object.win->error_handler,
mpi_object.win,
mpi_object.win->error_handler->eh_mpi_object_type,
mpi_object.win->errhandler_type,
ec, message);
break;
default:
Expand Down
21 changes: 11 additions & 10 deletions ompi/group/group_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,16 +419,6 @@ int ompi_group_init(void)
return OMPI_ERROR;
}

#if OPAL_ENABLE_FT_MPI
/* Setup global list of failed processes */
ompi_group_all_failed_procs = OBJ_NEW(ompi_group_t);
ompi_group_all_failed_procs->grp_proc_count = 0;
ompi_group_all_failed_procs->grp_my_rank = MPI_UNDEFINED;
ompi_group_all_failed_procs->grp_proc_pointers = NULL;
ompi_group_all_failed_procs->grp_flags |= OMPI_GROUP_DENSE;
ompi_group_all_failed_procs->grp_flags |= OMPI_GROUP_INTRINSIC;
#endif

/* add MPI_GROUP_NULL to table */
OBJ_CONSTRUCT(&ompi_mpi_group_null, ompi_group_t);
ompi_mpi_group_null.group.grp_proc_count = 0;
Expand All @@ -445,6 +435,17 @@ int ompi_group_init(void)
ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_DENSE;
ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_INTRINSIC;

#if OPAL_ENABLE_FT_MPI
/* Setup global list of failed processes */
ompi_group_all_failed_procs = OBJ_NEW(ompi_group_t);
ompi_group_all_failed_procs->grp_proc_count = 0;
ompi_group_all_failed_procs->grp_my_rank = MPI_UNDEFINED;
ompi_group_all_failed_procs->grp_proc_pointers = NULL;
ompi_group_all_failed_procs->grp_flags |= OMPI_GROUP_DENSE;
ompi_group_all_failed_procs->grp_flags |= OMPI_GROUP_INTRINSIC;
#endif


ompi_mpi_instance_append_finalize (ompi_group_finalize);

return OMPI_SUCCESS;
Expand Down
11 changes: 9 additions & 2 deletions ompi/mca/coll/han/coll_han_reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ mca_coll_han_reduce_intra(const void *sbuf,
mca_coll_task_t *t_next_seg = OBJ_NEW(mca_coll_task_t);
/* Set up t_next_seg task arguments */
t->cur_task = t_next_seg;
t->sbuf = (char *) t->sbuf + extent * t->seg_count;
if (t->sbuf != MPI_IN_PLACE) {
t->sbuf = (char *) t->sbuf + extent * t->seg_count;
}

if (up_rank == root_up_rank) {
t->rbuf = (char *) t->rbuf + extent * t->seg_count;
}
Expand Down Expand Up @@ -242,6 +245,7 @@ int mca_coll_han_reduce_t1_task(void *task_args) {
if (next_seg <= t->num_segments - 1) {
int tmp_count = t->seg_count;
char *tmp_rbuf = NULL;
char *tmp_sbuf = NULL;
if (next_seg == t->num_segments - 1 && t->last_seg_count != t->seg_count) {
tmp_count = t->last_seg_count;
}
Expand All @@ -250,7 +254,10 @@ int mca_coll_han_reduce_t1_task(void *task_args) {
} else if (NULL != t->rbuf) {
tmp_rbuf = (char*)t->rbuf + extent * t->seg_count;
}
t->low_comm->c_coll->coll_reduce((char *) t->sbuf + extent * t->seg_count,

tmp_sbuf = (t->sbuf == MPI_IN_PLACE) ? MPI_IN_PLACE : (char *)t->sbuf + extent * t->seg_count;

t->low_comm->c_coll->coll_reduce((char *) tmp_sbuf,
(char *) tmp_rbuf, tmp_count,
t->dtype, t->op, t->root_low_rank, t->low_comm,
t->low_comm->c_coll->coll_reduce_module);
Expand Down
2 changes: 1 addition & 1 deletion ompi/mca/pml/cm/pml_cm_sendreq.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ do { \
&max_data ); \
opal_convertor_prepare_for_send( &sendreq->req_send.req_base.req_convertor, \
&(ompi_mpi_packed.dt.super), \
max_data, sendreq->req_buff ); \
max_data, sendreq->req_addr ); \
} \
} \
} while(0);
Expand Down
4 changes: 2 additions & 2 deletions ompi/runtime/ompi_mpi_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ bool ompi_async_mpi_finalize = false;
uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
bool ompi_mpi_dynamics_enabled = true;

bool ompi_mpi_compat_mpi3 = false;
bool ompi_mpi_compat_mpi3 = true;

char *ompi_mpi_spc_attach_string = NULL;
bool ompi_mpi_spc_dump_enabled = false;
Expand Down Expand Up @@ -362,7 +362,7 @@ int ompi_mpi_register_params(void)
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}

ompi_mpi_compat_mpi3 = false;
ompi_mpi_compat_mpi3 = true;
(void) mca_base_var_register("ompi", "mpi", NULL, "compat_mpi3",
"A boolean value for whether Open MPI operates in MPI-3 compatibility mode; this changes the following behavior: in operations without a handle, errors are raised on (true) MPI_COMM_WORLD (MPI-3 behavior) or (false) MPI_COMM_SELF (MPI-4 behavior).",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
Expand Down
4 changes: 2 additions & 2 deletions ompi/runtime/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ OMPI_DECLSPEC extern int ompi_mpi_abort_delay;
/**
* Whether we operate in MPI3 compatibility mode (default), or MPI4 mode.
*
* true: use MPI3 compatibility
* false: use MPI4 compatibility (default)
* true: use MPI3 compatibility (default)
* false: use MPI4 compatibility
*
* Behavioral changes:
* - errors in operations without a handle are raised on MPI_COMM_WORLD (MPI-3 behavior) or MPI_COMM_SELF (MPI-4 behavior)
Expand Down