Skip to content

Commit

Permalink
communicator: make c_name a dynamic array and reorder struct
Browse files Browse the repository at this point in the history
Make the c_name element of the communicator structure a dynamically
allocated string instead of a fixed-size array. This allows us to reduce
the size of PREDEFINED_COMMUNICATOR_PAD back to 512 to maintain backwards
compatibility with the ompi 4.1.x release series.

Reorder the communicator fields to reduce the struct size.
This brings the communicator size to 536 bytes with FT and PERUSE enabled
and compiled in debug mode.

Fixes issue open-mpi#11373

Signed-off-by: Edgar Gabriel <edgar.gabriel@amd.com>
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
  • Loading branch information
edgargabriel committed Feb 14, 2023
1 parent e5abeb8 commit 2d68804
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 58 deletions.
6 changes: 6 additions & 0 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -215,6 +216,11 @@ int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm,
if (NULL == newcomm) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
newcomm->c_name = (char*) malloc (OPAL_MAX_OBJECT_NAME);
if (NULL == newcomm->c_name) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
newcomm->c_name[0] = '\0';
newcomm->super.s_info = NULL;
/* fill in the inscribing hyper-cube dimensions */
newcomm->c_cube_dim = opal_cube_dim(local_size);
Expand Down
21 changes: 12 additions & 9 deletions ompi/communicator/comm_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2020 The University of Tennessee and The University
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand All @@ -25,6 +25,7 @@
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -169,8 +170,7 @@ int ompi_comm_init(void)
(void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 0, &ompi_mpi_comm_null);
(void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 1, &ompi_mpi_comm_null);

opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL",
sizeof(ompi_mpi_comm_null.comm.c_name));
ompi_mpi_comm_null.comm.c_name = strdup ("MPI_COMM_NULL");
ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC |
OMPI_COMM_GLOBAL_INDEX;

Expand Down Expand Up @@ -221,8 +221,7 @@ int ompi_comm_init_mpi3 (void)
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm);
opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world);

opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD",
sizeof(ompi_mpi_comm_world.comm.c_name));
ompi_mpi_comm_world.comm.c_name = strdup("MPI_COMM_WORLD");
ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC |
OMPI_COMM_GLOBAL_INDEX;
ompi_mpi_comm_world.comm.instance = group->grp_instance;
Expand Down Expand Up @@ -280,8 +279,7 @@ int ompi_comm_init_mpi3 (void)
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm);
opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self);

opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF",
sizeof(ompi_mpi_comm_self.comm.c_name));
ompi_mpi_comm_self.comm.c_name = strdup("MPI_COMM_SELF");
ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC |
OMPI_COMM_GLOBAL_INDEX;
ompi_mpi_comm_self.comm.instance = group->grp_instance;
Expand Down Expand Up @@ -412,7 +410,7 @@ static int ompi_comm_finalize (void)
static void ompi_comm_construct(ompi_communicator_t* comm)
{
int idx;
comm->c_name[0] = '\0';
comm->c_name = NULL;
comm->c_index = MPI_UNDEFINED;
comm->c_flags = 0;
comm->c_my_rank = 0;
Expand Down Expand Up @@ -444,7 +442,7 @@ static void ompi_comm_construct(ompi_communicator_t* comm)
this communicator */
comm->c_keyhash = NULL;

comm->errhandler_type = OMPI_ERRHANDLER_TYPE_COMM;
comm->error_handler = &ompi_mpi_errors_are_fatal.eh;
#ifdef OMPI_WANT_PERUSE
comm->c_peruse_handles = NULL;
#endif
Expand Down Expand Up @@ -520,6 +518,11 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
comm->error_handler = NULL;
}

if (NULL != comm->c_name) {
free (comm->c_name);
comm->c_name = NULL;
}

#if OPAL_ENABLE_FT_MPI
if( NULL != comm->agreement_specific ) {
OBJ_RELEASE( comm->agreement_specific );
Expand Down
44 changes: 20 additions & 24 deletions ompi/communicator/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2020 The University of Tennessee and The University
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand All @@ -24,6 +24,7 @@
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -261,20 +262,30 @@ struct ompi_communicator_t {
opal_infosubscriber_t super;
opal_mutex_t c_lock; /* mutex for name and potentially
attributes */
char c_name[MPI_MAX_OBJECT_NAME];
char *c_name;
ompi_comm_extended_cid_t c_contextid;
ompi_comm_extended_cid_block_t c_contextidb;
uint32_t c_index;
int c_my_rank;
uint32_t c_flags; /* flags, e.g. intercomm,
topology, etc. */
uint32_t c_assertions; /* info assertions */
int c_id_available; /* the currently available Cid for allocation
to a child*/
int c_id_start_index; /* the starting index of the block of cids
allocated to this communicator*/
#if OPAL_ENABLE_FT_MPI
uint32_t c_epoch; /* Identifier used to differentiate between two communicators
using the same c_contextid (not at the same time, obviously) */
#endif
/* Non-blocking collective tag. These tags might be shared between
* all non-blocking collective modules (to avoid message collision
* between them in the case where multiple outstanding non-blocking
* collective coexists using multiple backends).
*/
opal_atomic_int32_t c_nbc_tag;

/**< inscribing cube dimension */
int c_cube_dim;

/* index in Fortran <-> C translation array */
int c_f_to_c_index;

ompi_group_t *c_local_group;
ompi_group_t *c_remote_group;
Expand All @@ -287,16 +298,10 @@ struct ompi_communicator_t {
/* Attributes */
struct opal_hash_table_t *c_keyhash;

/**< inscribing cube dimension */
int c_cube_dim;

/* Standard information about the selected topology module (or NULL
if this is not a cart, graph or dist graph communicator) */
struct mca_topo_base_module_t* c_topo;

/* index in Fortran <-> C translation array */
int c_f_to_c_index;

#ifdef OMPI_WANT_PERUSE
/*
* Place holder for the PERUSE events.
Expand All @@ -307,9 +312,7 @@ struct ompi_communicator_t {
/* Error handling. This field does not have the "c_" prefix so
that the OMPI_ERRHDL_* macros can find it, regardless of whether
it's a comm, window, or file. */

ompi_errhandler_t *error_handler;
ompi_errhandler_type_t errhandler_type;

/* Hooks for PML to hang things */
struct mca_pml_comm_t *c_pml_comm;
Expand All @@ -320,21 +323,14 @@ struct ompi_communicator_t {
/* Collectives module interface and data */
mca_coll_base_comm_coll_t *c_coll;

/* Non-blocking collective tag. These tags might be shared between
* all non-blocking collective modules (to avoid message collision
* between them in the case where multiple outstanding non-blocking
* collective coexists using multiple backends).
*/
opal_atomic_int32_t c_nbc_tag;

/* instance that this comm belongs to */
ompi_instance_t* instance;

#if OPAL_ENABLE_FT_MPI
/** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */
int any_source_offset;
/** agreement caching info for topology and previous returned decisions */
opal_object_t *agreement_specific;
/** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */
int any_source_offset;
/** Are MPI_ANY_SOURCE operations enabled? - OMPI_Comm_failure_ack */
bool any_source_enabled;
/** Has this communicator been revoked - OMPI_Comm_revoke() */
Expand Down Expand Up @@ -437,7 +433,7 @@ typedef struct ompi_communicator_t ompi_communicator_t;
* the PREDEFINED_COMMUNICATOR_PAD macro?
* A: Most likely not, but it would be good to check.
*/
#define PREDEFINED_COMMUNICATOR_PAD 1024
#define PREDEFINED_COMMUNICATOR_PAD 512

struct ompi_predefined_communicator_t {
struct ompi_communicator_t comm;
Expand Down
10 changes: 1 addition & 9 deletions ompi/debuggers/predefined_gap_test.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2009 Sun Microsystems, Inc All rights reserved.
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 The University of Tennessee and The University
* Copyright (c) 2012-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2012-2013 Inria. All rights reserved.
Expand Down Expand Up @@ -59,8 +59,6 @@ int main(int argc, char **argv) {
GAP_CHECK("c_contextid", test_comm, c_contextid, c_name, 1);
GAP_CHECK("c_my_rank", test_comm, c_my_rank, c_contextid, 1);
GAP_CHECK("c_flags", test_comm, c_flags, c_my_rank, 1);
GAP_CHECK("c_id_available", test_comm, c_id_available, c_flags, 1);
GAP_CHECK("c_id_start_index", test_comm, c_id_start_index, c_id_available, 1);
GAP_CHECK("c_remote_group", test_comm, c_remote_group, c_local_group, 1);
GAP_CHECK("c_local_comm", test_comm, c_local_comm, c_remote_group, 1);
GAP_CHECK("c_keyhash", test_comm, c_keyhash, c_local_comm, 1);
Expand All @@ -73,8 +71,6 @@ int main(int argc, char **argv) {
#else
GAP_CHECK("error_handler", test_comm, error_handler, c_f_to_c_index, 1);
#endif
GAP_CHECK("errhandler_type", test_comm, errhandler_type, error_handler, 1);
GAP_CHECK("c_pml_comm", test_comm, c_pml_comm, errhandler_type, 1);
GAP_CHECK("c_coll", test_comm, c_coll, c_pml_comm, 1);

/* Test Predefined group sizes */
Expand Down Expand Up @@ -129,8 +125,6 @@ int main(int argc, char **argv) {
GAP_CHECK("w_keyhash", test_win, w_keyhash, w_flags, 1);
GAP_CHECK("w_f_to_c_index", test_win, w_f_to_c_index, w_keyhash, 1);
GAP_CHECK("error_handler", test_win, error_handler, w_f_to_c_index, 1);
GAP_CHECK("errhandler_type", test_win, errhandler_type, error_handler, 1);
GAP_CHECK("w_osc_module", test_win, w_osc_module, errhandler_type, 1);

/* Test Predefined info sizes */
printf("=============================================\n");
Expand All @@ -151,8 +145,6 @@ int main(int argc, char **argv) {
GAP_CHECK("f_flags", test_file, f_flags, f_amode, 1);
GAP_CHECK("f_f_to_c_index", test_file, f_f_to_c_index, f_flags, 1);
GAP_CHECK("error_handler", test_file, error_handler, f_f_to_c_index, 1);
GAP_CHECK("errhandler_type", test_file, errhandler_type, error_handler, 1);
GAP_CHECK("f_io_version", test_file, f_io_version, errhandler_type, 1);
GAP_CHECK("f_io_selected_component", test_file, f_io_selected_component, f_io_version, 1);
GAP_CHECK("f_io_selected_module", test_file, f_io_selected_module, f_io_selected_component, 1);
GAP_CHECK("f_io_selected_data", test_file, f_io_selected_data, f_io_selected_module, 1);
Expand Down
10 changes: 5 additions & 5 deletions ompi/errhandler/errhandler.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2022 The University of Tennessee and The University
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -388,10 +388,10 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo
OMPI_NAME_PRINT(&ompi_proc->super.proc_name),
ompi_comm_print_cid(comm),
proc_rank,
(OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->errhandler_type ? "P" :
(OMPI_ERRHANDLER_TYPE_COMM == comm->errhandler_type ? "C" :
(OMPI_ERRHANDLER_TYPE_WIN == comm->errhandler_type ? "W" :
(OMPI_ERRHANDLER_TYPE_FILE == comm->errhandler_type ? "F" : "U") ) ) )
(OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->error_handler->eh_mpi_object_type ? "P" :
(OMPI_ERRHANDLER_TYPE_COMM == comm->error_handler->eh_mpi_object_type ? "C" :
(OMPI_ERRHANDLER_TYPE_WIN == comm->error_handler->eh_mpi_object_type ? "W" :
(OMPI_ERRHANDLER_TYPE_FILE == comm->error_handler->eh_mpi_object_type ? "F" : "U") ) ) )
));
}

Expand Down
8 changes: 4 additions & 4 deletions ompi/errhandler/errhandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2022 The University of Tennessee and The University
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -238,7 +238,7 @@ extern opal_atomic_int32_t ompi_instance_count;
#define OMPI_ERRHANDLER_INVOKE(mpi_object, err_code, message) \
ompi_errhandler_invoke((mpi_object)->error_handler, \
(mpi_object), \
(int)(mpi_object)->errhandler_type, \
(int)(mpi_object)->error_handler->eh_mpi_object_type, \
ompi_errcode_get_mpi_code(err_code), \
(message));

Expand Down Expand Up @@ -269,7 +269,7 @@ extern opal_atomic_int32_t ompi_instance_count;
int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \
ompi_errhandler_invoke((mpi_object)->error_handler, \
(mpi_object), \
(int) (mpi_object)->errhandler_type, \
(int) (mpi_object)->error_handler->eh_mpi_object_type, \
(__mpi_err_code), \
(message)); \
return (__mpi_err_code); \
Expand Down Expand Up @@ -307,7 +307,7 @@ extern opal_atomic_int32_t ompi_instance_count;
int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \
ompi_errhandler_invoke((mpi_object)->error_handler, \
(mpi_object), \
(int)(mpi_object)->errhandler_type, \
(int)(mpi_object)->error_handler->eh_mpi_object_type, \
(__mpi_err_code), \
(message)); \
return (__mpi_err_code); \
Expand Down
8 changes: 4 additions & 4 deletions ompi/errhandler/errhandler_invoke.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2020 The University of Tennessee and The University
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -183,19 +183,19 @@ int ompi_errhandler_request_invoke(int count,
case OMPI_REQUEST_COLL:
return ompi_errhandler_invoke(mpi_object.comm->error_handler,
mpi_object.comm,
mpi_object.comm->errhandler_type,
mpi_object.comm->error_handler->eh_mpi_object_type,
ec, message);
break;
case OMPI_REQUEST_IO:
return ompi_errhandler_invoke(mpi_object.file->error_handler,
mpi_object.file,
mpi_object.file->errhandler_type,
mpi_object.file->error_handler->eh_mpi_object_type,
ec, message);
break;
case OMPI_REQUEST_WIN:
return ompi_errhandler_invoke(mpi_object.win->error_handler,
mpi_object.win,
mpi_object.win->errhandler_type,
mpi_object.win->error_handler->eh_mpi_object_type,
ec, message);
break;
default:
Expand Down
4 changes: 1 addition & 3 deletions ompi/include/ompi/memchecker.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2010-2017 The University of Tennessee and The University
* Copyright (c) 2010-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
Expand Down Expand Up @@ -220,8 +220,6 @@ static inline int memchecker_comm(MPI_Comm comm)
opal_memchecker_base_isdefined (&comm->c_name, MPI_MAX_OBJECT_NAME);
opal_memchecker_base_isdefined (&comm->c_my_rank, sizeof(int));
opal_memchecker_base_isdefined (&comm->c_flags, sizeof(uint32_t));
opal_memchecker_base_isdefined (&comm->c_id_available, sizeof(int));
opal_memchecker_base_isdefined (&comm->c_id_start_index, sizeof(int));
opal_memchecker_base_isdefined (&comm->c_local_group, sizeof(ompi_group_t *));
opal_memchecker_base_isdefined (&comm->c_remote_group, sizeof(ompi_group_t *));
opal_memchecker_base_isdefined (&comm->c_keyhash, sizeof(struct opal_hash_table_t *));
Expand Down

0 comments on commit 2d68804

Please sign in to comment.