Skip to content

Commit

Permalink
Merge branch 'open-mpi:main' into 29Aug2024_nocid_array
Browse files Browse the repository at this point in the history
  • Loading branch information
amd-nithyavs committed Sep 12, 2024
2 parents 3392b94 + adee1a4 commit cdbc2b4
Show file tree
Hide file tree
Showing 22 changed files with 384 additions and 67 deletions.
4 changes: 2 additions & 2 deletions .ci/community-jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ println('Tests Completed')
def prepare_check_stages() {
def configure_options = ["--disable-dlopen", "--disable-oshmem", "--enable-builtin-atomic", "--enable-ipv6"]
def compilers = ["clang10", "gcc5", "gcc6", "gcc7", "gcc8", "gcc9", "gcc10"]
def platforms = ["amazon_linux_2", "amazon_linux_2-arm64", "rhel7", "rhel8", "ubuntu_18.04"]
def platforms = ["amazon_linux_2", "amazon_linux_2-arm64", "rhel8", "ubuntu_18.04"]
def check_stages_list = []

// Build everything stage
Expand All @@ -71,7 +71,7 @@ def prepare_check_stages() {

for (configure_option in configure_options) {
def name = "Configure: ${configure_option}".replaceAll("-", "")
build_parallel_map.put(name, prepare_build(name, "(ec2&&linux)", "--configure-args \\\"${configure_option}\\\""))
build_parallel_map.put(name, prepare_build(name, "amazon_linux_2", "--configure-args \\\"${configure_option}\\\""))
}

build_parallel_map.put("distcheck", prepare_build("distcheck", "tarball_build", "--distcheck"))
Expand Down
6 changes: 6 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -45,6 +46,11 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

/*
 * Local reduction hook of the accelerator coll component (the module
 * installs it as its coll_reduce_local entry).
 *
 * Combines 'count' elements of type 'dtype' from sbuf into rbuf using 'op'.
 * Returns OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if a staging
 * buffer cannot be allocated, or the negative value reported by the
 * buffer check.
 */
int mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
mca_coll_base_module_t *module);

int mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
Expand Down
4 changes: 4 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator_module.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -94,6 +95,7 @@ mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm,

accelerator_module->super.coll_allreduce = mca_coll_accelerator_allreduce;
accelerator_module->super.coll_reduce = mca_coll_accelerator_reduce;
accelerator_module->super.coll_reduce_local = mca_coll_accelerator_reduce_local;
accelerator_module->super.coll_reduce_scatter_block = mca_coll_accelerator_reduce_scatter_block;
if (!OMPI_COMM_IS_INTER(comm)) {
accelerator_module->super.coll_scan = mca_coll_accelerator_scan;
Expand Down Expand Up @@ -141,6 +143,7 @@ mca_coll_accelerator_module_enable(mca_coll_base_module_t *module,

ACCELERATOR_INSTALL_COLL_API(comm, s, allreduce);
ACCELERATOR_INSTALL_COLL_API(comm, s, reduce);
ACCELERATOR_INSTALL_COLL_API(comm, s, reduce_local);
ACCELERATOR_INSTALL_COLL_API(comm, s, reduce_scatter_block);
if (!OMPI_COMM_IS_INTER(comm)) {
/* MPI does not define scan/exscan on intercommunicators */
Expand All @@ -159,6 +162,7 @@ mca_coll_accelerator_module_disable(mca_coll_base_module_t *module,

ACCELERATOR_UNINSTALL_COLL_API(comm, s, allreduce);
ACCELERATOR_UNINSTALL_COLL_API(comm, s, reduce);
ACCELERATOR_UNINSTALL_COLL_API(comm, s, reduce_local);
ACCELERATOR_UNINSTALL_COLL_API(comm, s, reduce_scatter_block);
if (!OMPI_COMM_IS_INTER(comm))
{
Expand Down
58 changes: 58 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator_reduce.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -84,3 +85,60 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
}
return rc;
}

/*
 * Local reduction: rbuf = sbuf (op) rbuf over 'count' elements of 'dtype',
 * performed by ompi_op_reduce().
 *
 * Each buffer is examined with mca_coll_accelerator_check_buf(). A negative
 * result is returned to the caller as an error. A positive result
 * (presumably "buffer lives in accelerator memory" -- confirm against
 * check_buf) causes the buffer to be staged through a malloc'ed copy so the
 * reduction operates on the copy; the result is copied back into the
 * caller's rbuf afterwards.
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when a staging buffer
 * cannot be allocated, or the negative code from the buffer check.
 * The 'module' argument is not used.
 */
int
mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
                                  struct ompi_datatype_t *dtype,
                                  struct ompi_op_t *op,
                                  mca_coll_base_module_t *module)
{
    ptrdiff_t gap;
    char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
    size_t bufsize;
    int rc;

    /* Number of bytes spanned by 'count' elements; 'gap' receives the
     * offset that must be subtracted from a staging allocation to obtain
     * the pointer handed to the reduction. */
    bufsize = opal_datatype_span(&dtype->super, count, &gap);

    rc = mca_coll_accelerator_check_buf((void *)sbuf);
    if (rc < 0) {
        return rc;
    }

    if ((MPI_IN_PLACE != sbuf) && (rc > 0)) {
        sbuf1 = (char*)malloc(bufsize);
        if (NULL == sbuf1) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
        sbuf = sbuf1 - gap;
    }

    rc = mca_coll_accelerator_check_buf(rbuf);
    if (rc < 0) {
        /* Do not leak the send-side staging buffer on this error path
         * (free(NULL) is a no-op when no staging copy was made). */
        free(sbuf1);
        return rc;
    }

    if (rc > 0) {
        rbuf1 = (char*)malloc(bufsize);
        if (NULL == rbuf1) {
            free(sbuf1);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
        rbuf2 = rbuf; /* save away original buffer */
        rbuf = rbuf1 - gap;
    }

    ompi_op_reduce(op, (void *)sbuf, rbuf, count, dtype);
    rc = OMPI_SUCCESS;

    free(sbuf1);
    if (NULL != rbuf1) {
        /* Publish the staged result into the caller's original buffer. */
        rbuf = rbuf2;
        mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
        free(rbuf1);
    }
    return rc;
}
5 changes: 3 additions & 2 deletions ompi/mca/coll/base/coll_base_allgather.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 Jeffrey M. Squyres. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -776,7 +777,7 @@ int ompi_coll_base_allgather_intra_k_bruck(const void *sbuf, size_t scount,
int recvcount, distance;
ptrdiff_t rlb, rextent;
ptrdiff_t rsize, rgap = 0;
ompi_request_t **reqs;
ompi_request_t **reqs = NULL;
int num_reqs, max_reqs = 0;

char *tmpsend = NULL;
Expand Down Expand Up @@ -937,7 +938,7 @@ int ompi_coll_base_allgather_direct_messaging(const void *sbuf, size_t scount,
int line = -1, rank, comm_size, err = MPI_SUCCESS;
ptrdiff_t rlb, rextent;
ptrdiff_t incr;
ompi_request_t **reqs;
ompi_request_t **reqs = NULL;
int max_reqs = 0, reqs_needed = 0;
int peer_rank = 0;

Expand Down
7 changes: 3 additions & 4 deletions ompi/mca/coll/han/coll_han_alltoall.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,8 @@ int mca_coll_han_alltoall_using_smsc(
/* all ranks will pull from the other ranks' sbuf */
gather_buf_in[0] = (void*)sbuf;
}
gather_buf_in[1] = *(void**)&send_needs_bounce;
gather_buf_in[2] = *(void**)&ii_push_data;
gather_buf_in[1] = (void*)(intptr_t)send_needs_bounce;
gather_buf_in[2] = (void*)(intptr_t)ii_push_data;

rc = low_comm->c_coll->coll_allgather(gather_buf_in, nptrs_gather, MPI_AINT,
gather_buf_out, nptrs_gather, MPI_AINT, low_comm,
Expand Down Expand Up @@ -385,5 +385,4 @@ int mca_coll_han_alltoall_using_smsc(
OPAL_OUTPUT_VERBOSE((40, mca_coll_han_component.han_output,
"Alltoall Complete with %d\n",rc));
return rc;

}
}
5 changes: 4 additions & 1 deletion ompi/mca/coll/xhc/coll_xhc_module.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2021-2023 Computer Architecture and VLSI Systems (CARV)
* Laboratory, ICS Forth. All rights reserved.
* Copyright (c) 2024 Jeffrey M. Squyres. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -523,7 +524,7 @@ static int xhc_module_create_hierarchy(xhc_module_t *module,
continue;
}

int member_id;
int member_id = -1;
int members = 0;

// If working with rank list, set the ranks from the list as "local"
Expand Down Expand Up @@ -560,6 +561,8 @@ static int xhc_module_create_hierarchy(xhc_module_t *module,
}
}

assert(member_id != -1);

/* If split or max ranks was specified, math partition the locality
* and remove the previously added locality mapping to some ranks */
if(my_def->split > 1) {
Expand Down
4 changes: 4 additions & 0 deletions ompi/mca/common/ompio/common_ompio.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_write_at (ompio_file_t *fh, OMPI_MPI_OFF
OMPI_DECLSPEC int mca_common_ompio_file_iwrite (ompio_file_t *fh, const void *buf, size_t count,
struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh, ompi_request_t *request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
const void *buf, size_t count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
Expand Down Expand Up @@ -297,6 +299,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_read_at (ompio_file_t *fh, OMPI_MPI_OFFS
OMPI_DECLSPEC int mca_common_ompio_file_iread (ompio_file_t *fh, void *buf, size_t count,
struct ompi_datatype_t *datatype, ompi_request_t **request);

/*
 * Start a pipelined non-blocking read from the io-array already stored in
 * fh (f_io_array / f_num_of_io_entries). The implementation treats 'request'
 * as an mca_ompio_request_t and releases fh->f_io_array before returning
 * successfully. Returns OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE.
 */
OMPI_DECLSPEC int mca_common_ompio_file_iread_pregen (ompio_file_t *fh, ompi_request_t *request);

OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
void *buf, size_t count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
Expand Down
64 changes: 63 additions & 1 deletion ompi/mca/common/ompio/common_ompio_file_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -530,6 +530,68 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
return ret;
}

/*
** This routine is invoked from file_read_all.
** It is only used if the temporary buffer is a gpu buffer,
** and the fbtl supports the ipreadv operation.
**
** The io-array has already been generated in file_read_all,
** and we use the pre-computed offsets to create a pseudo fview.
** The position of the file pointer is updated in the file_read_all
** operation, not here.
**
** 'request' is used as an mca_ompio_request_t. On success the io-array
** of fh is released (f_io_array set to NULL, f_num_of_io_entries to 0)
** and the first sub-request is posted.
**
** Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the pseudo fview
** or its iovec array cannot be allocated.
*/

int mca_common_ompio_file_iread_pregen (ompio_file_t *fh,
                                        ompi_request_t *request)
{
    uint32_t i;
    size_t max_data;
    size_t pipeline_buf_size;
    mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;

    /* Total amount to read is taken from the first io-array entry. */
    max_data = fh->f_io_array[0].length;
    pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);

    mca_common_ompio_register_progress ();

    OMPIO_PREPARE_READ_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
                            ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
                            pipeline_buf_size, NULL, i);

    /* One sub-request per pipeline_buf_size chunk, rounded up. */
    ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
    ompio_req->req_size = pipeline_buf_size;
    ompio_req->req_max_data = max_data;
    ompio_req->req_post_next_subreq = mca_common_ompio_post_next_read_subreq;
    ompio_req->req_fh = fh;
    ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;

    ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
    if (NULL == ompio_req->req_fview) {
        opal_output(1, "common_ompio: error allocating memory\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc (fh->f_num_of_io_entries *
                                                                  sizeof(struct iovec));
    if (NULL == ompio_req->req_fview->f_decoded_iov) {
        opal_output(1, "common_ompio_file_iread_pregen: could not allocate memory\n");
        /* Do not leave a half-built fview attached to the request:
         * release it and reset the pointer so it is neither leaked nor
         * released a second time later. */
        free (ompio_req->req_fview);
        ompio_req->req_fview = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Build the pseudo fview: each iovec carries the pre-computed file
     * offset (in iov_base) and length of one io-array entry. */
    ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
    for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
        ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
        ompio_req->req_fview->f_decoded_iov[i].iov_len  = fh->f_io_array[i].length ;
    }

    /* The io-array contents now live in the request's fview; drop the
     * copy held by the file handle. */
    fh->f_num_of_io_entries = 0;
    free (fh->f_io_array);
    fh->f_io_array = NULL;

    mca_common_ompio_post_next_read_subreq(ompio_req, 0);
    return OMPI_SUCCESS;
}

int mca_common_ompio_file_iread_at (ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
Expand Down
Loading

0 comments on commit cdbc2b4

Please sign in to comment.