Skip to content

Commit

Permalink
Fix the leak of fragments for persistent sends.
Browse files Browse the repository at this point in the history
The rdma_frag attached to the send request was not correctly released
upon request completion, leaking until MPI_Finalize. A quick solution
would have been to add RDMA_FRAG_RETURN at different locations on the
send request completion, but it would have unnecessarily made the
sendreq completion path more complex. Instead, I added the length to
the RDMA fragment so that it can be completed during the remote ack.

Be more explicit on the comment.

The rdma_frag can only be freed once when the peer forced a protocol
change (from RDMA GET to send/recv). Otherwise the fragment will be
returned once all data pertaining to it has been trasnferred.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
  • Loading branch information
bosilca committed Apr 27, 2019
1 parent 399b713 commit 172c7f2
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 20 deletions.
6 changes: 3 additions & 3 deletions ompi/mca/pml/ob1/pml_ob1_rdmafrag.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -46,7 +46,8 @@ struct mca_pml_ob1_rdma_frag_t {
mca_bml_base_btl_t *rdma_bml;
mca_pml_ob1_hdr_t rdma_hdr;
mca_pml_ob1_rdma_state_t rdma_state;
size_t rdma_length;
size_t rdma_length; /* how much the fragment will transfer */
opal_atomic_size_t rdma_bytes_remaining; /* how much has been currently transferred */
void *rdma_req;
uint32_t retries;
mca_pml_ob1_rdma_frag_callback_t cbfunc;
Expand All @@ -71,7 +72,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_rdma_frag_t);

#define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \
do { \
/* return fragment */ \
if (frag->local_handle) { \
mca_bml_base_deregister_mem (frag->rdma_bml, frag->local_handle); \
frag->local_handle = NULL; \
Expand Down
4 changes: 0 additions & 4 deletions ompi/mca/pml/ob1/pml_ob1_recvfrag.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,10 +577,6 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
* then throttle sends */
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) {
if (NULL != sendreq->rdma_frag) {
if (NULL != sendreq->rdma_frag->local_handle) {
mca_bml_base_deregister_mem (sendreq->req_rdma[0].bml_btl, sendreq->rdma_frag->local_handle);
sendreq->rdma_frag->local_handle = NULL;
}
MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag);
sendreq->rdma_frag = NULL;
}
Expand Down
9 changes: 7 additions & 2 deletions ompi/mca/pml/ob1/pml_ob1_recvreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2018 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -319,7 +319,12 @@ static int mca_pml_ob1_recv_request_ack(
return OMPI_SUCCESS;
}

/* let know to shedule function there is no need to put ACK flag */
/* let know to shedule function there is no need to put ACK flag. If not all message went over
* RDMA then we cancel the GET protocol in order to switch back to send/recv. In this case send
* back the remote send request, the peer kept a poointer to the frag locally. In the future we
* might want to cancel the fragment itself, in which case we will have to send back the remote
* fragment instead of the remote request.
*/
recvreq->req_ack_sent = true;
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
recvreq, recvreq->req_send_offset, 0,
Expand Down
19 changes: 12 additions & 7 deletions ompi/mca/pml/ob1/pml_ob1_sendreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2018 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -41,7 +41,6 @@
#include "ompi/mca/bml/base/base.h"
#include "ompi/memchecker.h"


OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t,
NULL, NULL);

Expand Down Expand Up @@ -148,10 +147,7 @@ static void mca_pml_ob1_send_request_destruct(mca_pml_ob1_send_request_t* req)
{
OBJ_DESTRUCT(&req->req_send_ranges);
OBJ_DESTRUCT(&req->req_send_range_lock);
if (req->rdma_frag) {
MCA_PML_OB1_RDMA_FRAG_RETURN(req->rdma_frag);
req->rdma_frag = NULL;
}
assert( NULL == req->rdma_frag );
}

OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t,
Expand Down Expand Up @@ -262,12 +258,20 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
{
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
size_t frag_remaining;

/* count bytes of user data actually delivered and check for request completion */
if (OPAL_LIKELY(0 < rdma_length)) {
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
frag_remaining = OPAL_THREAD_SUB_FETCH_SIZE_T(&frag->rdma_bytes_remaining, (size_t)rdma_length);
SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)rdma_length,
OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI);

if( 0 == frag_remaining ) { /* this frag is now completed. Update the request and be done */
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
if( sendreq->rdma_frag == frag )
sendreq->rdma_frag = NULL;
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
}
}

send_request_pml_complete_check(sendreq);
Expand Down Expand Up @@ -701,6 +705,7 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
frag->rdma_req = sendreq;
frag->rdma_bml = bml_btl;
frag->rdma_length = size;
frag->rdma_bytes_remaining = size;
frag->cbfunc = mca_pml_ob1_rget_completion;
/* do not store the local handle in the fragment. it will be released by mca_pml_ob1_free_rdma_resources */

Expand Down
5 changes: 1 addition & 4 deletions ompi/mca/pml/ob1/pml_ob1_sendreq.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,7 @@ static inline void mca_pml_ob1_send_request_fini (mca_pml_ob1_send_request_t *se
{
/* Let the base handle the reference counts */
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send));
if (sendreq->rdma_frag) {
MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag);
sendreq->rdma_frag = NULL;
}
assert( NULL == sendreq->rdma_frag );
}

/*
Expand Down

0 comments on commit 172c7f2

Please sign in to comment.