Skip to content

Commit

Permalink
Merge pull request open-mpi#11 from anandhis/topic/rmlofi
Browse files Browse the repository at this point in the history
Addressed pull-request comments from Jsquyres
  • Loading branch information
rhc54 authored Jul 14, 2016
2 parents 591482a + f16fdfb commit 7515dc9
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 24 deletions.
2 changes: 1 addition & 1 deletion orte/mca/rml/ofi/rml_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


/** the maximum open conduit - assuming system will have no more than 20 transports*/
#define MAX_CONDUIT 20
#define MAX_CONDUIT 40

/** The OPAL key values **/
/* (char*) ofi socket address (type IN) of the node process is running on */
Expand Down
11 changes: 8 additions & 3 deletions orte/mca/rml/ofi/rml_ofi_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ rml_ofi_component_init(int* priority)
* (fi_info_list) and store it in the ofi_conduits array **/
orte_rml_ofi.conduit_open_num = 0;
for( fabric_info = orte_rml_ofi.fi_info_list ;
NULL != fabric_info; fabric_info = fabric_info->next)
NULL != fabric_info && orte_rml_ofi.conduit_open_num < MAX_CONDUIT ; fabric_info = fabric_info->next)
{
opal_output_verbose(100,orte_rml_base_framework.framework_output,
"%s:%d beginning to add endpoint for conduit_id=%d ",__FILE__,__LINE__,orte_rml_ofi.conduit_open_num);
Expand Down Expand Up @@ -839,8 +839,8 @@ rml_ofi_component_init(int* priority)
/*[debug] - print the sockaddr - port and s_addr */
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_conduits[cur_conduit].ep_name;
opal_output_verbose(1,orte_rml_base_framework.framework_output,
"%s port = 0x%x, InternetAddr = 0x%x ",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ep_sockaddr->sin_port,ep_sockaddr->sin_addr.s_addr);
"%s port = 0x%x, InternetAddr = %s ",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
/*[end debug]*/
OPAL_MODEX_SEND_STRING( ret, OPAL_PMIX_GLOBAL,
OPAL_RML_OFI_FI_SOCKADDR_IN,
Expand Down Expand Up @@ -964,6 +964,11 @@ rml_ofi_component_init(int* priority)
"%s:%d Conduit id - %d created ",__FILE__,__LINE__,orte_rml_ofi.conduit_open_num);
orte_rml_ofi.conduit_open_num++;
}
if (fabric_info != NULL && orte_rml_ofi.conduit_open_num >= MAX_CONDUIT ) {
opal_output_verbose(1,orte_rml_base_framework.framework_output,
"%s:%d fi_getinfo list not fully parsed as MAX_CONDUIT - %d reached ",__FILE__,__LINE__,orte_rml_ofi.conduit_open_num);
}


}
/**
Expand Down
50 changes: 30 additions & 20 deletions orte/mca/rml/ofi/rml_ofi_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,19 +327,20 @@ int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t conduit_id)
static void send_msg(int fd, short args, void *cbdata)
{
orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
orte_process_name_t *peer = &(req->send.dst);
orte_rml_tag_t tag = req->send.tag;
char *dest_ep_name;
size_t dest_ep_namelen = 0;
int ret = OPAL_ERROR;
orte_process_name_t *peer = &(req->send.dst);
orte_rml_tag_t tag = req->send.tag;
char *dest_ep_name;
size_t dest_ep_namelen = 0;
int ret = OPAL_ERROR;
uint32_t total_packets;
fi_addr_t dest_fi_addr;
orte_rml_send_t *snd;
orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t );
uint8_t conduit_id = req->conduit_id;
fi_addr_t dest_fi_addr;
orte_rml_send_t *snd;
orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t );
uint8_t conduit_id = req->conduit_id;
orte_rml_ofi_send_pkt_t* ofi_msg_pkt;
size_t datalen_per_pkt, hdrsize, data_in_pkt; // the length of data in per packet excluding the header size


snd = OBJ_NEW(orte_rml_send_t);
snd->dst = *peer;
snd->origin = *ORTE_PROC_MY_NAME;
Expand Down Expand Up @@ -367,20 +368,29 @@ static void send_msg(int fd, short args, void *cbdata)
{
case FI_SOCKADDR_IN :
OPAL_MODEX_RECV_STRING(ret, OPAL_RML_OFI_FI_SOCKADDR_IN, peer , (char **) &dest_ep_name, &dest_ep_namelen);
/*print the sockaddr - port and s_addr */
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*) dest_ep_name;
opal_output_verbose(10,orte_rml_base_framework.framework_output,
"%s obtained for peer %s port = 0x%printinx, InternetAddr = %s ",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),ntohs(ep_sockaddr->sin_port),
inet_ntoa(ep_sockaddr->sin_addr));
break;
case FI_ADDR_PSMX :
OPAL_MODEX_RECV_STRING(ret, OPAL_RML_OFI_FI_ADDR_PSMX, peer , (char **) &dest_ep_name, &dest_ep_namelen);
break;
default:
/* we shouldn't be getting here as only above are supported and address sent
* to PMIX (OPAL_MODEX_SEND) in orte_component_init() */
opal_output_verbose(1, orte_rml_base_framework.framework_output,
"%s Error: Unhandled address format type in ofi_send_msg", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
ORTE_RML_SEND_COMPLETE(snd);
return;
}
opal_output_verbose(10, orte_rml_base_framework.framework_output,
opal_output_verbose(50, orte_rml_base_framework.framework_output,
"%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret, dest_ep_namelen);
/*print the sockaddr - port and s_addr */
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*) dest_ep_name;
opal_output_verbose(1,orte_rml_base_framework.framework_output,
"%s obtained for peer %s port = 0x%x, InternetAddr = 0x%x ",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),ep_sockaddr->sin_port,
ep_sockaddr->sin_addr.s_addr);


if ( OPAL_SUCCESS == ret) {
opal_output_verbose(10, orte_rml_base_framework.framework_output,
Expand All @@ -395,8 +405,8 @@ static void send_msg(int fd, short args, void *cbdata)
/* call the send-callback fn with error and return, also return failure status */
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
ORTE_RML_SEND_COMPLETE(snd);
snd = NULL;
//OBJ_RELEASE( ofi_send_req);
return;
}

} else {
Expand All @@ -408,8 +418,8 @@ static void send_msg(int fd, short args, void *cbdata)
/* call the send-callback fn with error and return, also return failure status */
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
ORTE_RML_SEND_COMPLETE(snd);
snd = NULL;
//OBJ_RELEASE( ofi_send_req);
return;
}

ofi_send_req->send = snd;
Expand Down Expand Up @@ -562,7 +572,7 @@ int orte_rml_ofi_send_transport_nb(int conduit_id,
ORTE_NAME_PRINT(peer), tag);


if( (0 > conduit_id) || ( conduit_id > orte_rml_ofi.conduit_open_num ) ) {
if( (0 > conduit_id) || ( conduit_id >= orte_rml_ofi.conduit_open_num ) ) {
/* Invalid conduit ID provided */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
Expand Down Expand Up @@ -614,7 +624,7 @@ int orte_rml_ofi_send_buffer_transport_nb(int conduit_id,
ORTE_NAME_PRINT(peer), tag);


if( (0 > conduit_id) || ( conduit_id > orte_rml_ofi.conduit_open_num ) ) {
if( (0 > conduit_id) || ( conduit_id >= orte_rml_ofi.conduit_open_num ) ) {
/* Invalid conduit ID provided */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
Expand Down

0 comments on commit 7515dc9

Please sign in to comment.