Skip to content

Commit

Permalink
Merge pull request #8199 from rhc54/topic/locality
Browse files: browse the repository at this point in the history
Fix confusion between cpuset and locality
  • Loading branch information
rhc54 authored Nov 11, 2020
2 parents 57ccb83 + 2f7f1fe commit d489030
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 23 deletions.
19 changes: 3 additions & 16 deletions ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
if (0 < opal_list_get_size(&ilist)) {
uint32_t *peer_ranks = NULL;
int prn, nprn = 0;
char *val, *mycpuset;
char *val;
uint16_t u16;
opal_process_name_t wildcard_rank;
/* convert the list of new procs to a proc_t array */
Expand All @@ -380,16 +380,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
opal_argv_free(peers);
}

/* get my locality string */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
OMPI_PROC_MY_NAME, &val, PMIX_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
mycpuset = val;
} else {
mycpuset = NULL;
}

i = 0;
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
proc = cd->p;
Expand All @@ -406,8 +396,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
val = NULL;
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
&proc->super.proc_name, &val, OPAL_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
free(val);
} else {
/* all we can say is that it shares our node */
Expand All @@ -425,9 +415,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
}
++i;
}
if (NULL != mycpuset) {
free(mycpuset);
}
if (NULL != peer_ranks) {
free(peer_ranks);
}
Expand Down
16 changes: 11 additions & 5 deletions ompi/runtime/ompi_rte.c
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ int ompi_rte_init(int *pargc, char ***pargv)

/* identify our location */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_CPUSET,
&opal_process_info.my_name, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
opal_process_info.cpuset = val;
Expand All @@ -774,6 +774,15 @@ int ompi_rte_init(int *pargc, char ***pargv)
opal_process_info.cpuset = NULL;
opal_process_info.proc_is_bound = false;
}
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
&opal_process_info.my_name, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
opal_process_info.locality = val;
val = NULL; // protect the string
} else {
opal_process_info.locality = NULL;
}

/* retrieve the local peers - defaults to local node */
val = NULL;
Expand Down Expand Up @@ -811,7 +820,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
&pname, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
u16 = opal_hwloc_compute_relative_locality(opal_process_info.cpuset, val);
u16 = opal_hwloc_compute_relative_locality(opal_process_info.locality, val);
free(val);
} else {
/* all we can say is that it shares our node */
Expand All @@ -826,9 +835,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
ret = opal_pmix_convert_status(rc);
error = "local store of locality";
opal_argv_free(peers);
if (NULL != opal_process_info.cpuset) {
free(opal_process_info.cpuset);
}
goto error;
}
}
Expand Down
4 changes: 2 additions & 2 deletions opal/mca/common/ofi/common_ofi.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2020 Triad National Security, LLC. All rights
Expand Down Expand Up @@ -345,7 +345,7 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
}

// compute relative locality
relative_locality = opal_hwloc_compute_relative_locality(process_info->cpuset, locality_string);
relative_locality = opal_hwloc_compute_relative_locality(process_info->locality, locality_string);
free(locality_string);

if (relative_locality & OPAL_PROC_ON_SOCKET) {
Expand Down
1 change: 1 addition & 0 deletions opal/util/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ opal_process_info_t opal_process_info = {
.my_local_rank = 0, /* I'm the only process around here */
.my_node_rank = 0,
.cpuset = NULL,
.locality = NULL,
.pid = 0,
.num_procs = 0,
.app_num = 0,
Expand Down
1 change: 1 addition & 0 deletions opal/util/proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ typedef struct opal_process_info_t {
uint16_t my_local_rank; /**< local rank on this node within my job */
uint16_t my_node_rank;
char *cpuset; /**< String-representation of bitmap where we are bound */
char *locality; /**< String-representation of process locality */
pid_t pid;
uint32_t num_procs;
uint32_t app_num;
Expand Down

0 comments on commit d489030

Please sign in to comment.