Skip to content

Commit

Permalink
Merge pull request #6830 from rhc54/topic/dpm
Browse files Browse the repository at this point in the history
Provide locality for all procs on node
  • Loading branch information
rhc54 authored Jul 23, 2019
2 parents 20dd06c + d202e10 commit 8f32a59
Show file tree
Hide file tree
Showing 25 changed files with 482 additions and 381 deletions.
73 changes: 65 additions & 8 deletions ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
Expand Down Expand Up @@ -406,9 +406,43 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
goto exit;
}
if (0 < opal_list_get_size(&ilist)) {
uint32_t *peer_ranks = NULL;
int prn, nprn;
char *val, *mycpuset;
uint16_t u16;
opal_process_name_t wildcard_rank;
/* convert the list of new procs to a proc_t array */
new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist),
sizeof(ompi_proc_t *));
/* get the list of local peers for the new procs */
cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
proc = cd->p;
wildcard_rank.jobid = proc->super.proc_name.jobid;
wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
/* retrieve the local peers */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_PEERS,
&wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
char **peers = opal_argv_split(val, ',');
free(val);
nprn = opal_argv_count(peers);
peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t));
for (prn = 0; NULL != peers[prn]; prn++) {
peer_ranks[prn] = strtoul(peers[prn], NULL, 10);
}
opal_argv_free(peers);
}

/* get my locality string */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
OMPI_PROC_MY_NAME, &val, OPAL_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
mycpuset = val;
} else {
mycpuset = NULL;
}

i = 0;
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
opal_value_t *kv;
Expand All @@ -418,15 +452,38 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
* OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. Since we can live without
* them, we are just fine */
ompi_proc_complete_init_single(proc);
/* save the locality for later */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
kv->data.uint16 = proc->super.proc_flags;
opal_pmix.store_local(&proc->super.proc_name, kv);
OBJ_RELEASE(kv); // maintain accounting
/* if this proc is local, then get its locality */
if (NULL != peer_ranks) {
for (prn=0; prn < nprn; prn++) {
if (peer_ranks[prn] == proc->super.proc_name.vpid) {
/* get their locality string */
val = NULL;
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_LOCALITY_STRING,
&proc->super.proc_name, &val, OPAL_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
free(val);
} else {
/* all we can say is that it shares our node */
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
}
proc->super.proc_flags = u16;
/* save the locality for later */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
kv->data.uint16 = proc->super.proc_flags;
opal_pmix.store_local(&proc->super.proc_name, kv);
OBJ_RELEASE(kv); // maintain accounting
break;
}
}
}
++i;
}
if (NULL != mycpuset) {
free(mycpuset);
}
/* call add_procs on the new ones */
rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist)));
free(new_proc_list);
Expand Down
6 changes: 3 additions & 3 deletions opal/mca/pmix/pmix4x/pmix/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ release=0
# The only requirement is that it must be entirely printable ASCII
# characters and have no white space.

greek=a1
greek=

# If repo_rev is empty, then the repository version number will be
# obtained during "make dist" via the "git describe --tags --always"
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".

repo_rev=git03a8b5da
repo_rev=git628a724c

# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
Expand All @@ -44,7 +44,7 @@ tarball_version=

# The date when this release was created

date="Jul 16, 2019"
date="Jul 21, 2019"

# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library
Expand Down
4 changes: 2 additions & 2 deletions opal/mca/pmix/pmix4x/pmix/contrib/pmix.spec
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
# Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
Expand Down Expand Up @@ -192,7 +192,7 @@

Summary: An extended/exascale implementation of PMI
Name: %{?_name:%{_name}}%{!?_name:pmix}
Version: 4.0.0a1
Version: 4.0.0
Release: 1%{?dist}
License: BSD
Group: Development/Libraries
Expand Down
16 changes: 7 additions & 9 deletions opal/mca/pmix/pmix4x/pmix/src/client/pmi1.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
Expand Down Expand Up @@ -83,7 +83,7 @@ PMIX_EXPORT int PMI_Init(int *spawned)

/* getting internal key requires special rank value */
memcpy(&proc, &myproc, sizeof(myproc));
proc.rank = PMIX_RANK_UNDEF;
proc.rank = PMIX_RANK_WILDCARD;

/* set controlling parameters
* PMIX_OPTIONAL - expect that these keys should be available on startup
Expand Down Expand Up @@ -392,8 +392,6 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum)
pmix_value_t *val;
pmix_info_t info[1];
bool val_optinal = 1;
pmix_proc_t proc = myproc;
proc.rank = PMIX_RANK_WILDCARD;

PMI_CHECK();

Expand All @@ -412,11 +410,11 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum)
PMIX_INFO_CONSTRUCT(&info[0]);
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);

rc = PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val);
rc = PMIx_Get(&myproc, PMIX_APPNUM, info, 1, &val);
if (PMIX_SUCCESS == rc) {
rc = convert_int(appnum, val);
PMIX_VALUE_RELEASE(val);
} else if( PMIX_ERR_NOT_FOUND == rc ){
} else {
/* this is an optional value, so default it to 0 */
*appnum = 0;
rc = PMIX_SUCCESS;
Expand All @@ -443,7 +441,7 @@ PMIX_EXPORT int PMI_Publish_name(const char service_name[], const char port[])
}

/* pass the service/port */
pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN);
pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN);
info.value.type = PMIX_STRING;
info.value.data.string = (char*) port;

Expand Down Expand Up @@ -495,7 +493,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[])
PMIX_PDATA_CONSTRUCT(&pdata);

/* pass the service */
pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN);
pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN);

/* PMI-1 doesn't want the nspace back */
if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) {
Expand All @@ -512,7 +510,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[])
* potential we could overrun it. As this feature
* isn't widely supported in PMI-1, try being
* conservative */
pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN);
pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN);
PMIX_PDATA_DESTRUCT(&pdata);

return PMIX_SUCCESS;
Expand Down
14 changes: 7 additions & 7 deletions opal/mca/pmix/pmix4x/pmix/src/client/pmix_client_fence.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
Expand Down Expand Up @@ -72,7 +72,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,

PMIX_ACQUIRE_THREAD(&pmix_global_lock);

pmix_output_verbose(2, pmix_globals.debug_output,
pmix_output_verbose(2, pmix_client_globals.fence_output,
"pmix: executing fence");

if (pmix_globals.init_cntr <= 0) {
Expand Down Expand Up @@ -105,7 +105,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
rc = cb->status;
PMIX_RELEASE(cb);

pmix_output_verbose(2, pmix_globals.debug_output,
pmix_output_verbose(2, pmix_client_globals.fence_output,
"pmix: fence released");

return rc;
Expand All @@ -124,7 +124,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs

PMIX_ACQUIRE_THREAD(&pmix_global_lock);

pmix_output_verbose(2, pmix_globals.debug_output,
pmix_output_verbose(2, pmix_client_globals.fence_output,
"pmix: fence_nb called");

if (pmix_globals.init_cntr <= 0) {
Expand Down Expand Up @@ -184,7 +184,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data)
pmix_status_t ret;
int32_t cnt;

pmix_output_verbose(2, pmix_globals.debug_output,
pmix_output_verbose(2, pmix_client_globals.fence_output,
"client:unpack fence called");

/* unpack the status code */
Expand All @@ -195,7 +195,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data)
PMIX_ERROR_LOG(rc);
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
pmix_output_verbose(2, pmix_client_globals.fence_output,
"client:unpack fence received status %d", ret);
return ret;
}
Expand Down Expand Up @@ -254,7 +254,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr,
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
pmix_status_t rc;

pmix_output_verbose(2, pmix_globals.debug_output,
pmix_output_verbose(2, pmix_client_globals.fence_output,
"pmix: fence_nb callback recvd");

if (NULL == cb) {
Expand Down
Loading

0 comments on commit 8f32a59

Please sign in to comment.