Skip to content

Commit

Permalink
Fix performance regression caused by enabling opal thread support
Browse files Browse the repository at this point in the history
This commit adds opal_using_threads() protection around the atomic
operation in OBJ_RETAIN/OBJ_RELEASE. This resolves the performance
issues seen when running psm with MPI_THREAD_SINGLE.

To avoid issues with header dependencies opal_using_threads() has been
moved to a new header (thread_usage.h). The OPAL_THREAD_ADD* and
OPAL_THREAD_CMPSET* macros have also been relocated to this header.

This commit is cherry-picked off a fix that was submitted for the v1.8
release series but never applied to master. This fixes part of the
problem reported by @nysal in open-mpi#1902.

(cherry picked from commit open-mpi/ompi-release@ce91307)

Signed-off-by: Nathan Hjelm <hjelmn@me.com>
  • Loading branch information
hjelmn authored and bosilca committed Oct 3, 2016
1 parent 15f6f9e commit e3e4b1c
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 155 deletions.
6 changes: 4 additions & 2 deletions opal/class/opal_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -121,7 +123,7 @@
#include <assert.h>
#include <stdlib.h>

#include "opal/sys/atomic.h"
#include "opal/threads/thread_usage.h"

BEGIN_C_DECLS

Expand Down Expand Up @@ -508,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
static inline int opal_obj_update(opal_object_t *object, int inc)
{
return opal_atomic_add_32(&(object->obj_reference_count), inc);
return OPAL_THREAD_ADD32(&object->obj_reference_count, inc);
}

END_C_DECLS
Expand Down
3 changes: 2 additions & 1 deletion opal/threads/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ headers += \
threads/mutex_unix.h \
threads/threads.h \
threads/tsd.h \
threads/wait_sync.h
threads/wait_sync.h \
threads/thread_usage.h

lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \
threads/condition.c \
Expand Down
153 changes: 1 addition & 152 deletions opal/threads/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@

#include "opal_config.h"

#include "opal/sys/atomic.h"
#include "opal/prefetch.h"
#include "opal/threads/thread_usage.h"

BEGIN_C_DECLS

Expand All @@ -41,11 +40,6 @@ BEGIN_C_DECLS
* Functions for locking of critical sections.
*/

/*
* declaring this here so that CL does not complain
*/
OPAL_DECLSPEC extern bool opal_uses_threads;

/**
* Opaque mutex object
*/
Expand Down Expand Up @@ -107,67 +101,6 @@ END_C_DECLS

BEGIN_C_DECLS

/**
* Check and see if the process is using multiple threads.
*
* @retval true If the process may have more than one thread.
* @retval false If the process only has a single thread.
*
* The value that this function returns is influenced by:
*
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
* - what the final MPI thread level was determined to be,
* - whether the OMPI or MPI libraries are multi-threaded (Jan 2003:
* they're not),
* - whether configure determined if we have thread support or not
*
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
* functions) invoke opal_set_using_threads() to influence the value of
* this function, depending on their situation. Some examples:
*
* - if configure determined that we do not have threads, then this
* value will always be false.
*
* - if MPI_INIT is invoked, and the ompi libraries are [still]
* single-threaded, this value will be false.
*
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
* thread support, and the final thread level is determined to be
* MPI_THREAD_MULTIPLE, this value will be true.
*
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
* this value will be false.
*
* Hence, this function will return false if there is guaranteed to
* only be one thread in the process. If there is even the
* possibility that we may have multiple threads, true will be
* returned.
*/
#define opal_using_threads() opal_uses_threads

/**
* Set whether the process is using multiple threads or not.
*
* @param have Boolean indicating whether the process is using
* multiple threads or not.
*
* @retval opal_using_threads The new return value from
* opal_using_threads().
*
* This function is used to influence the return value of
* opal_using_threads(). If configure detected that we have thread
* support, the return value of future invocations of
* opal_using_threads() will be the parameter's value. If configure
* detected that we have no thread support, then the retuen from
* opal_using_threads() will always be false.
*/
static inline bool opal_set_using_threads(bool have)
{
opal_uses_threads = have;
return opal_using_threads();
}


/**
* Lock a mutex if opal_using_threads() says that multiple threads may
* be active in the process.
Expand Down Expand Up @@ -254,90 +187,6 @@ static inline bool opal_set_using_threads(bool have)
} \
} while (0)

/**
* Use an atomic operation for increment/decrement if opal_using_threads()
* indicates that threads are in use by the application or library.
*/

static inline int32_t
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
{
int32_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_32(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

#if OPAL_HAVE_ATOMIC_MATH_64
static inline int64_t
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
{
int64_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_64(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}
#endif

static inline size_t
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
{
size_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_size_t(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

/* BWB: FIX ME: remove if possible */
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)

#if OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
#endif


static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
{
if (opal_using_threads ()) {
return opal_atomic_swap_ptr (ptr, newvalue);
}

void *old = ((void **) ptr)[0];
((void **) ptr)[0] = newvalue;

return old;
}

#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)

END_C_DECLS

#endif /* OPAL_MUTEX_H */
174 changes: 174 additions & 0 deletions opal/threads/thread_usage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#if !defined(OPAL_THREAD_USAGE_H)
#define OPAL_THREAD_USAGE_H

#include "opal_config.h"

#include "opal/sys/atomic.h"
#include "opal/prefetch.h"

OPAL_DECLSPEC extern bool opal_uses_threads;

/**
* Check and see if the process is using multiple threads.
*
* @retval true If the process may have more than one thread.
* @retval false If the process only has a single thread.
*
* The value that this function returns is influenced by:
*
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
* - what the final MPI thread level was determined to be,
* - whether the OMPI or MPI libraries are multi-threaded
*
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
* functions) invoke opal_set_using_threads() to influence the value of
* this function, depending on their situation. Some examples:
*
* - if MPI_INIT is invoked, and the ompi components in use are
* single-threaded, this value will be false.
*
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
* thread support, and the final thread level is determined to be
* MPI_THREAD_MULTIPLE, this value will be true.
*
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
* this value will be false.
*
* Hence, this function will return false if there is guaranteed to
* only be one thread in the process. If there is even the
* possibility that we may have multiple threads, true will be
* returned.
*/
#define opal_using_threads() opal_uses_threads

/**
* Set whether the process is using multiple threads or not.
*
* @param have Boolean indicating whether the process is using
* multiple threads or not.
*
* @retval opal_using_threads The new return value from
* opal_using_threads().
*
* This function is used to influence the return value of
* opal_using_threads(). If configure detected that we have thread
* support, the return value of future invocations of
* opal_using_threads() will be the parameter's value. If configure
* detected that we have no thread support, then the retuen from
* opal_using_threads() will always be false.
*/
static inline bool opal_set_using_threads(bool have)
{
opal_uses_threads = have;
return opal_using_threads();
}


/**
* Use an atomic operation for increment/decrement if opal_using_threads()
* indicates that threads are in use by the application or library.
*/

static inline int32_t
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
{
int32_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_32(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

#if OPAL_HAVE_ATOMIC_MATH_64
static inline int64_t
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
{
int64_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_64(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}
#endif

static inline size_t
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
{
size_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_size_t(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

/* BWB: FIX ME: remove if possible */
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)

#if OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
#endif

static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
{
if (opal_using_threads ()) {
return opal_atomic_swap_ptr (ptr, newvalue);
}

void *old = ((void **) ptr)[0];
((void **) ptr)[0] = newvalue;

return old;
}

#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)

#endif /* !defined(OPAL_THREAD_USAGE_H) */

0 comments on commit e3e4b1c

Please sign in to comment.