Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix performance regression caused by enabling opal thread support #1909

Merged
merged 1 commit into from
Jul 28, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions opal/class/opal_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -121,7 +123,7 @@
#include <assert.h>
#include <stdlib.h>

#include "opal/sys/atomic.h"
#include "opal/threads/thread_usage.h"

BEGIN_C_DECLS

Expand Down Expand Up @@ -508,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
static inline int opal_obj_update(opal_object_t *object, int inc)
{
return opal_atomic_add_32(&(object->obj_reference_count), inc);
return OPAL_THREAD_ADD32(&object->obj_reference_count, inc);
}

END_C_DECLS
Expand Down
3 changes: 2 additions & 1 deletion opal/threads/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ headers += \
threads/mutex_unix.h \
threads/threads.h \
threads/tsd.h \
threads/wait_sync.h
threads/wait_sync.h \
threads/thread_usage.h

lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \
threads/condition.c \
Expand Down
153 changes: 1 addition & 152 deletions opal/threads/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@

#include "opal_config.h"

#include "opal/sys/atomic.h"
#include "opal/prefetch.h"
#include "opal/threads/thread_usage.h"

BEGIN_C_DECLS

Expand All @@ -41,11 +40,6 @@ BEGIN_C_DECLS
* Functions for locking of critical sections.
*/

/*
* declaring this here so that CL does not complain
*/
OPAL_DECLSPEC extern bool opal_uses_threads;

/**
* Opaque mutex object
*/
Expand Down Expand Up @@ -107,67 +101,6 @@ END_C_DECLS

BEGIN_C_DECLS

/**
* Check and see if the process is using multiple threads.
*
* @retval true If the process may have more than one thread.
* @retval false If the process only has a single thread.
*
* The value that this function returns is influenced by:
*
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
* - what the final MPI thread level was determined to be,
* - whether the OMPI or MPI libraries are multi-threaded (Jan 2003:
* they're not),
* - whether configure determined if we have thread support or not
*
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
* functions) invoke opal_set_using_threads() to influence the value of
* this function, depending on their situation. Some examples:
*
* - if configure determined that we do not have threads, then this
* value will always be false.
*
* - if MPI_INIT is invoked, and the ompi libraries are [still]
* single-threaded, this value will be false.
*
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
* thread support, and the final thread level is determined to be
* MPI_THREAD_MULTIPLE, this value will be true.
*
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
* this value will be false.
*
* Hence, this function will return false if there is guaranteed to
* only be one thread in the process. If there is even the
* possibility that we may have multiple threads, true will be
* returned.
*/
#define opal_using_threads() opal_uses_threads

/**
* Set whether the process is using multiple threads or not.
*
* @param have Boolean indicating whether the process is using
* multiple threads or not.
*
* @retval opal_using_threads The new return value from
* opal_using_threads().
*
* This function is used to influence the return value of
* opal_using_threads(). If configure detected that we have thread
* support, the return value of future invocations of
* opal_using_threads() will be the parameter's value. If configure
* detected that we have no thread support, then the retuen from
* opal_using_threads() will always be false.
*/
static inline bool opal_set_using_threads(bool have)
{
opal_uses_threads = have;
return opal_using_threads();
}


/**
* Lock a mutex if opal_using_threads() says that multiple threads may
* be active in the process.
Expand Down Expand Up @@ -254,90 +187,6 @@ static inline bool opal_set_using_threads(bool have)
} \
} while (0)

/**
* Use an atomic operation for increment/decrement if opal_using_threads()
* indicates that threads are in use by the application or library.
*/

static inline int32_t
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
{
int32_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_32(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

#if OPAL_HAVE_ATOMIC_MATH_64
static inline int64_t
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
{
int64_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_64(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}
#endif

static inline size_t
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
{
size_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_size_t(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

/* BWB: FIX ME: remove if possible */
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)

#if OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
#endif


static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
{
if (opal_using_threads ()) {
return opal_atomic_swap_ptr (ptr, newvalue);
}

void *old = ((void **) ptr)[0];
((void **) ptr)[0] = newvalue;

return old;
}

#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)

END_C_DECLS

#endif /* OPAL_MUTEX_H */
174 changes: 174 additions & 0 deletions opal/threads/thread_usage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#if !defined(OPAL_THREAD_USAGE_H)
#define OPAL_THREAD_USAGE_H

#include "opal_config.h"

#include "opal/sys/atomic.h"
#include "opal/prefetch.h"

OPAL_DECLSPEC extern bool opal_uses_threads;

/**
* Check and see if the process is using multiple threads.
*
* @retval true If the process may have more than one thread.
* @retval false If the process only has a single thread.
*
* The value that this function returns is influenced by:
*
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
* - what the final MPI thread level was determined to be,
* - whether the OMPI or MPI libraries are multi-threaded
*
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
* functions) invoke opal_set_using_threads() to influence the value of
* this function, depending on their situation. Some examples:
*
* - if MPI_INIT is invoked, and the ompi components in use are
* single-threaded, this value will be false.
*
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
* thread support, and the final thread level is determined to be
* MPI_THREAD_MULTIPLE, this value will be true.
*
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
* this value will be false.
*
* Hence, this function will return false if there is guaranteed to
* only be one thread in the process. If there is even the
* possibility that we may have multiple threads, true will be
* returned.
*/
#define opal_using_threads() opal_uses_threads

/**
* Set whether the process is using multiple threads or not.
*
* @param have Boolean indicating whether the process is using
* multiple threads or not.
*
* @retval opal_using_threads The new return value from
* opal_using_threads().
*
* This function is used to influence the return value of
* opal_using_threads(). If configure detected that we have thread
* support, the return value of future invocations of
* opal_using_threads() will be the parameter's value. If configure
* detected that we have no thread support, then the retuen from
* opal_using_threads() will always be false.
*/
static inline bool opal_set_using_threads(bool have)
{
opal_uses_threads = have;
return opal_using_threads();
}


/**
* Use an atomic operation for increment/decrement if opal_using_threads()
* indicates that threads are in use by the application or library.
*/

static inline int32_t
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
{
int32_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_32(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

#if OPAL_HAVE_ATOMIC_MATH_64
static inline int64_t
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
{
int64_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_64(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}
#endif

static inline size_t
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
{
size_t ret;

if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_size_t(addr, delta);
} else {
ret = (*addr += delta);
}

return ret;
}

/* BWB: FIX ME: remove if possible */
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)

#if OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
#endif

static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
{
if (opal_using_threads ()) {
return opal_atomic_swap_ptr (ptr, newvalue);
}

void *old = ((void **) ptr)[0];
((void **) ptr)[0] = newvalue;

return old;
}

#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)

#endif /* !defined(OPAL_THREAD_USAGE_H) */