Skip to content

Commit

Permalink
Cleanup threading support (#2507)
Browse files Browse the repository at this point in the history
* Drop `_LIBCUDACXX_THREAD_ABI_VISIBILITY`

It's always defined as `_LIBCUDACXX_HIDE_FROM_ABI`

* Drop `_LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS`

It's never defined outside of `__FreeBSD__`

* Drop `thread_if`

* Drop `__libcpp_thread_favorite_barrier_index`

* Drop `_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE`

It is always defined

* Drop `_LIBCUDACXX_HAS_NO_PLATFORM_WAIT`

It is always defined and only used once

* Drop `_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL`

* Move macro definition out of function declaration

* Move threading_support

* Split into the different threading mechanisms

* Disentangle `_LIBCUDACXX_HAS_THREAD_API_EXTERNAL` with other backends

* Fix missing qualifiers and attributes

* Silence an ICC warning about `__libcpp_thread_id_equal`

* Drop more unused functions from pthread

* Move to `__thread` subfolder
  • Loading branch information
miscco authored Oct 19, 2024
1 parent 084cd53 commit 4a0addc
Show file tree
Hide file tree
Showing 12 changed files with 481 additions and 812 deletions.
2 changes: 1 addition & 1 deletion cudax/include/cuda/experimental/__async/stop_token.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
# pragma system_header
#endif // no system header

#include <cuda/std/__thread/threading_support.h>
#include <cuda/std/__type_traits/is_nothrow_constructible.h>
#include <cuda/std/atomic>
#include <cuda/std/detail/libcxx/include/__threading_support>

#include <cuda/experimental/__async/config.cuh>
#include <cuda/experimental/__async/thread.cuh>
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ _LIBCUDACXX_HIDE_FROM_ABI void __atomic_wait(
}
if (__i < 12)
{
__libcpp_thread_yield_processor();
_CUDA_VSTD::__libcpp_thread_yield_processor();
}
else
{
__libcpp_thread_yield();
_CUDA_VSTD::__libcpp_thread_yield();
}
}
while (__nonatomic_compare_equal(__atomic_load_dispatch(__a, __order, _Sco{}), __val))
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__atomic/wait/polling.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <cuda/std/__atomic/order.h>
#include <cuda/std/__atomic/scopes.h>
#include <cuda/std/__atomic/types.h>
#include <cuda/std/detail/libcxx/include/__threading_support>
#include <cuda/std/__thread/threading_support.h>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

Expand Down Expand Up @@ -53,7 +53,7 @@ template <typename _Tp, typename _Sco>
_CCCL_HOST_DEVICE void __atomic_try_wait_slow_fallback(
_Tp const volatile* __a, __atomic_underlying_remove_cv_t<_Tp> __val, memory_order __order, _Sco)
{
__libcpp_thread_poll_with_backoff(__atomic_poll_tester<_Tp, _Sco>(__a, __val, __order));
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(__atomic_poll_tester<_Tp, _Sco>(__a, __val, __order));
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
5 changes: 3 additions & 2 deletions libcudacxx/include/cuda/std/__barrier/barrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,12 @@ class __barrier_base<__empty_completion, _Sco>
}
_LIBCUDACXX_HIDE_FROM_ABI void wait(arrival_token&& __phase) const
{
__libcpp_thread_poll_with_backoff(__barrier_poll_tester_phase<__barrier_base>(this, _CUDA_VSTD::move(__phase)));
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(
__barrier_poll_tester_phase<__barrier_base>(this, _CUDA_VSTD::move(__phase)));
}
_LIBCUDACXX_HIDE_FROM_ABI void wait_parity(bool __parity) const
{
__libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__barrier_base>(this, __parity));
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__barrier_base>(this, __parity));
}
_LIBCUDACXX_HIDE_FROM_ABI void arrive_and_wait()
{
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__semaphore/atomic_semaphore.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class __atomic_semaphore

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
{
return __libcpp_thread_poll_with_backoff(
return _CUDA_VSTD::__libcpp_thread_poll_with_backoff(
[this]() {
ptrdiff_t const __old = __count.load(memory_order_acquire);
return __old != 0 && __fetch_sub_if_slow(__old);
Expand Down Expand Up @@ -157,7 +157,7 @@ class __atomic_semaphore<_Sco, 1>

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
{
return __libcpp_thread_poll_with_backoff(
return _CUDA_VSTD::__libcpp_thread_poll_with_backoff(
[this]() {
return try_acquire();
},
Expand Down
109 changes: 109 additions & 0 deletions libcudacxx/include/cuda/std/__thread/threading_support.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_H
#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_LIBCUDACXX_HAS_NO_THREADS)

# include <cuda/std/chrono>

# if defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)
# include <cuda/std/__thread/threading_support_external.h>
# endif // _LIBCUDACXX_HAS_THREAD_API_EXTERNAL

# if defined(_LIBCUDACXX_HAS_THREAD_API_CUDA)
# include <cuda/std/__thread/threading_support_cuda.h>
# elif defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD)
# include <cuda/std/__thread/threading_support_pthread.h>
# elif defined(_LIBCUDACXX_HAS_THREAD_API_WIN32)
# include <cuda/std/__thread/threading_support_win32.h>
# else // ^^^ _LIBCUDACXX_HAS_THREAD_API_WIN32 ^^^ / vvv Unknown Thread API vvv
# error "Unknown Thread API"
# endif // Unknown Thread API

_CCCL_PUSH_MACROS

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// Number of failed polls that __libcpp_thread_poll_with_backoff retries immediately before it
// starts measuring elapsed time and sleeping/yielding between polls.
# define _LIBCUDACXX_POLLING_COUNT 16

// Parenthesized statement list that __libcpp_thread_yield_processor passes to NV_IF_TARGET:
// the "yield" instruction on aarch64, the "pause" instruction on x86_64, and an empty
// statement on every other architecture.
# if defined(__aarch64__)
# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("yield" :::);)
# elif defined(__x86_64__)
# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("pause" :::);)
# else // ^^^ __x86_64__ ^^^ / vvv neither __aarch64__ nor __x86_64__ vvv
# define __LIBCUDACXX_ASM_THREAD_YIELD (;)
# endif // neither __aarch64__ nor __x86_64__

// Executes the architecture-specific __LIBCUDACXX_ASM_THREAD_YIELD statement when compiled for
// the host (NV_IS_HOST). No statement is supplied to NV_IF_TARGET for any other target.
_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield_processor()
{
NV_IF_TARGET(NV_IS_HOST, __LIBCUDACXX_ASM_THREAD_YIELD)
}

// Polls `__f` until it reports success or, when `__max` is non-zero, until more than `__max` has
// elapsed since this function was entered.
//
// Back-off schedule after each failed poll:
//   * the first _LIBCUDACXX_POLLING_COUNT failures retry immediately, additionally calling
//     __libcpp_thread_yield_processor() once more than half of that budget has been used;
//   * afterwards the wait is a quarter of the elapsed time: below 10us the thread only calls
//     __libcpp_thread_yield(), otherwise it sleeps for that quarter, capped at 1ms per wait.
//
// Returns true as soon as `__f()` evaluates to true and false once the timeout has been exceeded.
// A zero `__max` (the default) disables the timeout, so the call only returns when `__f` succeeds.
// The timeout is not examined during the initial immediate-retry phase.
template <class _Fn>
_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_thread_poll_with_backoff(
  _Fn&& __f, _CUDA_VSTD::chrono::nanoseconds __max = _CUDA_VSTD::chrono::nanoseconds::zero())
{
  using __hr_clock = _CUDA_VSTD::chrono::high_resolution_clock;

  // `__max` is never modified, so whether a timeout applies can be decided once up front.
  const bool __has_timeout             = __max != _CUDA_VSTD::chrono::nanoseconds::zero();
  const __hr_clock::time_point __begin = __hr_clock::now();

  int __failed_polls = 0;
  for (;;)
  {
    if (__f())
    {
      return true;
    }

    // Phase 1: retry right away, hinting the processor during the second half of the budget.
    if (__failed_polls < _LIBCUDACXX_POLLING_COUNT)
    {
      if (__failed_polls > (_LIBCUDACXX_POLLING_COUNT / 2))
      {
        _CUDA_VSTD::__libcpp_thread_yield_processor();
      }
      ++__failed_polls;
      continue;
    }

    // Phase 2: time-based back-off, preceded by the timeout check.
    const __hr_clock::duration __waited = __hr_clock::now() - __begin;
    if (__has_timeout && __max < __waited)
    {
      return false;
    }

    const _CUDA_VSTD::chrono::nanoseconds __quarter = __waited / 4;
    if (__quarter < _CUDA_VSTD::chrono::microseconds(10))
    {
      // Too early for a sleep to be worthwhile: just give up the time slice.
      _CUDA_VSTD::__libcpp_thread_yield();
    }
    else if (__quarter < _CUDA_VSTD::chrono::milliseconds(1))
    {
      _CUDA_VSTD::__libcpp_thread_sleep_for(__quarter);
    }
    else
    {
      // Never sleep longer than 1ms between two polls.
      _CUDA_VSTD::__libcpp_thread_sleep_for(_CUDA_VSTD::chrono::milliseconds(1));
    }
  }
}

_LIBCUDACXX_END_NAMESPACE_STD

_CCCL_POP_MACROS

#endif // !_LIBCUDACXX_HAS_NO_THREADS

#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_H
48 changes: 48 additions & 0 deletions libcudacxx/include/cuda/std/__thread/threading_support_cuda.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_CUDA_H
#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_CUDA_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_CUDA)

# include <cuda/std/chrono>
# include <cuda/std/climits>

_CCCL_PUSH_MACROS

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// Yield hook of the CUDA thread API backend: the body is intentionally empty, so calling it
// performs no operation.
_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield() {}

// Sleep hook of the CUDA thread API backend.
//
// When compiled for the device (NV_IS_DEVICE) it issues the PTX `nanosleep.u32` instruction with
// the tick count of `__ns` as its operand; no statement is supplied for any other target.
//
// The count is narrowed to `unsigned` for the 32-bit operand. The assert is the only guard on
// that narrowing and checks that the count is below numeric_limits<unsigned>::max().
// NOTE(review): a negative count would satisfy the assert and wrap to a large value on the
// cast - confirm that callers never pass a negative duration.
// NOTE(review): `nanosleep` presumably requires a minimum SM architecture - confirm against the
// PTX ISA documentation.
_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_sleep_for(_CUDA_VSTD::chrono::nanoseconds __ns)
{
NV_IF_TARGET(NV_IS_DEVICE,
(auto const __step = __ns.count(); assert(__step < numeric_limits<unsigned>::max());
asm volatile("nanosleep.u32 %0;" ::"r"((unsigned) __step)
:);))
}

_LIBCUDACXX_END_NAMESPACE_STD

_CCCL_POP_MACROS

#endif // !_LIBCUDACXX_HAS_NO_THREADS && _LIBCUDACXX_HAS_THREAD_API_CUDA

#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_CUDA_H
41 changes: 41 additions & 0 deletions libcudacxx/include/cuda/std/__thread/threading_support_external.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_EXTERNAL_H
#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_EXTERNAL_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)

# include <cuda/std/chrono>

_CCCL_PUSH_MACROS

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// Declarations of the two threading hooks for the external thread API configuration.
// This header only declares them; it provides no definitions.

// Yield hook; takes no arguments and returns nothing.
_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield();

// Sleep hook; receives the requested duration as a nanosecond-resolution chrono duration.
_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_sleep_for(_CUDA_VSTD::chrono::nanoseconds __ns);

_LIBCUDACXX_END_NAMESPACE_STD

_CCCL_POP_MACROS

#endif // !_LIBCUDACXX_HAS_NO_THREADS && _LIBCUDACXX_HAS_THREAD_API_EXTERNAL

#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_EXTERNAL_H
Loading

0 comments on commit 4a0addc

Please sign in to comment.