forked from rapidsai/cucim
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Apply pybind11 patch to avoid deadlock
This applies the following patches to pybind11: - pybind/pybind11#4857 - pybind/pybind11#4877 to avoid deadlock when using pybind11 without importing numpy in multi-threaded environment.
- Loading branch information
Showing
4 changed files
with
496 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,247 @@ | ||
diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
index 87ec103..f2f1d19 100644 | ||
--- a/CMakeLists.txt | ||
+++ b/CMakeLists.txt | ||
@@ -132,6 +132,7 @@ set(PYBIND11_HEADERS | ||
include/pybind11/embed.h | ||
include/pybind11/eval.h | ||
include/pybind11/gil.h | ||
+ include/pybind11/gil_safe_call_once.h | ||
include/pybind11/iostream.h | ||
include/pybind11/functional.h | ||
include/pybind11/numpy.h | ||
diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h | ||
index 31a54c7..8906c1f 100644 | ||
--- a/include/pybind11/detail/common.h | ||
+++ b/include/pybind11/detail/common.h | ||
@@ -118,6 +118,14 @@ | ||
# endif | ||
#endif | ||
|
||
+#if defined(PYBIND11_CPP20) | ||
+# define PYBIND11_CONSTINIT constinit | ||
+# define PYBIND11_DTOR_CONSTEXPR constexpr | ||
+#else | ||
+# define PYBIND11_CONSTINIT | ||
+# define PYBIND11_DTOR_CONSTEXPR | ||
+#endif | ||
+ | ||
// Compiler version assertions | ||
#if defined(__INTEL_COMPILER) | ||
# if __INTEL_COMPILER < 1800 | ||
diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h | ||
index 570a558..da22f48 100644 | ||
--- a/include/pybind11/gil.h | ||
+++ b/include/pybind11/gil.h | ||
@@ -11,6 +11,8 @@ | ||
|
||
#include "detail/common.h" | ||
|
||
+#include <cassert> | ||
+ | ||
#if defined(WITH_THREAD) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) | ||
# include "detail/internals.h" | ||
#endif | ||
@@ -137,7 +139,9 @@ private: | ||
|
||
class gil_scoped_release { | ||
public: | ||
+ // PRECONDITION: The GIL must be held when this constructor is called. | ||
explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) { | ||
+ assert(PyGILState_Check()); | ||
// `get_internals()` must be called here unconditionally in order to initialize | ||
// `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an | ||
// initialization race could occur as multiple threads try `gil_scoped_acquire`. | ||
@@ -201,7 +205,11 @@ class gil_scoped_release { | ||
PyThreadState *state; | ||
|
||
public: | ||
- gil_scoped_release() : state{PyEval_SaveThread()} {} | ||
+ // PRECONDITION: The GIL must be held when this constructor is called. | ||
+ gil_scoped_release() { | ||
+ assert(PyGILState_Check()); | ||
+ state = PyEval_SaveThread(); | ||
+ } | ||
gil_scoped_release(const gil_scoped_release &) = delete; | ||
gil_scoped_release &operator=(const gil_scoped_release &) = delete; | ||
~gil_scoped_release() { PyEval_RestoreThread(state); } | ||
diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h | ||
new file mode 100644 | ||
index 0000000..58b90b8 | ||
--- /dev/null | ||
+++ b/include/pybind11/gil_safe_call_once.h | ||
@@ -0,0 +1,90 @@ | ||
+// Copyright (c) 2023 The pybind Community. | ||
+ | ||
+ | ||
+#include "detail/common.h" | ||
+#include "gil.h" | ||
+ | ||
+#include <cassert> | ||
+#include <mutex> | ||
+ | ||
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) | ||
+ | ||
+// Use the `gil_safe_call_once_and_store` class below instead of the naive | ||
+// | ||
+// static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE! | ||
+// | ||
+// which has two serious issues: | ||
+// | ||
+// 1. Py_DECREF() calls potentially after the Python interpreter was finalized already, and | ||
+// 2. deadlocks in multi-threaded processes (because of missing lock ordering). | ||
+// | ||
+// The following alternative avoids both problems: | ||
+// | ||
+// PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store<py::object> storage; | ||
+// auto &imported_obj = storage // Do NOT make this `static`! | ||
+// .call_once_and_store_result([]() { | ||
+// return py::module_::import("module_name"); | ||
+// }) | ||
+// .get_stored(); | ||
+// | ||
+// The parameter of `call_once_and_store_result()` must be callable. It can make | ||
+// CPython API calls, and in particular, it can temporarily release the GIL. | ||
+// | ||
+// `T` can be any C++ type, it does not have to involve CPython API types. | ||
+// | ||
+// The behavior with regard to signals, e.g. `SIGINT` (`KeyboardInterrupt`), | ||
+// is not ideal. If the main thread is the one to actually run the `Callable`, | ||
+// then a `KeyboardInterrupt` will interrupt it if it is running normal Python | ||
+// code. The situation is different if a non-main thread runs the | ||
+// `Callable`, and then the main thread starts waiting for it to complete: | ||
+// a `KeyboardInterrupt` will not interrupt the non-main thread, but it will | ||
+// get processed only when it is the main thread's turn again and it is running | ||
+// normal Python code. However, this will be unnoticeable for quick call-once | ||
+// functions, which is usually the case. | ||
+template <typename T> | ||
+class gil_safe_call_once_and_store { | ||
+public: | ||
+ // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. | ||
+ template <typename Callable> | ||
+ gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) { | ||
+ if (!is_initialized_) { // This read is guarded by the GIL. | ||
+ // Multiple threads may enter here, because the GIL is released in the next line and | ||
+ // CPython API calls in the `fn()` call below may release and reacquire the GIL. | ||
+ gil_scoped_release gil_rel; // Needed to establish lock ordering. | ||
+ std::call_once(once_flag_, [&] { | ||
+ // Only one thread will ever enter here. | ||
+ gil_scoped_acquire gil_acq; | ||
+ ::new (storage_) T(fn()); // fn may release, but will reacquire, the GIL. | ||
+ is_initialized_ = true; // This write is guarded by the GIL. | ||
+ }); | ||
+ // All threads will observe `is_initialized_` as true here. | ||
+ } | ||
+ // Intentionally not returning `T &` to ensure the calling code is self-documenting. | ||
+ return *this; | ||
+ } | ||
+ | ||
+ // This must only be called after `call_once_and_store_result()` was called. | ||
+ T &get_stored() { | ||
+ assert(is_initialized_); | ||
+ PYBIND11_WARNING_PUSH | ||
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 | ||
+ // Needed for gcc 4.8.5 | ||
+ PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing") | ||
+#endif | ||
+ return *reinterpret_cast<T *>(storage_); | ||
+ PYBIND11_WARNING_POP | ||
+ } | ||
+ | ||
+ constexpr gil_safe_call_once_and_store() = default; | ||
+ PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; | ||
+ | ||
+private: | ||
+ alignas(T) char storage_[sizeof(T)] = {}; | ||
+ std::once_flag once_flag_ = {}; | ||
+ bool is_initialized_ = false; | ||
+ // The `is_initialized_`-`storage_` pair is very similar to `std::optional`, | ||
+ // but the latter does not have the triviality properties of former, | ||
+ // therefore `std::optional` is not a viable alternative here. | ||
+}; | ||
+ | ||
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) | ||
diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h | ||
index 36077ec..1217c8d 100644 | ||
--- a/include/pybind11/numpy.h | ||
+++ b/include/pybind11/numpy.h | ||
@@ -10,7 +10,10 @@ | ||
#pragma once | ||
|
||
#include "pybind11.h" | ||
+#include "detail/common.h" | ||
#include "complex.h" | ||
+#include "gil_safe_call_once.h" | ||
+#include "pytypes.h" | ||
|
||
#include <algorithm> | ||
#include <array> | ||
@@ -120,6 +123,20 @@ inline numpy_internals &get_numpy_internals() { | ||
return *ptr; | ||
} | ||
|
||
+PYBIND11_NOINLINE module_ import_numpy_core_submodule(const char *submodule_name) { | ||
+ module_ numpy = module_::import("numpy"); | ||
+ str version_string = numpy.attr("__version__"); | ||
+ | ||
+ module_ numpy_lib = module_::import("numpy.lib"); | ||
+ object numpy_version = numpy_lib.attr("NumpyVersion")(version_string); | ||
+ int major_version = numpy_version.attr("major").cast<int>(); | ||
+ | ||
+ /* `numpy.core` was renamed to `numpy._core` in NumPy 2.0 as it officially | ||
+ became a private module. */ | ||
+ std::string numpy_core_path = major_version >= 2 ? "numpy._core" : "numpy.core"; | ||
+ return module_::import((numpy_core_path + "." + submodule_name).c_str()); | ||
+} | ||
+ | ||
template <typename T> | ||
struct same_size { | ||
template <typename U> | ||
@@ -192,8 +209,8 @@ struct npy_api { | ||
}; | ||
|
||
static npy_api &get() { | ||
- static npy_api api = lookup(); | ||
- return api; | ||
+ PYBIND11_CONSTINIT static gil_safe_call_once_and_store<npy_api> storage; | ||
+ return storage.call_once_and_store_result(lookup).get_stored(); | ||
} | ||
|
||
bool PyArray_Check_(PyObject *obj) const { | ||
@@ -263,9 +280,13 @@ private: | ||
}; | ||
|
||
static npy_api lookup() { | ||
- module_ m = module_::import("numpy.core.multiarray"); | ||
+ module_ m = detail::import_numpy_core_submodule("multiarray"); | ||
auto c = m.attr("_ARRAY_API"); | ||
void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), nullptr); | ||
+ if (api_ptr == nullptr) { | ||
+ raise_from(PyExc_SystemError, "FAILURE obtaining numpy _ARRAY_API pointer."); | ||
+ throw error_already_set(); | ||
+ } | ||
npy_api api; | ||
#define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func]; | ||
DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion); | ||
@@ -625,13 +646,14 @@ public: | ||
char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; } | ||
|
||
private: | ||
- static object _dtype_from_pep3118() { | ||
- static PyObject *obj = module_::import("numpy.core._internal") | ||
- .attr("_dtype_from_pep3118") | ||
- .cast<object>() | ||
- .release() | ||
- .ptr(); | ||
- return reinterpret_borrow<object>(obj); | ||
+ static object &_dtype_from_pep3118() { | ||
+ PYBIND11_CONSTINIT static gil_safe_call_once_and_store<object> storage; | ||
+ return storage | ||
+ .call_once_and_store_result([]() { | ||
+ return detail::import_numpy_core_submodule("_internal") | ||
+ .attr("_dtype_from_pep3118"); | ||
+ }) | ||
+ .get_stored(); | ||
} | ||
|
||
dtype strip_padding(ssize_t itemsize) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.