Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-118727: Don't drop the GIL in drop_gil() unless the current thread holds it #118745

Merged
merged 10 commits into from
May 23, 2024
8 changes: 7 additions & 1 deletion Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,20 @@ struct _ts {
unsigned int bound_gilstate:1;
/* Currently in use (maybe holds the GIL). */
unsigned int active:1;
#ifdef Py_GIL_DISABLED
/* Currently holds the GIL. */
unsigned int holds_gil:1;
#else
unsigned int _unused:1;
#endif
swtaarrs marked this conversation as resolved.
Show resolved Hide resolved

/* various stages of finalization */
unsigned int finalizing:1;
unsigned int cleared:1;
unsigned int finalized:1;

/* padding to align to 4 bytes */
unsigned int :24;
unsigned int :23;
} _status;
#ifdef Py_BUILD_CORE
# define _PyThreadState_WHENCE_NOTSET -1
Expand Down
7 changes: 3 additions & 4 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,10 @@ extern int _PyEval_ThreadsInitialized(void);
extern void _PyEval_InitGIL(PyThreadState *tstate, int own_gil);
extern void _PyEval_FiniGIL(PyInterpreterState *interp);

// Acquire the GIL and return 1. In free-threaded builds, this function may
// return 0 to indicate that the GIL was disabled and therefore not acquired.
extern int _PyEval_AcquireLock(PyThreadState *tstate);
extern void _PyEval_AcquireLock(PyThreadState *tstate);

extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *);
extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *,
int thread_dying);

#ifdef Py_GIL_DISABLED
// Returns 0 or 1 if the GIL for the given thread's interpreter is disabled or
Expand Down
5 changes: 1 addition & 4 deletions Lib/test/test_importlib/test_threaded_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from test.support import verbose
from test.support.import_helper import forget, mock_register_at_fork
from test.support.os_helper import (TESTFN, unlink, rmtree)
from test.support import script_helper, threading_helper, requires_gil_enabled
from test.support import script_helper, threading_helper

threading_helper.requires_working_threading(module=True)

Expand Down Expand Up @@ -248,9 +248,6 @@ def test_concurrent_futures_circular_import(self):
'partial', 'cfimport.py')
script_helper.assert_python_ok(fn)

# gh-118727 and gh-118729: pool_in_threads.py may crash in free-threaded
# builds, which can hang the Tsan test so temporarily skip it for now.
@requires_gil_enabled("gh-118727: test may crash in free-threaded builds")
def test_multiprocessing_pool_circular_import(self):
# Regression test for bpo-41567
fn = os.path.join(os.path.dirname(__file__),
Expand Down
87 changes: 51 additions & 36 deletions Python/ceval_gil.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,59 +205,65 @@ static void recreate_gil(struct _gil_runtime_state *gil)
}
#endif

static void
drop_gil_impl(struct _gil_runtime_state *gil)
static inline void
drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
{
MUTEX_LOCK(gil->mutex);
_Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
_Py_atomic_store_int_relaxed(&gil->locked, 0);
#ifdef Py_GIL_DISABLED
if (tstate != NULL) {
tstate->_status.holds_gil = 0;
}
#endif
COND_SIGNAL(gil->cond);
MUTEX_UNLOCK(gil->mutex);
}

static void
drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int thread_dying)
{
struct _ceval_state *ceval = &interp->ceval;
/* If tstate is NULL, the caller is indicating that we're releasing
/* If thread_dying is true, the caller is indicating that we're releasing
the GIL for the last time in this thread. This is particularly
relevant when the current thread state is finalizing or its
interpreter is finalizing (either may be in an inconsistent
state). In that case the current thread will definitely
never try to acquire the GIL again. */
// XXX It may be more correct to check tstate->_status.finalizing.
// XXX assert(tstate == NULL || !tstate->_status.cleared);
// XXX assert(thread_dying || !tstate->_status.cleared);

assert(thread_dying || tstate != NULL);
swtaarrs marked this conversation as resolved.
Show resolved Hide resolved
struct _gil_runtime_state *gil = ceval->gil;
#ifdef Py_GIL_DISABLED
if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
// Check if we have the GIL before dropping it. tstate will be NULL if
// take_gil() detected that this thread has been destroyed, in which case
// we know we have the GIL.
if (tstate != NULL && !tstate->_status.holds_gil) {
return;
}
#endif
if (!_Py_atomic_load_int_relaxed(&gil->locked)) {
Py_FatalError("drop_gil: GIL is not locked");
}

/* tstate is allowed to be NULL (early interpreter init) */
if (tstate != NULL) {
if (!thread_dying) {
/* Sub-interpreter support: threads might have been switched
under our feet using PyThreadState_Swap(). Fix the GIL last
holder variable so that our heuristics work. */
_Py_atomic_store_ptr_relaxed(&gil->last_holder, tstate);
}

drop_gil_impl(gil);
drop_gil_impl(tstate, gil);

#ifdef FORCE_SWITCHING
/* We check tstate first in case we might be releasing the GIL for
the last time in this thread. In that case there's a possible
race with tstate->interp getting deleted after gil->mutex is
unlocked and before the following code runs, leading to a crash.
We can use (tstate == NULL) to indicate the thread is done with
the GIL, and that's the only time we might delete the
interpreter, so checking tstate first prevents the crash.
See https://github.com/python/cpython/issues/104341. */
if (tstate != NULL &&
/* We might be releasing the GIL for the last time in this thread. In that
case there's a possible race with tstate->interp getting deleted after
gil->mutex is unlocked and before the following code runs, leading to a
crash. We can use thread_dying to indicate the thread is done with the
GIL, and that's the only time we might delete the interpreter. See
https://github.com/python/cpython/issues/104341. */
if (!thread_dying &&
_Py_eval_breaker_bit_is_set(tstate, _PY_GIL_DROP_REQUEST_BIT)) {
MUTEX_LOCK(gil->switch_mutex);
/* Not switched yet => wait */
Expand All @@ -284,7 +290,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
tstate must be non-NULL.

Returns 1 if the GIL was acquired, or 0 if not. */
static int
static void
take_gil(PyThreadState *tstate)
{
int err = errno;
Expand All @@ -309,7 +315,7 @@ take_gil(PyThreadState *tstate)
struct _gil_runtime_state *gil = interp->ceval.gil;
#ifdef Py_GIL_DISABLED
if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
return 0;
return;
}
#endif

Expand Down Expand Up @@ -358,10 +364,10 @@ take_gil(PyThreadState *tstate)
if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
// Another thread disabled the GIL between our check above and
// now. Don't take the GIL, signal any other waiting threads, and
// return 0.
// return.
COND_SIGNAL(gil->cond);
MUTEX_UNLOCK(gil->mutex);
return 0;
return;
}
#endif

Expand All @@ -371,6 +377,9 @@ take_gil(PyThreadState *tstate)
MUTEX_LOCK(gil->switch_mutex);
#endif
/* We now hold the GIL */
#ifdef Py_GIL_DISABLED
tstate->_status.holds_gil = 1;
#endif
_Py_atomic_store_int_relaxed(&gil->locked, 1);
_Py_ANNOTATE_RWLOCK_ACQUIRED(&gil->locked, /*is_write=*/1);

Expand All @@ -393,9 +402,7 @@ take_gil(PyThreadState *tstate)
in take_gil() while the main thread called
wait_for_thread_shutdown() from Py_Finalize(). */
MUTEX_UNLOCK(gil->mutex);
/* Passing NULL to drop_gil() indicates that this thread is about to
terminate and will never hold the GIL again. */
drop_gil(interp, NULL);
drop_gil(interp, NULL, 1);
PyThread_exit_thread();
}
assert(_PyThreadState_CheckConsistency(tstate));
Expand All @@ -406,7 +413,7 @@ take_gil(PyThreadState *tstate)
MUTEX_UNLOCK(gil->mutex);

errno = err;
return 1;
return;
}

void _PyEval_SetSwitchInterval(unsigned long microseconds)
Expand Down Expand Up @@ -452,9 +459,16 @@ static inline int
current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate)
{
if (((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) != tstate) {
#ifdef Py_GIL_DISABLED
swtaarrs marked this conversation as resolved.
Show resolved Hide resolved
assert(!tstate->_status.holds_gil);
#endif
return 0;
}
return _Py_atomic_load_int_relaxed(&gil->locked);
int locked = _Py_atomic_load_int_relaxed(&gil->locked);
#ifdef Py_GIL_DISABLED
assert(!tstate->_status.holds_gil || locked);
#endif
return locked;
}
#endif

Expand Down Expand Up @@ -563,23 +577,24 @@ PyEval_ReleaseLock(void)
/* This function must succeed when the current thread state is NULL.
We therefore avoid PyThreadState_Get() which dumps a fatal error
in debug mode. */
drop_gil(tstate->interp, tstate);
drop_gil(tstate->interp, tstate, 0);
}

int
void
_PyEval_AcquireLock(PyThreadState *tstate)
{
_Py_EnsureTstateNotNULL(tstate);
return take_gil(tstate);
take_gil(tstate);
}

void
_PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate)
_PyEval_ReleaseLock(PyInterpreterState *interp,
PyThreadState *tstate,
int thread_dying)
{
/* If tstate is NULL then we do not expect the current thread
to acquire the GIL ever again. */
assert(tstate == NULL || tstate->interp == interp);
drop_gil(interp, tstate);
assert(tstate != NULL);
assert(tstate->interp == interp);
drop_gil(interp, tstate, thread_dying);
}

void
Expand Down Expand Up @@ -1136,7 +1151,7 @@ _PyEval_DisableGIL(PyThreadState *tstate)
//
// Drop the GIL, which will wake up any threads waiting in take_gil()
// and let them resume execution without the GIL.
drop_gil_impl(gil);
drop_gil_impl(tstate, gil);
return 1;
}
return 0;
Expand Down
12 changes: 5 additions & 7 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1834,7 +1834,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate)
#endif
current_fast_clear(tstate->interp->runtime);
tstate_delete_common(tstate);
_PyEval_ReleaseLock(tstate->interp, NULL);
_PyEval_ReleaseLock(tstate->interp, tstate, 1);
free_threadstate((_PyThreadStateImpl *)tstate);
}

Expand Down Expand Up @@ -2059,7 +2059,7 @@ _PyThreadState_Attach(PyThreadState *tstate)


while (1) {
int acquired_gil = _PyEval_AcquireLock(tstate);
_PyEval_AcquireLock(tstate);
ericsnowcurrently marked this conversation as resolved.
Show resolved Hide resolved

// XXX assert(tstate_is_alive(tstate));
current_fast_set(&_PyRuntime, tstate);
Expand All @@ -2070,20 +2070,18 @@ _PyThreadState_Attach(PyThreadState *tstate)
}

#ifdef Py_GIL_DISABLED
if (_PyEval_IsGILEnabled(tstate) != acquired_gil) {
if (_PyEval_IsGILEnabled(tstate) != tstate->_status.holds_gil) {
swtaarrs marked this conversation as resolved.
Show resolved Hide resolved
// The GIL was enabled between our call to _PyEval_AcquireLock()
// and when we attached (the GIL can't go from enabled to disabled
// here because only a thread holding the GIL can disable
// it). Detach and try again.
assert(!acquired_gil);
assert(!tstate->_status.holds_gil);
tstate_set_detached(tstate, _Py_THREAD_DETACHED);
tstate_deactivate(tstate);
current_fast_clear(&_PyRuntime);
continue;
}
_Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
#else
(void)acquired_gil;
#endif
break;
}
Expand Down Expand Up @@ -2114,7 +2112,7 @@ detach_thread(PyThreadState *tstate, int detached_state)
tstate_deactivate(tstate);
tstate_set_detached(tstate, detached_state);
current_fast_clear(&_PyRuntime);
_PyEval_ReleaseLock(tstate->interp, tstate);
_PyEval_ReleaseLock(tstate->interp, tstate, 0);
}

void
Expand Down
Loading