Skip to content

Commit

Permalink
threading: remove _tstate_lock from threading.Thread
Browse files Browse the repository at this point in the history
This replaces the _tstate_lock used for Thread.join() with an event
object implemented in C. This avoids calls into the Python API
after the thread state is deleted, which had caused reference counting
race conditions.
  • Loading branch information
colesbury committed Apr 23, 2023
1 parent cfc11bc commit cfecf6f
Show file tree
Hide file tree
Showing 6 changed files with 324 additions and 304 deletions.
31 changes: 4 additions & 27 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# error "this header file must not be included directly"
#endif


/*
Runtime Feature Flags
Expand Down Expand Up @@ -109,6 +108,7 @@ typedef struct _stack_chunk {

struct mi_heap_s;
typedef struct mi_heap_s mi_heap_t;
typedef struct _PyEventRc _PyEventRc;

// must match MI_NUM_HEAPS in mimalloc.h
#define Py_NUM_HEAPS 5
Expand Down Expand Up @@ -190,31 +190,8 @@ struct _ts {

uintptr_t critical_section;

/* Called when a thread state is deleted normally, but not when it
* is destroyed after fork().
* Pain: to prevent rare but fatal shutdown errors (issue 18808),
* Thread.join() must wait for the join'ed thread's tstate to be unlinked
* from the tstate chain. That happens at the end of a thread's life,
* in pystate.c.
* The obvious way doesn't quite work: create a lock which the tstate
* unlinking code releases, and have Thread.join() wait to acquire that
* lock. The problem is that we _are_ at the end of the thread's life:
* if the thread holds the last reference to the lock, decref'ing the
* lock will delete the lock, and that may trigger arbitrary Python code
* if there's a weakref, with a callback, to the lock. But by this time
* _PyRuntime.gilstate.tstate_current is already NULL, so only the simplest
* of C code can be allowed to run (in particular it must not be possible to
* release the GIL).
* So instead of holding the lock directly, the tstate holds a weakref to
* the lock: that's the value of on_delete_data below. Decref'ing a
* weakref is harmless.
* on_delete points to _threadmodule.c's static release_sentinel() function.
* After the tstate is unlinked, release_sentinel is called with the
* weakref-to-lock (on_delete_data) argument, and release_sentinel releases
* the indirectly held lock.
*/
void (*on_delete)(void *);
void *on_delete_data;
_PyEventRc *done_event; /* Set when thread is about to exit */
int daemon;

int coroutine_origin_tracking_depth;

Expand Down Expand Up @@ -270,7 +247,7 @@ struct _ts {
// Alias for backward compatibility with Python 3.8
#define _PyInterpreterState_Get PyInterpreterState_Get

PyAPI_FUNC(PyThreadState *) _PyThreadState_Prealloc(PyInterpreterState *);
PyAPI_FUNC(PyThreadState *) _PyThreadState_Prealloc(PyInterpreterState *, _PyEventRc *done_event);

/* Similar to PyThreadState_Get(), but don't issue a fatal error
* if it is NULL. */
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ PyAPI_FUNC(PyInterpreterState*) _PyInterpreterState_LookUpID(int64_t);
PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *);
PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_WaitForThreads(PyInterpreterState *);

#ifdef __cplusplus
}
Expand Down
48 changes: 7 additions & 41 deletions Lib/test/test_threading.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def run(self):

def test_limbo_cleanup(self):
# Issue 7481: Failure to start thread should cleanup the limbo map.
def fail_new_thread(*args):
def fail_new_thread(*args, **kwargs):
raise threading.ThreadError()
_start_new_thread = threading._start_new_thread
threading._start_new_thread = fail_new_thread
Expand Down Expand Up @@ -749,7 +749,8 @@ def f():
rc, out, err = assert_python_ok("-c", code)
self.assertEqual(err, b"")

def test_tstate_lock(self):
@cpython_only
def test_done_event(self):
# Test an implementation detail of Thread objects.
started = _thread.allocate_lock()
finish = _thread.allocate_lock()
Expand All @@ -759,30 +760,19 @@ def f():
started.release()
finish.acquire()
time.sleep(0.01)
# The tstate lock is None until the thread is started
# The _done_event is not set whe
t = threading.Thread(target=f)
self.assertIs(t._tstate_lock, None)
self.assertFalse(t._done_event.is_set())
t.start()
started.acquire()
self.assertTrue(t.is_alive())
# The tstate lock can't be acquired when the thread is running
# (or suspended).
tstate_lock = t._tstate_lock
self.assertFalse(tstate_lock.acquire(timeout=0), False)
self.assertFalse(t._done_event.wait(0), False)
finish.release()
# When the thread ends, the state_lock can be successfully
# acquired.
self.assertTrue(tstate_lock.acquire(timeout=support.SHORT_TIMEOUT), False)
# But is_alive() is still True: we hold _tstate_lock now, which
# prevents is_alive() from knowing the thread's end-of-life C code
# is done.
self.assertTrue(t.is_alive())
# Let is_alive() find out the C code is done.
tstate_lock.release()
self.assertFalse(t.is_alive())
# And verify the thread disposed of _tstate_lock.
self.assertIsNone(t._tstate_lock)
t.join()
self.assertTrue(t._done_event.wait(support.SHORT_TIMEOUT), False)

def test_repr_stopped(self):
# Verify that "stopped" shows up in repr(Thread) appropriately.
Expand Down Expand Up @@ -949,30 +939,6 @@ def checker():
self.assertEqual(threading.getprofile(), old_profile)
self.assertEqual(sys.getprofile(), old_profile)

@cpython_only
def test_shutdown_locks(self):
for daemon in (False, True):
with self.subTest(daemon=daemon):
event = threading.Event()
thread = threading.Thread(target=event.wait, daemon=daemon)

# Thread.start() must add lock to _shutdown_locks,
# but only for non-daemon thread
thread.start()
tstate_lock = thread._tstate_lock
if not daemon:
self.assertIn(tstate_lock, threading._shutdown_locks)
else:
self.assertNotIn(tstate_lock, threading._shutdown_locks)

# unblock the thread and join it
event.set()
thread.join()

# Thread._stop() must remove tstate_lock from _shutdown_locks.
# Daemon threads must never add it to _shutdown_locks.
self.assertNotIn(tstate_lock, threading._shutdown_locks)

def test_locals_at_exit(self):
# bpo-19466: thread locals must not be deleted before destructors
# are called
Expand Down
Loading

0 comments on commit cfecf6f

Please sign in to comment.