Skip to content

Commit 9dabace

Browse files
gh-114940: Add _Py_FOR_EACH_TSTATE_UNLOCKED(), and Friends (gh-127077)
This is a precursor to the actual fix for gh-114940, where we will change these macros to use the new lock. This change is almost entirely mechanical; the exceptions are the loops in codeobject.c and ceval.c, which now hold the "head" lock. Note that almost all of the uses of _Py_FOR_EACH_TSTATE_UNLOCKED() here will change to _Py_FOR_EACH_TSTATE_BEGIN() once we add the new per-interpreter lock.
1 parent bf542f8 commit 9dabace

File tree

9 files changed

+79
-87
lines changed

9 files changed

+79
-87
lines changed

Include/internal/pycore_pystate.h

+9
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,15 @@ extern int _PyOS_InterruptOccurred(PyThreadState *tstate);
269269
#define HEAD_UNLOCK(runtime) \
270270
PyMutex_Unlock(&(runtime)->interpreters.mutex)
271271

272+
#define _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) \
273+
for (PyThreadState *t = interp->threads.head; t; t = t->next)
274+
#define _Py_FOR_EACH_TSTATE_BEGIN(interp, t) \
275+
HEAD_LOCK(interp->runtime); \
276+
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, t)
277+
#define _Py_FOR_EACH_TSTATE_END(interp) \
278+
HEAD_UNLOCK(interp->runtime)
279+
280+
272281
// Get the configuration of the current interpreter.
273282
// The caller must hold the GIL.
274283
// Export for test_peg_generator.

Objects/codeobject.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -2895,20 +2895,22 @@ get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use)
28952895
assert(interp->stoptheworld.world_stopped);
28962896
assert(in_use->flags == NULL);
28972897
int32_t max_index = 0;
2898-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
2898+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
28992899
int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index;
29002900
if (idx > max_index) {
29012901
max_index = idx;
29022902
}
29032903
}
2904+
_Py_FOR_EACH_TSTATE_END(interp);
29042905
in_use->size = (size_t) max_index + 1;
29052906
in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags));
29062907
if (in_use->flags == NULL) {
29072908
return -1;
29082909
}
2909-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
2910+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
29102911
in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1;
29112912
}
2913+
_Py_FOR_EACH_TSTATE_END(interp);
29122914
return 0;
29132915
}
29142916

Objects/object.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ get_reftotal(PyInterpreterState *interp)
119119
since we can't determine which interpreter updated it. */
120120
Py_ssize_t total = REFTOTAL(interp);
121121
#ifdef Py_GIL_DISABLED
122-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
122+
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) {
123123
/* This may race with other threads modifications to their reftotal */
124124
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)p;
125125
total += _Py_atomic_load_ssize_relaxed(&tstate_impl->reftotal);

Objects/obmalloc.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,7 @@ get_mimalloc_allocated_blocks(PyInterpreterState *interp)
14391439
{
14401440
size_t allocated_blocks = 0;
14411441
#ifdef Py_GIL_DISABLED
1442-
for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) {
1442+
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
14431443
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
14441444
for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
14451445
mi_heap_t *heap = &tstate->mimalloc.heaps[i];

Python/ceval.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -296,11 +296,12 @@ Py_SetRecursionLimit(int new_limit)
296296
{
297297
PyInterpreterState *interp = _PyInterpreterState_GET();
298298
interp->ceval.recursion_limit = new_limit;
299-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
299+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
300300
int depth = p->py_recursion_limit - p->py_recursion_remaining;
301301
p->py_recursion_limit = new_limit;
302302
p->py_recursion_remaining = new_limit - depth;
303303
}
304+
_Py_FOR_EACH_TSTATE_END(interp);
304305
}
305306

306307
/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()

Python/ceval_gil.c

+4-10
Original file line numberDiff line numberDiff line change
@@ -977,25 +977,19 @@ make_pending_calls(PyThreadState *tstate)
977977
void
978978
_Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit)
979979
{
980-
_PyRuntimeState *runtime = &_PyRuntime;
981-
982-
HEAD_LOCK(runtime);
983-
for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
980+
_Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) {
984981
_Py_set_eval_breaker_bit(tstate, bit);
985982
}
986-
HEAD_UNLOCK(runtime);
983+
_Py_FOR_EACH_TSTATE_END(interp);
987984
}
988985

989986
void
990987
_Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit)
991988
{
992-
_PyRuntimeState *runtime = &_PyRuntime;
993-
994-
HEAD_LOCK(runtime);
995-
for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
989+
_Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) {
996990
_Py_unset_eval_breaker_bit(tstate, bit);
997991
}
998-
HEAD_UNLOCK(runtime);
992+
_Py_FOR_EACH_TSTATE_END(interp);
999993
}
1000994

1001995
void

Python/gc_free_threading.c

+10-15
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor
304304
Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*);
305305

306306
// visit each thread's heaps for GC objects
307-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
307+
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) {
308308
struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc;
309309
if (!_Py_atomic_load_int(&m->initialized)) {
310310
// The thread may not have called tstate_mimalloc_bind() yet.
@@ -374,8 +374,7 @@ gc_visit_stackref(_PyStackRef stackref)
374374
static void
375375
gc_visit_thread_stacks(PyInterpreterState *interp)
376376
{
377-
HEAD_LOCK(&_PyRuntime);
378-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
377+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
379378
for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) {
380379
PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
381380
if (executable == NULL || !PyCode_Check(executable)) {
@@ -390,7 +389,7 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
390389
}
391390
}
392391
}
393-
HEAD_UNLOCK(&_PyRuntime);
392+
_Py_FOR_EACH_TSTATE_END(interp);
394393
}
395394

396395
static void
@@ -444,14 +443,13 @@ process_delayed_frees(PyInterpreterState *interp, struct collection_state *state
444443

445444
// Merge the queues from other threads into our own queue so that we can
446445
// process all of the pending delayed free requests at once.
447-
HEAD_LOCK(&_PyRuntime);
448-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
446+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
449447
_PyThreadStateImpl *other = (_PyThreadStateImpl *)p;
450448
if (other != current_tstate) {
451449
llist_concat(&current_tstate->mem_free_queue, &other->mem_free_queue);
452450
}
453451
}
454-
HEAD_UNLOCK(&_PyRuntime);
452+
_Py_FOR_EACH_TSTATE_END(interp);
455453

456454
_PyMem_ProcessDelayedNoDealloc((PyThreadState *)current_tstate, queue_freed_object, state);
457455
}
@@ -1234,8 +1232,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
12341232
state->gcstate->old[i-1].count = 0;
12351233
}
12361234

1237-
HEAD_LOCK(&_PyRuntime);
1238-
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
1235+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
12391236
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;
12401237

12411238
// merge per-thread refcount for types into the type's actual refcount
@@ -1244,7 +1241,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
12441241
// merge refcounts for all queued objects
12451242
merge_queued_objects(tstate, state);
12461243
}
1247-
HEAD_UNLOCK(&_PyRuntime);
1244+
_Py_FOR_EACH_TSTATE_END(interp);
12481245

12491246
process_delayed_frees(interp, state);
12501247

@@ -1993,13 +1990,11 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
19931990
void
19941991
_PyGC_ClearAllFreeLists(PyInterpreterState *interp)
19951992
{
1996-
HEAD_LOCK(&_PyRuntime);
1997-
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head;
1998-
while (tstate != NULL) {
1993+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
1994+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;
19991995
_PyObject_ClearFreeLists(&tstate->freelists, 0);
2000-
tstate = (_PyThreadStateImpl *)tstate->base.next;
20011996
}
2002-
HEAD_UNLOCK(&_PyRuntime);
1997+
_Py_FOR_EACH_TSTATE_END(interp);
20031998
}
20041999

20052000
#endif // Py_GIL_DISABLED

Python/instrumentation.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -1006,13 +1006,10 @@ set_global_version(PyThreadState *tstate, uint32_t version)
10061006

10071007
#ifdef Py_GIL_DISABLED
10081008
// Set the version on all threads in free-threaded builds.
1009-
_PyRuntimeState *runtime = &_PyRuntime;
1010-
HEAD_LOCK(runtime);
1011-
for (tstate = interp->threads.head; tstate;
1012-
tstate = PyThreadState_Next(tstate)) {
1009+
_Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) {
10131010
set_version_raw(&tstate->eval_breaker, version);
10141011
};
1015-
HEAD_UNLOCK(runtime);
1012+
_Py_FOR_EACH_TSTATE_END(interp);
10161013
#else
10171014
// Normal builds take the current version from instrumentation_version when
10181015
// attaching a thread, so we only have to set the current thread's version.

Python/pystate.c

+46-52
Original file line numberDiff line numberDiff line change
@@ -790,18 +790,15 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
790790
}
791791

792792
// Clear the current/main thread state last.
793-
HEAD_LOCK(runtime);
794-
PyThreadState *p = interp->threads.head;
795-
HEAD_UNLOCK(runtime);
796-
while (p != NULL) {
793+
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
797794
// See https://github.com/python/cpython/issues/102126
798795
// Must be called without HEAD_LOCK held as it can deadlock
799796
// if any finalizer tries to acquire that lock.
797+
HEAD_UNLOCK(runtime);
800798
PyThreadState_Clear(p);
801799
HEAD_LOCK(runtime);
802-
p = p->next;
803-
HEAD_UNLOCK(runtime);
804800
}
801+
_Py_FOR_EACH_TSTATE_END(interp);
805802
if (tstate->interp == interp) {
806803
/* We fix tstate->_status below when we for sure aren't using it
807804
(e.g. no longer need the GIL). */
@@ -1801,10 +1798,9 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
18011798
static void
18021799
zapthreads(PyInterpreterState *interp)
18031800
{
1804-
PyThreadState *tstate;
18051801
/* No need to lock the mutex here because this should only happen
18061802
when the threads are all really dead (XXX famous last words). */
1807-
while ((tstate = interp->threads.head) != NULL) {
1803+
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) {
18081804
tstate_verify_not_active(tstate);
18091805
tstate_delete_common(tstate, 0);
18101806
free_threadstate((_PyThreadStateImpl *)tstate);
@@ -2161,7 +2157,7 @@ decrement_stoptheworld_countdown(struct _stoptheworld_state *stw)
21612157
}
21622158

21632159
#ifdef Py_GIL_DISABLED
2164-
// Interpreter for _Py_FOR_EACH_THREAD(). For global stop-the-world events,
2160+
// Interpreter for _Py_FOR_EACH_STW_INTERP(). For global stop-the-world events,
21652161
// we start with the first interpreter and then iterate over all interpreters.
21662162
// For per-interpreter stop-the-world events, we only operate on the one
21672163
// interpreter.
@@ -2176,10 +2172,9 @@ interp_for_stop_the_world(struct _stoptheworld_state *stw)
21762172
// Loops over threads for a stop-the-world event.
21772173
// For global: all threads in all interpreters
21782174
// For per-interpreter: all threads in the interpreter
2179-
#define _Py_FOR_EACH_THREAD(stw, i, t) \
2180-
for (i = interp_for_stop_the_world((stw)); \
2181-
i != NULL; i = ((stw->is_global) ? i->next : NULL)) \
2182-
for (t = i->threads.head; t; t = t->next)
2175+
#define _Py_FOR_EACH_STW_INTERP(stw, i) \
2176+
for (PyInterpreterState *i = interp_for_stop_the_world((stw)); \
2177+
i != NULL; i = ((stw->is_global) ? i->next : NULL))
21832178

21842179

21852180
// Try to transition threads atomically from the "detached" state to the
@@ -2188,19 +2183,19 @@ static bool
21882183
park_detached_threads(struct _stoptheworld_state *stw)
21892184
{
21902185
int num_parked = 0;
2191-
PyInterpreterState *i;
2192-
PyThreadState *t;
2193-
_Py_FOR_EACH_THREAD(stw, i, t) {
2194-
int state = _Py_atomic_load_int_relaxed(&t->state);
2195-
if (state == _Py_THREAD_DETACHED) {
2196-
// Atomically transition to "suspended" if in "detached" state.
2197-
if (_Py_atomic_compare_exchange_int(&t->state,
2198-
&state, _Py_THREAD_SUSPENDED)) {
2199-
num_parked++;
2186+
_Py_FOR_EACH_STW_INTERP(stw, i) {
2187+
_Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
2188+
int state = _Py_atomic_load_int_relaxed(&t->state);
2189+
if (state == _Py_THREAD_DETACHED) {
2190+
// Atomically transition to "suspended" if in "detached" state.
2191+
if (_Py_atomic_compare_exchange_int(
2192+
&t->state, &state, _Py_THREAD_SUSPENDED)) {
2193+
num_parked++;
2194+
}
2195+
}
2196+
else if (state == _Py_THREAD_ATTACHED && t != stw->requester) {
2197+
_Py_set_eval_breaker_bit(t, _PY_EVAL_PLEASE_STOP_BIT);
22002198
}
2201-
}
2202-
else if (state == _Py_THREAD_ATTACHED && t != stw->requester) {
2203-
_Py_set_eval_breaker_bit(t, _PY_EVAL_PLEASE_STOP_BIT);
22042199
}
22052200
}
22062201
stw->thread_countdown -= num_parked;
@@ -2227,12 +2222,12 @@ stop_the_world(struct _stoptheworld_state *stw)
22272222
stw->stop_event = (PyEvent){0}; // zero-initialize (unset)
22282223
stw->requester = _PyThreadState_GET(); // may be NULL
22292224

2230-
PyInterpreterState *i;
2231-
PyThreadState *t;
2232-
_Py_FOR_EACH_THREAD(stw, i, t) {
2233-
if (t != stw->requester) {
2234-
// Count all the other threads (we don't wait on ourself).
2235-
stw->thread_countdown++;
2225+
_Py_FOR_EACH_STW_INTERP(stw, i) {
2226+
_Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
2227+
if (t != stw->requester) {
2228+
// Count all the other threads (we don't wait on ourself).
2229+
stw->thread_countdown++;
2230+
}
22362231
}
22372232
}
22382233

@@ -2273,14 +2268,14 @@ start_the_world(struct _stoptheworld_state *stw)
22732268
stw->requested = 0;
22742269
stw->world_stopped = 0;
22752270
// Switch threads back to the detached state.
2276-
PyInterpreterState *i;
2277-
PyThreadState *t;
2278-
_Py_FOR_EACH_THREAD(stw, i, t) {
2279-
if (t != stw->requester) {
2280-
assert(_Py_atomic_load_int_relaxed(&t->state) ==
2281-
_Py_THREAD_SUSPENDED);
2282-
_Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED);
2283-
_PyParkingLot_UnparkAll(&t->state);
2271+
_Py_FOR_EACH_STW_INTERP(stw, i) {
2272+
_Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
2273+
if (t != stw->requester) {
2274+
assert(_Py_atomic_load_int_relaxed(&t->state) ==
2275+
_Py_THREAD_SUSPENDED);
2276+
_Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED);
2277+
_PyParkingLot_UnparkAll(&t->state);
2278+
}
22842279
}
22852280
}
22862281
stw->requester = NULL;
@@ -2344,7 +2339,6 @@ _PyEval_StartTheWorld(PyInterpreterState *interp)
23442339
int
23452340
PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
23462341
{
2347-
_PyRuntimeState *runtime = &_PyRuntime;
23482342
PyInterpreterState *interp = _PyInterpreterState_GET();
23492343

23502344
/* Although the GIL is held, a few C API functions can be called
@@ -2353,12 +2347,16 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
23532347
* list of thread states we're traversing, so to prevent that we lock
23542348
* head_mutex for the duration.
23552349
*/
2356-
HEAD_LOCK(runtime);
2357-
for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
2358-
if (tstate->thread_id != id) {
2359-
continue;
2350+
PyThreadState *tstate = NULL;
2351+
_Py_FOR_EACH_TSTATE_BEGIN(interp, t) {
2352+
if (t->thread_id == id) {
2353+
tstate = t;
2354+
break;
23602355
}
2356+
}
2357+
_Py_FOR_EACH_TSTATE_END(interp);
23612358

2359+
if (tstate != NULL) {
23622360
/* Tricky: we need to decref the current value
23632361
* (if any) in tstate->async_exc, but that can in turn
23642362
* allow arbitrary Python code to run, including
@@ -2368,14 +2366,12 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
23682366
*/
23692367
Py_XINCREF(exc);
23702368
PyObject *old_exc = _Py_atomic_exchange_ptr(&tstate->async_exc, exc);
2371-
HEAD_UNLOCK(runtime);
23722369

23732370
Py_XDECREF(old_exc);
23742371
_Py_set_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT);
2375-
return 1;
23762372
}
2377-
HEAD_UNLOCK(runtime);
2378-
return 0;
2373+
2374+
return tstate != NULL;
23792375
}
23802376

23812377
//---------------------------------
@@ -2515,8 +2511,7 @@ _PyThread_CurrentFrames(void)
25152511
HEAD_LOCK(runtime);
25162512
PyInterpreterState *i;
25172513
for (i = runtime->interpreters.head; i != NULL; i = i->next) {
2518-
PyThreadState *t;
2519-
for (t = i->threads.head; t != NULL; t = t->next) {
2514+
_Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
25202515
_PyInterpreterFrame *frame = t->current_frame;
25212516
frame = _PyFrame_GetFirstComplete(frame);
25222517
if (frame == NULL) {
@@ -2581,8 +2576,7 @@ _PyThread_CurrentExceptions(void)
25812576
HEAD_LOCK(runtime);
25822577
PyInterpreterState *i;
25832578
for (i = runtime->interpreters.head; i != NULL; i = i->next) {
2584-
PyThreadState *t;
2585-
for (t = i->threads.head; t != NULL; t = t->next) {
2579+
_Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
25862580
_PyErr_StackItem *err_info = _PyErr_GetTopmostException(t);
25872581
if (err_info == NULL) {
25882582
continue;

0 commit comments

Comments
 (0)