Skip to content

Commit 9581448

Browse files
pythongh-126914: Store the Preallocated Thread State's Pointer in a PyInterpreterState Field (pythongh-126989)
This approach eliminates the originally reported race. It also gets rid of the deadlock reported in pythongh-96071, so we can remove the workaround added then.
1 parent 8cdd636 commit 9581448

File tree

3 files changed

+78
-46
lines changed

3 files changed

+78
-46
lines changed

Include/internal/pycore_interp.h

+2
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ struct _is {
275275
/* the initial PyInterpreterState.threads.head */
276276
_PyThreadStateImpl _initial_thread;
277277
Py_ssize_t _interactive_src_count;
278+
// In 3.14+ this is interp->threads.preallocated.
279+
_PyThreadStateImpl *threads_preallocated;
278280
};
279281

280282

Lib/test/test_interpreters/test_stress.py

+30
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def test_create_many_sequential(self):
2323
alive.append(interp)
2424

2525
@support.requires_resource('cpu')
26+
@threading_helper.requires_working_threading()
2627
def test_create_many_threaded(self):
2728
alive = []
2829
def task():
@@ -32,6 +33,35 @@ def task():
3233
with threading_helper.start_threads(threads):
3334
pass
3435

36+
@support.requires_resource('cpu')
37+
@threading_helper.requires_working_threading()
38+
def test_many_threads_running_interp_in_other_interp(self):
39+
interp = interpreters.create()
40+
41+
script = f"""if True:
42+
import _interpreters
43+
_interpreters.run_string({interp.id}, '1')
44+
"""
45+
46+
def run():
47+
interp = interpreters.create()
48+
alreadyrunning = (f'{interpreters.InterpreterError}: '
49+
'interpreter already running')
50+
success = False
51+
while not success:
52+
try:
53+
interp.exec(script)
54+
except interpreters.ExecutionFailed as exc:
55+
if exc.excinfo.msg != 'interpreter already running':
56+
raise # re-raise
57+
assert exc.excinfo.type.__name__ == 'InterpreterError'
58+
else:
59+
success = True
60+
61+
threads = (threading.Thread(target=run) for _ in range(200))
62+
with threading_helper.start_threads(threads):
63+
pass
64+
3565

3666
if __name__ == '__main__':
3767
# Test needs to be a package, so we can do relative imports.

Python/pystate.c

+46-46
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,8 @@ init_interpreter(PyInterpreterState *interp,
632632
assert(next != NULL || (interp == runtime->interpreters.main));
633633
interp->next = next;
634634

635+
interp->threads_preallocated = &interp->_initial_thread;
636+
635637
// We would call _PyObject_InitState() at this point
636638
// if interp->feature_flags were alredy set.
637639

@@ -767,7 +769,6 @@ PyInterpreterState_New(void)
767769
return interp;
768770
}
769771

770-
771772
static void
772773
interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
773774
{
@@ -906,6 +907,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
906907
// XXX Once we have one allocator per interpreter (i.e.
907908
// per-interpreter GC) we must ensure that all of the interpreter's
908909
// objects have been cleaned up at the point.
910+
911+
// If we had a freelist of thread states, we would clear it here.
909912
}
910913

911914

@@ -1427,22 +1430,45 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
14271430
return res;
14281431
}
14291432

1433+
static void
1434+
reset_threadstate(_PyThreadStateImpl *tstate)
1435+
{
1436+
// Set to _PyThreadState_INIT directly?
1437+
memcpy(tstate,
1438+
&initial._main_interpreter._initial_thread,
1439+
sizeof(*tstate));
1440+
}
1441+
14301442
static _PyThreadStateImpl *
1431-
alloc_threadstate(void)
1443+
alloc_threadstate(PyInterpreterState *interp)
14321444
{
1433-
return PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
1445+
_PyThreadStateImpl *tstate;
1446+
1447+
// Try the preallocated tstate first.
1448+
tstate = _Py_atomic_exchange_ptr(&interp->threads_preallocated, NULL);
1449+
1450+
// Fall back to the allocator.
1451+
if (tstate == NULL) {
1452+
tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
1453+
if (tstate == NULL) {
1454+
return NULL;
1455+
}
1456+
reset_threadstate(tstate);
1457+
}
1458+
return tstate;
14341459
}
14351460

14361461
static void
14371462
free_threadstate(_PyThreadStateImpl *tstate)
14381463
{
1464+
PyInterpreterState *interp = tstate->base.interp;
14391465
// The initial thread state of the interpreter is allocated
14401466
// as part of the interpreter state so should not be freed.
1441-
if (tstate == &tstate->base.interp->_initial_thread) {
1442-
// Restore to _PyThreadState_INIT.
1443-
memcpy(tstate,
1444-
&initial._main_interpreter._initial_thread,
1445-
sizeof(*tstate));
1467+
if (tstate == &interp->_initial_thread) {
1468+
// Make it available again.
1469+
reset_threadstate(tstate);
1470+
assert(interp->threads_preallocated == NULL);
1471+
_Py_atomic_store_ptr(&interp->threads_preallocated, tstate);
14461472
}
14471473
else {
14481474
PyMem_RawFree(tstate);
@@ -1533,68 +1559,42 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
15331559
static PyThreadState *
15341560
new_threadstate(PyInterpreterState *interp, int whence)
15351561
{
1536-
_PyThreadStateImpl *tstate;
1537-
_PyRuntimeState *runtime = interp->runtime;
1538-
// We don't need to allocate a thread state for the main interpreter
1539-
// (the common case), but doing it later for the other case revealed a
1540-
// reentrancy problem (deadlock). So for now we always allocate before
1541-
// taking the interpreters lock. See GH-96071.
1542-
_PyThreadStateImpl *new_tstate = alloc_threadstate();
1543-
int used_newtstate;
1544-
if (new_tstate == NULL) {
1562+
// Allocate the thread state.
1563+
_PyThreadStateImpl *tstate = alloc_threadstate(interp);
1564+
if (tstate == NULL) {
15451565
return NULL;
15461566
}
1567+
15471568
#ifdef Py_GIL_DISABLED
15481569
Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
15491570
if (qsbr_idx < 0) {
1550-
PyMem_RawFree(new_tstate);
1571+
free_threadstate(tstate);
15511572
return NULL;
15521573
}
15531574
#endif
15541575

15551576
/* We serialize concurrent creation to protect global state. */
1556-
HEAD_LOCK(runtime);
1577+
HEAD_LOCK(interp->runtime);
15571578

1579+
// Initialize the new thread state.
15581580
interp->threads.next_unique_id += 1;
15591581
uint64_t id = interp->threads.next_unique_id;
1582+
init_threadstate(tstate, interp, id, whence);
15601583

1561-
// Allocate the thread state and add it to the interpreter.
1584+
// Add the new thread state to the interpreter.
15621585
PyThreadState *old_head = interp->threads.head;
1563-
if (old_head == NULL) {
1564-
// It's the interpreter's initial thread state.
1565-
used_newtstate = 0;
1566-
tstate = &interp->_initial_thread;
1567-
}
1568-
// XXX Re-use interp->_initial_thread if not in use?
1569-
else {
1570-
// Every valid interpreter must have at least one thread.
1571-
assert(id > 1);
1572-
assert(old_head->prev == NULL);
1573-
used_newtstate = 1;
1574-
tstate = new_tstate;
1575-
// Set to _PyThreadState_INIT.
1576-
memcpy(tstate,
1577-
&initial._main_interpreter._initial_thread,
1578-
sizeof(*tstate));
1579-
}
1580-
1581-
init_threadstate(tstate, interp, id, whence);
15821586
add_threadstate(interp, (PyThreadState *)tstate, old_head);
15831587

1584-
HEAD_UNLOCK(runtime);
1585-
if (!used_newtstate) {
1586-
// Must be called with lock unlocked to avoid re-entrancy deadlock.
1587-
PyMem_RawFree(new_tstate);
1588-
}
1589-
else {
1588+
HEAD_UNLOCK(interp->runtime);
15901589
#ifdef Py_GIL_DISABLED
1590+
if (id == 1) {
15911591
if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) {
15921592
// Immortalize objects marked as using deferred reference counting
15931593
// the first time a non-main thread is created.
15941594
_PyGC_ImmortalizeDeferredObjects(interp);
15951595
}
1596-
#endif
15971596
}
1597+
#endif
15981598

15991599
#ifdef Py_GIL_DISABLED
16001600
// Must be called with lock unlocked to avoid lock ordering deadlocks.

0 commit comments

Comments
 (0)