@@ -632,6 +632,8 @@ init_interpreter(PyInterpreterState *interp,
     assert(next != NULL || (interp == runtime->interpreters.main));
     interp->next = next;
 
+    interp->threads_preallocated = &interp->_initial_thread;
+
     // We would call _PyObject_InitState() at this point
     // if interp->feature_flags were already set.
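In effect, the interpreter now owns a one-slot cache of thread-state storage: `_initial_thread` is embedded in the interpreter struct itself, and `threads_preallocated` points at it whenever it is free. A minimal sketch of that layout, using invented stand-in types rather than CPython's real `PyInterpreterState` definition:

#include <stddef.h>

/* Simplified stand-ins; the real CPython structs have many more fields. */
typedef struct { int placeholder; } ThreadStateImpl;

typedef struct {
    /* Embedded storage, so the first thread state needs no heap allocation. */
    ThreadStateImpl _initial_thread;
    /* Points at _initial_thread while it is free; NULL once claimed. */
    ThreadStateImpl *threads_preallocated;
} InterpreterState;

static void
init_interpreter(InterpreterState *interp)
{
    /* Publish the embedded thread state as available, as the hunk above does. */
    interp->threads_preallocated = &interp->_initial_thread;
}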
@@ -767,7 +769,6 @@ PyInterpreterState_New(void)
     return interp;
 }
 
-
 static void
 interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
 {
@@ -906,6 +907,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     // XXX Once we have one allocator per interpreter (i.e.
     // per-interpreter GC) we must ensure that all of the interpreter's
     // objects have been cleaned up at that point.
+
+    // If we had a freelist of thread states, we would clear it here.
 }
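The new comment only gestures at a possible future design. Purely as a hypothetical sketch (CPython has no such freelist today, and every name below is invented), draining a per-interpreter freelist of thread states at this point might look like:

#include <stddef.h>
#include <stdlib.h>

/* Hypothetical singly linked freelist node. */
struct ts_free_node { struct ts_free_node *next; };

static void
clear_threadstate_freelist(struct ts_free_node **head)
{
    struct ts_free_node *node = *head;
    *head = NULL;
    while (node != NULL) {
        struct ts_free_node *next = node->next;
        free(node);   /* CPython would presumably use PyMem_RawFree() here */
        node = next;
    }
}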
@@ -1427,22 +1430,45 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
     return res;
 }
 
+static void
+reset_threadstate(_PyThreadStateImpl *tstate)
+{
+    // Set to _PyThreadState_INIT directly?
+    memcpy(tstate,
+           &initial._main_interpreter._initial_thread,
+           sizeof(*tstate));
+}
+
 static _PyThreadStateImpl *
-alloc_threadstate(void)
+alloc_threadstate(PyInterpreterState *interp)
 {
-    return PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+    _PyThreadStateImpl *tstate;
+
+    // Try the preallocated tstate first.
+    tstate = _Py_atomic_exchange_ptr(&interp->threads_preallocated, NULL);
+
+    // Fall back to the allocator.
+    if (tstate == NULL) {
+        tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+        if (tstate == NULL) {
+            return NULL;
+        }
+        reset_threadstate(tstate);
+    }
+    return tstate;
 }
 
 static void
 free_threadstate(_PyThreadStateImpl *tstate)
 {
+    PyInterpreterState *interp = tstate->base.interp;
     // The initial thread state of the interpreter is allocated
     // as part of the interpreter state so should not be freed.
-    if (tstate == &tstate->base.interp->_initial_thread) {
-        // Restore to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
+    if (tstate == &interp->_initial_thread) {
+        // Make it available again.
+        reset_threadstate(tstate);
+        assert(interp->threads_preallocated == NULL);
+        _Py_atomic_store_ptr(&interp->threads_preallocated, tstate);
     }
     else {
         PyMem_RawFree(tstate);
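The pairing of `_Py_atomic_exchange_ptr()` in `alloc_threadstate()` with `_Py_atomic_store_ptr()` in `free_threadstate()` is a lock-free, single-slot cache: the exchange guarantees that at most one caller can claim the preallocated state, even without holding the runtime lock. A self-contained C11 sketch of the same pattern, using standard <stdatomic.h> in place of CPython's private atomics (all names here are illustrative, not CPython's):

#include <stdatomic.h>
#include <stdlib.h>

typedef struct { int placeholder; } TState;

typedef struct {
    TState slot;                     /* preallocated, embedded storage */
    _Atomic(TState *) preallocated;  /* &slot when free, NULL when claimed */
} Interp;

static TState *
ts_alloc(Interp *interp)
{
    /* Atomically claim the preallocated state; the exchange writes NULL
     * and returns the old value in one indivisible step, so at most one
     * racing caller observes a non-NULL result. */
    TState *ts = atomic_exchange(&interp->preallocated, NULL);
    if (ts == NULL) {
        /* Fall back to the heap; may return NULL on OOM, like the real code. */
        ts = calloc(1, sizeof(TState));
    }
    return ts;
}

static void
ts_free(Interp *interp, TState *ts)
{
    if (ts == &interp->slot) {
        /* Return the embedded state to the slot instead of freeing it. */
        atomic_store(&interp->preallocated, ts);
    }
    else {
        free(ts);
    }
}

Because the exchange is atomic, two racing allocators cannot both see the slot as full; the loser simply takes the heap path, which is exactly the fallback `alloc_threadstate()` uses.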
@@ -1533,68 +1559,42 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
 static PyThreadState *
 new_threadstate(PyInterpreterState *interp, int whence)
 {
-    _PyThreadStateImpl *tstate;
-    _PyRuntimeState *runtime = interp->runtime;
-    // We don't need to allocate a thread state for the main interpreter
-    // (the common case), but doing it later for the other case revealed a
-    // reentrancy problem (deadlock). So for now we always allocate before
-    // taking the interpreters lock. See GH-96071.
-    _PyThreadStateImpl *new_tstate = alloc_threadstate();
-    int used_newtstate;
-    if (new_tstate == NULL) {
+    // Allocate the thread state.
+    _PyThreadStateImpl *tstate = alloc_threadstate(interp);
+    if (tstate == NULL) {
         return NULL;
     }
+
 #ifdef Py_GIL_DISABLED
     Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
     if (qsbr_idx < 0) {
-        PyMem_RawFree(new_tstate);
+        free_threadstate(tstate);
         return NULL;
     }
 #endif
 
     /* We serialize concurrent creation to protect global state. */
-    HEAD_LOCK(runtime);
+    HEAD_LOCK(interp->runtime);
 
+    // Initialize the new thread state.
     interp->threads.next_unique_id += 1;
     uint64_t id = interp->threads.next_unique_id;
+    init_threadstate(tstate, interp, id, whence);
 
-    // Allocate the thread state and add it to the interpreter.
+    // Add the new thread state to the interpreter.
     PyThreadState *old_head = interp->threads.head;
-    if (old_head == NULL) {
-        // It's the interpreter's initial thread state.
-        used_newtstate = 0;
-        tstate = &interp->_initial_thread;
-    }
-    // XXX Re-use interp->_initial_thread if not in use?
-    else {
-        // Every valid interpreter must have at least one thread.
-        assert(id > 1);
-        assert(old_head->prev == NULL);
-        used_newtstate = 1;
-        tstate = new_tstate;
-        // Set to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
-    }
-
-    init_threadstate(tstate, interp, id, whence);
     add_threadstate(interp, (PyThreadState *)tstate, old_head);
 
-    HEAD_UNLOCK(runtime);
-    if (!used_newtstate) {
-        // Must be called with lock unlocked to avoid re-entrancy deadlock.
-        PyMem_RawFree(new_tstate);
-    }
-    else {
+    HEAD_UNLOCK(interp->runtime);
 #ifdef Py_GIL_DISABLED
+    if (id != 1) {
         if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) {
             // Immortalize objects marked as using deferred reference counting
             // the first time a non-main thread is created.
             _PyGC_ImmortalizeDeferredObjects(interp);
         }
-#endif
     }
+#endif
 
 #ifdef Py_GIL_DISABLED
     // Must be called with lock unlocked to avoid lock ordering deadlocks.
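The rewritten `new_threadstate()` still allocates before taking `HEAD_LOCK()` (the removed comment's GH-96071 re-entrancy concern still applies), but the initial-thread special case now lives entirely inside `alloc_threadstate()` and `free_threadstate()`. A small single-threaded exercise of the hypothetical `ts_alloc()`/`ts_free()` sketch above, showing the slot being claimed, bypassed, and republished:

#include <assert.h>

static void
demo_single_slot(void)
{
    Interp interp;
    atomic_init(&interp.preallocated, &interp.slot);

    TState *a = ts_alloc(&interp);   /* claims the embedded slot */
    TState *b = ts_alloc(&interp);   /* slot empty: falls back to the heap */
    assert(a == &interp.slot);
    assert(b != NULL && b != &interp.slot);

    ts_free(&interp, b);             /* heap-allocated state is freed */
    ts_free(&interp, a);             /* embedded state is republished */
    assert(atomic_load(&interp.preallocated) == &interp.slot);
}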