@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255
255
assert (state -> type == NULL );
256
256
state -> type = self ;
257
257
state -> isbuiltin = isbuiltin ;
258
+ #ifdef Py_GIL_DISABLED
259
+ state -> local_cache .tp_version_tag = self -> tp_version_tag ;
260
+ #endif
258
261
259
262
/* state->tp_subclasses is left NULL until init_subclasses() sets it. */
260
263
/* state->tp_weaklist is left NULL until insert_head() or insert_after()
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290
293
assert (state -> type != NULL );
291
294
state -> type = NULL ;
292
295
assert (state -> tp_weaklist == NULL ); // It was already cleared out.
296
+ #ifdef Py_GIL_DISABLED
297
+ for (Py_ssize_t i = 0 ; i < LOCAL_TYPE_CACHE_SIZE ; i ++ ) {
298
+ Py_CLEAR (state -> local_cache .entries [i ].name );
299
+ state -> local_cache .entries [i ].value = NULL ;
300
+ }
301
+ #endif
293
302
294
303
(void )_Py_atomic_add_int64 (
295
304
& _PyRuntime .types .managed_static .types [full_index ].interp_count , -1 );
@@ -1021,6 +1030,36 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
1021
1030
#endif
1022
1031
}
1023
1032
1033
// Invalidate the per-heap-type specialization cache.  Called when the type
// is modified (type_modified_unlocked / type_mro_modified) and from
// type_dealloc.  No-op for non-heap types, which carry no _spec_cache.
static void
clear_spec_cache(PyTypeObject *type)
{
    if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        // This field *must* be invalidated if the type is modified (see the
        // comment on struct _specialization_cache):
        PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
        FT_ATOMIC_STORE_PTR_RELAXED(
            heap_type->_spec_cache.getitem, NULL);
#ifdef Py_GIL_DISABLED
        // Also tear down the free-threaded per-type (L1) lookup cache.
        struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
        if (cache != NULL) {
            // Unpublish the cache first so new readers can no longer reach it.
            FT_ATOMIC_STORE_PTR_RELAXED(
                heap_type->_spec_cache.local_type_cache, NULL);
            for (Py_ssize_t i = 0; i < LOCAL_TYPE_CACHE_SIZE; i++) {
                PyObject *name = _Py_atomic_load_ptr_relaxed(&cache->entries[i].name);
                if (name != NULL) {
                    // Readers can be racing with the local type cache when a value is being replaced
                    // in the type, and they can try and incref it after it has been decref'd, so
                    // we eagerly clear these out.
                    _Py_atomic_store_ptr_release(&cache->entries[i].name, NULL);
                    Py_DECREF(name);
                }
            }
            // Free via the delayed mechanism so concurrent lock-free readers
            // still probing the table never touch freed memory.
            _PyMem_FreeDelayed(cache);
        }
#endif
    }
}
1062
+
1024
1063
static void
1025
1064
type_modified_unlocked (PyTypeObject * type )
1026
1065
{
@@ -1083,12 +1122,7 @@ type_modified_unlocked(PyTypeObject *type)
1083
1122
}
1084
1123
1085
1124
set_version_unlocked (type , 0 ); /* 0 is not a valid version tag */
1086
- if (PyType_HasFeature (type , Py_TPFLAGS_HEAPTYPE )) {
1087
- // This field *must* be invalidated if the type is modified (see the
1088
- // comment on struct _specialization_cache):
1089
- FT_ATOMIC_STORE_PTR_RELAXED (
1090
- ((PyHeapTypeObject * )type )-> _spec_cache .getitem , NULL );
1091
- }
1125
+ clear_spec_cache (type );
1092
1126
}
1093
1127
1094
1128
void
@@ -1165,12 +1199,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
1165
1199
assert (!(type -> tp_flags & _Py_TPFLAGS_STATIC_BUILTIN ));
1166
1200
set_version_unlocked (type , 0 ); /* 0 is not a valid version tag */
1167
1201
type -> tp_versions_used = _Py_ATTR_CACHE_UNUSED ;
1168
- if (PyType_HasFeature (type , Py_TPFLAGS_HEAPTYPE )) {
1169
- // This field *must* be invalidated if the type is modified (see the
1170
- // comment on struct _specialization_cache):
1171
- FT_ATOMIC_STORE_PTR_RELAXED (
1172
- ((PyHeapTypeObject * )type )-> _spec_cache .getitem , NULL );
1173
- }
1202
+ clear_spec_cache (type );
1174
1203
}
1175
1204
1176
1205
/*
@@ -5542,6 +5571,119 @@ _PyTypes_AfterFork(void)
5542
5571
#endif
5543
5572
}
5544
5573
5574
+ #ifdef Py_GIL_DISABLED
5575
+
5576
+ static bool
5577
+ can_cache_locally (PyTypeObject * type , PyObject * name ) {
5578
+ // We don't cache types in l1 for anything which is a custom get attr, it's likely
5579
+ // to have many dynamic attributes (think modules and metaclasses).
5580
+ // We also only cache interned or immortal strings.
5581
+ return type -> tp_getattro == PyObject_GenericGetAttr &&
5582
+ (PyUnicode_CHECK_INTERNED (name ) != SSTATE_NOT_INTERNED || _Py_IsImmortal (name ));
5583
+ }
5584
+
5585
+ static struct local_type_cache *
5586
+ get_local_type_cache (PyTypeObject * type , unsigned int assigned_version )
5587
+ {
5588
+ unsigned long flags = FT_ATOMIC_LOAD_ULONG_RELAXED (type -> tp_flags );
5589
+
5590
+ if (flags & Py_TPFLAGS_HEAPTYPE ) {
5591
+ PyHeapTypeObject * heap_type = (PyHeapTypeObject * )type ;
5592
+ struct local_type_cache * local_cache = _Py_atomic_load_ptr_acquire (& heap_type -> _spec_cache .local_type_cache );
5593
+ if (local_cache == NULL && assigned_version ) {
5594
+ local_cache = PyMem_Calloc (1 , sizeof (struct local_type_cache ));
5595
+ local_cache -> tp_version_tag = assigned_version ;
5596
+ _Py_atomic_store_ptr_release (& heap_type -> _spec_cache .local_type_cache , local_cache );
5597
+ }
5598
+ return local_cache ;
5599
+ } else if (flags & _Py_TPFLAGS_STATIC_BUILTIN ) {
5600
+ PyInterpreterState * interp = _PyInterpreterState_GET ();
5601
+ managed_static_type_state * state = managed_static_type_state_get (interp , type );
5602
+ return & state -> local_cache ;
5603
+ }
5604
+ return NULL ;
5605
+ }
5606
+
5607
// Identity hash for cached name objects: names are interned/immortal, so the
// pointer value itself is a stable key; shift off the low (alignment) bits.
// Cast through size_t (unsigned) rather than Py_ssize_t: a pointer with the
// high bit set would make the signed value negative, and a negative operand
// to `% LOCAL_TYPE_CACHE_SIZE` produces a negative (out-of-bounds) index.
#define HASH_NAME(name) ((Py_ssize_t)(((size_t)(name)) >> LOCAL_TYPE_CACHE_PROBE))
5608
+
5609
// Completely lock-free lookup of `name` in `type`'s per-type (L1) cache.
// On a hit: stores a new strong reference (or NULL, for a cached miss) into
// *value, stores the cache's tp_version_tag into *version (when version is
// non-NULL), and returns true.  Returns false when the pair is not cacheable,
// no cache exists, or the name is not present.
static bool
try_local_cache_lookup(PyTypeObject *type, PyObject *name, PyObject **value, unsigned int *version)
{
    if (!can_cache_locally(type, name)) {
        return false;
    }

    // assigned_version == 0: look up an existing cache only, never allocate.
    struct local_type_cache *local_cache = get_local_type_cache(type, 0);
    if (local_cache == NULL) {
        return false;
    }

    // Linear probing over the fixed-size open-addressed table.
    Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
    Py_ssize_t cur = index;
    do {
        struct local_type_cache_entry *entry = &local_cache->entries[cur];
        // Acquire pairs with the release store of `name` in
        // cache_local_type_lookup, so the entry's value (written before the
        // name) is visible once the name is observed.
        PyObject *entry_name = _Py_atomic_load_ptr_acquire(&entry->name);
        if (entry_name == name) {
            // Value is set as maybe weakref'd, and the per-type cache never replaces
            // values so we get away w/ a simple incref here.
            PyObject *entry_value = _Py_atomic_load_ptr_relaxed(&entry->value);
            Py_XINCREF(entry_value);
            *value = entry_value;

            if (version) {
                *version = local_cache->tp_version_tag;
            }

            return true;
        }
        else if (entry_name == NULL) {
            // First empty slot terminates the probe chain: the name is absent.
            break;
        }
        cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
    } while (cur != index);
    return false;
}
5646
+
5647
// Insert `res` under `name` into `type`'s per-type (L1) cache.  `res` may be
// NULL, which caches a lookup miss.  Expected to run with the type lock held
// (writers are serialized); readers remain lock-free.  Returns true when the
// entry was stored, false when the pair is not cacheable or the table is
// unavailable/too full.
static bool
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
                        PyObject *res, unsigned int assigned_version)
{
    if (!can_cache_locally(type, name) ||
        type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
        return false;
    }

    // May lazily allocate the cache (assigned_version != 0 on this path).
    struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
    if (local_cache == NULL ||
        local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
        // Cap the load factor so probe chains stay short.
        return false;
    }

    Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
    Py_ssize_t cur = index;
    do {
        struct local_type_cache_entry *entry = &local_cache->entries[cur];
        PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
        if (entry_name == NULL) {
            if (res != NULL) {
                // Reads from other threads can proceed lock-free.
                _PyObject_SetMaybeWeakref(res);
            }

            // Value is written first, then name, so when name is read the
            // value is always present.
            _Py_atomic_store_ptr_relaxed(&entry->value, res);
            _Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
            local_cache->cache_count++;
            return true;
        }
        cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
    } while (cur != index);
    return false;
}
5684
+
5685
+ #endif
5686
+
5545
5687
/* Internal API to look for a name through the MRO.
5546
5688
This returns a strong reference, and doesn't set an exception!
5547
5689
If nonzero, version is set to the value of type->tp_version at the time of
@@ -5551,13 +5693,22 @@ PyObject *
5551
5693
_PyType_LookupRefAndVersion (PyTypeObject * type , PyObject * name , unsigned int * version )
5552
5694
{
5553
5695
PyObject * res ;
5696
+
5697
+ #ifdef Py_GIL_DISABLED
5698
+ // Free-threaded, try a completely lock-free per-type L1 cache first
5699
+ if (try_local_cache_lookup (type , name , & res , version )) {
5700
+ return res ;
5701
+ }
5702
+ #endif
5703
+
5554
5704
int error ;
5555
5705
PyInterpreterState * interp = _PyInterpreterState_GET ();
5556
-
5557
5706
unsigned int h = MCACHE_HASH_METHOD (type , name );
5558
5707
struct type_cache * cache = get_type_cache ();
5559
5708
struct type_cache_entry * entry = & cache -> hashtable [h ];
5709
+
5560
5710
#ifdef Py_GIL_DISABLED
5711
+ // Fall back to global L2 cache which requires sequence locks
5561
5712
// synchronize-with other writing threads by doing an acquire load on the sequence
5562
5713
while (1 ) {
5563
5714
uint32_t sequence = _PySeqLock_BeginRead (& entry -> sequence );
@@ -5574,6 +5725,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
5574
5725
if (version != NULL ) {
5575
5726
* version = entry_version ;
5576
5727
}
5728
+
5577
5729
return value ;
5578
5730
}
5579
5731
Py_XDECREF (value );
@@ -5612,12 +5764,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
5612
5764
5613
5765
int has_version = 0 ;
5614
5766
unsigned int assigned_version = 0 ;
5767
+
5768
+ bool locally_cached = false;
5615
5769
BEGIN_TYPE_LOCK ();
5770
+
5616
5771
res = find_name_in_mro (type , name , & error );
5617
5772
if (MCACHE_CACHEABLE_NAME (name )) {
5618
5773
has_version = assign_version_tag (interp , type );
5619
5774
assigned_version = type -> tp_version_tag ;
5620
5775
}
5776
+
5777
+ #ifdef Py_GIL_DISABLED
5778
+ locally_cached = has_version && !error &&
5779
+ cache_local_type_lookup (type , name , res , assigned_version );
5780
+ #endif
5621
5781
END_TYPE_LOCK ();
5622
5782
5623
5783
/* Only put NULL results into cache if there was no error. */
@@ -5640,9 +5800,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
5640
5800
return NULL ;
5641
5801
}
5642
5802
5643
- if (has_version ) {
5803
+ if (has_version && ! locally_cached ) {
5644
5804
#if Py_GIL_DISABLED
5645
5805
update_cache_gil_disabled (entry , name , assigned_version , res );
5806
+
5646
5807
#else
5647
5808
PyObject * old_value = update_cache (entry , name , assigned_version , res );
5648
5809
Py_DECREF (old_value );
@@ -6164,6 +6325,7 @@ type_dealloc(PyObject *self)
6164
6325
}
6165
6326
Py_XDECREF (et -> ht_module );
6166
6327
PyMem_Free (et -> _ht_tpname );
6328
+ clear_spec_cache (type );
6167
6329
#ifdef Py_GIL_DISABLED
6168
6330
assert (et -> unique_id == _Py_INVALID_UNIQUE_ID );
6169
6331
#endif
0 commit comments