Skip to content

Commit 106607c

Browse files
committed
Add type-local type cache
1 parent 303043f commit 106607c

File tree

4 files changed

+206
-14
lines changed

4 files changed

+206
-14
lines changed

Include/cpython/object.h

+3
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ struct _specialization_cache {
259259
PyObject *getitem;
260260
uint32_t getitem_version;
261261
PyObject *init;
262+
#ifdef Py_GIL_DISABLED
263+
struct local_type_cache *local_type_cache;
264+
#endif
262265
};
263266

264267
/* The *real* layout of a type object when allocated on the heap */

Include/internal/pycore_typeobject.h

+25
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ struct type_cache {
7171
struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
7272
};
7373

74+
#ifdef Py_GIL_DISABLED

// Type attribute lookup cache which is type-specific.  It only exists in
// free-threaded builds and is read without any locking.  Two kinds of types
// carry one:
//   * heap types, which point at a separately allocated cache from their
//     _specialization_cache;
//   * static builtin types, which embed one in their per-interpreter
//     managed_static_type_state.
//
// The table is open-addressed: a name starts at a slot derived from its
// address and then steps by LOCAL_TYPE_CACHE_PROBE slots, wrapping modulo
// LOCAL_TYPE_CACHE_SIZE, until it finds its entry or an empty slot.
#define LOCAL_TYPE_CACHE_SIZE 32         // number of slots in the table
#define LOCAL_TYPE_CACHE_MAX_ENTRIES 24  // stop inserting once this many slots are used
#define LOCAL_TYPE_CACHE_PROBE 3         // probe stride (slots per step)

// The probe sequence only visits every slot when the stride is coprime with
// the table size; a power-of-two size combined with an odd stride guarantees
// that.  Capping the entry count below the table size guarantees that a
// failed lookup always reaches an empty slot.
#if (LOCAL_TYPE_CACHE_SIZE & (LOCAL_TYPE_CACHE_SIZE - 1)) != 0
#  error "LOCAL_TYPE_CACHE_SIZE must be a power of 2"
#endif
#if (LOCAL_TYPE_CACHE_PROBE % 2) == 0
#  error "LOCAL_TYPE_CACHE_PROBE must be odd so probing reaches every slot"
#endif
#if LOCAL_TYPE_CACHE_MAX_ENTRIES >= LOCAL_TYPE_CACHE_SIZE
#  error "LOCAL_TYPE_CACHE_MAX_ENTRIES must leave at least one empty slot"
#endif

struct local_type_cache_entry {
    // Strong reference to exactly a str, or NULL when the slot is empty.
    // Published last (after `value`), so a reader that sees a name can rely
    // on the value already being in place.
    PyObject *name;
    // Result of the MRO lookup for `name`; NULL caches a failed lookup.
    // NOTE(review): the cache stores this pointer without taking its own
    // reference and never decrefs it when clearing -- confirm the intended
    // ownership before relying on it.
    PyObject *value;
};

struct local_type_cache {
    unsigned int tp_version_tag;  // type version the cached entries belong to
    unsigned int cache_count;     // number of occupied slots in `entries`
    struct local_type_cache_entry entries[LOCAL_TYPE_CACHE_SIZE];
};

#endif
95+
7496
typedef struct {
7597
PyTypeObject *type;
7698
int isbuiltin;
@@ -85,6 +107,9 @@ typedef struct {
85107
are also some diagnostic uses for the list of weakrefs,
86108
so we still keep it. */
87109
PyObject *tp_weaklist;
110+
#ifdef Py_GIL_DISABLED
111+
struct local_type_cache local_cache;
112+
#endif
88113
} managed_static_type_state;
89114

90115
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */

Lib/test/test_sys.py

+2
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def delx(self): del self.__x
17311731
s = vsize(fmt)
17321732
check(int, s)
17331733
typeid = 'n' if support.Py_GIL_DISABLED else ''
1734+
local_type_cache = 'P' if support.Py_GIL_DISABLED else ''
17341735
# class
17351736
s = vsize(fmt + # PyTypeObject
17361737
'4P' # PyAsyncMethods
@@ -1741,6 +1742,7 @@ def delx(self): del self.__x
17411742
'7P'
17421743
'1PIP' # Specializer cache
17431744
+ typeid # heap type id (free-threaded only)
1745+
+ local_type_cache # local type cache (free-threaded only)
17441746
)
17451747
class newstyleclass(object): pass
17461748
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Objects/typeobject.c

+176-14
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255255
assert(state->type == NULL);
256256
state->type = self;
257257
state->isbuiltin = isbuiltin;
258+
#ifdef Py_GIL_DISABLED
259+
state->local_cache.tp_version_tag = self->tp_version_tag;
260+
#endif
258261

259262
/* state->tp_subclasses is left NULL until init_subclasses() sets it. */
260263
/* state->tp_weaklist is left NULL until insert_head() or insert_after()
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290293
assert(state->type != NULL);
291294
state->type = NULL;
292295
assert(state->tp_weaklist == NULL); // It was already cleared out.
296+
#ifdef Py_GIL_DISABLED
297+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
298+
Py_CLEAR(state->local_cache.entries[i].name);
299+
state->local_cache.entries[i].value = NULL;
300+
}
301+
#endif
293302

294303
(void)_Py_atomic_add_int64(
295304
&_PyRuntime.types.managed_static.types[full_index].interp_count, -1);
@@ -1021,6 +1030,36 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
10211030
#endif
10221031
}
10231032

1033+
static void
1034+
clear_spec_cache(PyTypeObject *type)
1035+
{
1036+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1037+
// This field *must* be invalidated if the type is modified (see the
1038+
// comment on struct _specialization_cache):
1039+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
1040+
FT_ATOMIC_STORE_PTR_RELAXED(
1041+
heap_type->_spec_cache.getitem, NULL);
1042+
#ifdef Py_GIL_DISABLED
1043+
struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
1044+
if (cache != NULL) {
1045+
FT_ATOMIC_STORE_PTR_RELAXED(
1046+
heap_type->_spec_cache.local_type_cache, NULL);
1047+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
1048+
PyObject *name = _Py_atomic_load_ptr_relaxed(&cache->entries[i].name);
1049+
if (name != NULL) {
1050+
// Readers can be racing with the local type cache when a value is being replaced
1051+
// in the type, and they can try and incref it after it has been decref'd, so
1052+
// we eagerly clear these out.
1053+
_Py_atomic_store_ptr_release(&cache->entries[i].name, NULL);
1054+
Py_DECREF(name);
1055+
}
1056+
}
1057+
_PyMem_FreeDelayed(cache);
1058+
}
1059+
#endif
1060+
}
1061+
}
1062+
10241063
static void
10251064
type_modified_unlocked(PyTypeObject *type)
10261065
{
@@ -1083,12 +1122,7 @@ type_modified_unlocked(PyTypeObject *type)
10831122
}
10841123

10851124
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
1086-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1087-
// This field *must* be invalidated if the type is modified (see the
1088-
// comment on struct _specialization_cache):
1089-
FT_ATOMIC_STORE_PTR_RELAXED(
1090-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1091-
}
1125+
clear_spec_cache(type);
10921126
}
10931127

10941128
void
@@ -1165,12 +1199,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
11651199
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
11661200
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
11671201
type->tp_versions_used = _Py_ATTR_CACHE_UNUSED;
1168-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1169-
// This field *must* be invalidated if the type is modified (see the
1170-
// comment on struct _specialization_cache):
1171-
FT_ATOMIC_STORE_PTR_RELAXED(
1172-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1173-
}
1202+
clear_spec_cache(type);
11741203
}
11751204

11761205
/*
@@ -5542,6 +5571,119 @@ _PyTypes_AfterFork(void)
55425571
#endif
55435572
}
55445573

5574+
#ifdef Py_GIL_DISABLED
5575+
5576+
static bool
5577+
can_cache_locally(PyTypeObject *type, PyObject *name) {
5578+
// We don't cache types in l1 for anything which is a custom get attr, it's likely
5579+
// to have many dynamic attributes (think modules and metaclasses).
5580+
// We also only cache interned or immortal strings.
5581+
return type->tp_getattro == PyObject_GenericGetAttr &&
5582+
(PyUnicode_CHECK_INTERNED(name) != SSTATE_NOT_INTERNED || _Py_IsImmortal(name));
5583+
}
5584+
5585+
static struct local_type_cache *
5586+
get_local_type_cache(PyTypeObject *type, unsigned int assigned_version)
5587+
{
5588+
unsigned long flags = FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags);
5589+
5590+
if (flags & Py_TPFLAGS_HEAPTYPE) {
5591+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
5592+
struct local_type_cache *local_cache = _Py_atomic_load_ptr_acquire(&heap_type->_spec_cache.local_type_cache);
5593+
if (local_cache == NULL && assigned_version) {
5594+
local_cache = PyMem_Calloc(1, sizeof(struct local_type_cache));
5595+
local_cache->tp_version_tag = assigned_version;
5596+
_Py_atomic_store_ptr_release(&heap_type->_spec_cache.local_type_cache, local_cache);
5597+
}
5598+
return local_cache;
5599+
} else if (flags & _Py_TPFLAGS_STATIC_BUILTIN) {
5600+
PyInterpreterState *interp = _PyInterpreterState_GET();
5601+
managed_static_type_state *state = managed_static_type_state_get(interp, type);
5602+
return &state->local_cache;
5603+
}
5604+
return NULL;
5605+
}
5606+
5607+
// Hash a name by its address, discarding the low bits that are normally
// constant because of object alignment.  The address is converted to an
// unsigned integer: with a signed type, an address whose top bit is set
// would shift to a negative value, and `HASH_NAME(name) % SIZE` would then
// produce a negative (out-of-bounds) table index.
// NOTE(review): the shift reuses LOCAL_TYPE_CACHE_PROBE (the probe stride) as
// the number of alignment bits to drop -- confirm that coupling is intended.
#define HASH_NAME(name) (((uintptr_t)(name)) >> LOCAL_TYPE_CACHE_PROBE)
5608+
5609+
static bool
5610+
try_local_cache_lookup(PyTypeObject *type, PyObject *name, PyObject **value, unsigned int *version)
5611+
{
5612+
if (!can_cache_locally(type, name)) {
5613+
return false;
5614+
}
5615+
5616+
struct local_type_cache *local_cache = get_local_type_cache(type, 0);
5617+
if (local_cache == NULL) {
5618+
return false;
5619+
}
5620+
5621+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5622+
Py_ssize_t cur = index;
5623+
do {
5624+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5625+
PyObject *entry_name = _Py_atomic_load_ptr_acquire(&entry->name);
5626+
if (entry_name == name) {
5627+
// Value is set as maybe weakref'd, and the per-type cache never replaces
5628+
// values so we get away w/ a simple incref here.
5629+
PyObject *entry_value = _Py_atomic_load_ptr_relaxed(&entry->value);
5630+
Py_XINCREF(entry_value);
5631+
*value = entry_value;
5632+
5633+
if (version) {
5634+
*version = local_cache->tp_version_tag;
5635+
}
5636+
5637+
return true;
5638+
}
5639+
else if (entry_name == NULL) {
5640+
break;
5641+
}
5642+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5643+
} while(cur != index);
5644+
return false;
5645+
}
5646+
5647+
static bool
5648+
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
5649+
PyObject *res, unsigned int assigned_version)
5650+
{
5651+
if (!can_cache_locally(type, name) ||
5652+
type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
5653+
return false;
5654+
}
5655+
5656+
struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
5657+
if (local_cache == NULL ||
5658+
local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
5659+
return false;
5660+
}
5661+
5662+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5663+
Py_ssize_t cur = index;
5664+
do {
5665+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5666+
PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
5667+
if (entry_name == NULL) {
5668+
if (res != NULL) {
5669+
// Reads from other threads can proceed lock-free.
5670+
_PyObject_SetMaybeWeakref(res);
5671+
}
5672+
5673+
// Value is written first, then name, so when name is read the
5674+
// value is always present.
5675+
_Py_atomic_store_ptr_relaxed(&entry->value, res);
5676+
_Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
5677+
local_cache->cache_count++;
5678+
return true;
5679+
}
5680+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5681+
} while (cur != index);
5682+
return false;
5683+
}
5684+
5685+
#endif
5686+
55455687
/* Internal API to look for a name through the MRO.
55465688
This returns a strong reference, and doesn't set an exception!
55475689
If nonzero, version is set to the value of type->tp_version at the time of
@@ -5551,13 +5693,22 @@ PyObject *
55515693
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
55525694
{
55535695
PyObject *res;
5696+
5697+
#ifdef Py_GIL_DISABLED
5698+
// Free-threaded, try a completely lock-free per-type L1 cache first
5699+
if (try_local_cache_lookup(type, name, &res, version)) {
5700+
return res;
5701+
}
5702+
#endif
5703+
55545704
int error;
55555705
PyInterpreterState *interp = _PyInterpreterState_GET();
5556-
55575706
unsigned int h = MCACHE_HASH_METHOD(type, name);
55585707
struct type_cache *cache = get_type_cache();
55595708
struct type_cache_entry *entry = &cache->hashtable[h];
5709+
55605710
#ifdef Py_GIL_DISABLED
5711+
// Fall back to global L2 cache which requires sequence locks
55615712
// synchronize-with other writing threads by doing an acquire load on the sequence
55625713
while (1) {
55635714
uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
@@ -5574,6 +5725,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
55745725
if (version != NULL) {
55755726
*version = entry_version;
55765727
}
5728+
55775729
return value;
55785730
}
55795731
Py_XDECREF(value);
@@ -5612,12 +5764,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56125764

56135765
int has_version = 0;
56145766
unsigned int assigned_version = 0;
5767+
5768+
bool locally_cached = false;
56155769
BEGIN_TYPE_LOCK();
5770+
56165771
res = find_name_in_mro(type, name, &error);
56175772
if (MCACHE_CACHEABLE_NAME(name)) {
56185773
has_version = assign_version_tag(interp, type);
56195774
assigned_version = type->tp_version_tag;
56205775
}
5776+
5777+
#ifdef Py_GIL_DISABLED
5778+
locally_cached = has_version && !error &&
5779+
cache_local_type_lookup(type, name, res, assigned_version);
5780+
#endif
56215781
END_TYPE_LOCK();
56225782

56235783
/* Only put NULL results into cache if there was no error. */
@@ -5640,9 +5800,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56405800
return NULL;
56415801
}
56425802

5643-
if (has_version) {
5803+
if (has_version && !locally_cached) {
56445804
#if Py_GIL_DISABLED
56455805
update_cache_gil_disabled(entry, name, assigned_version, res);
5806+
56465807
#else
56475808
PyObject *old_value = update_cache(entry, name, assigned_version, res);
56485809
Py_DECREF(old_value);
@@ -6164,6 +6325,7 @@ type_dealloc(PyObject *self)
61646325
}
61656326
Py_XDECREF(et->ht_module);
61666327
PyMem_Free(et->_ht_tpname);
6328+
clear_spec_cache(type);
61676329
#ifdef Py_GIL_DISABLED
61686330
assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
61696331
#endif

0 commit comments

Comments
 (0)