Skip to content

Commit 627b63e

Browse files
committed
Add type-local type cache
1 parent 303043f commit 627b63e

File tree

4 files changed

+203
-14
lines changed

4 files changed

+203
-14
lines changed

Include/cpython/object.h

+3
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ struct _specialization_cache {
259259
PyObject *getitem;
260260
uint32_t getitem_version;
261261
PyObject *init;
262+
#ifdef Py_GIL_DISABLED
263+
struct local_type_cache *local_type_cache;
264+
#endif
262265
};
263266

264267
/* The *real* layout of a type object when allocated on the heap */

Include/internal/pycore_typeobject.h

+25
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ struct type_cache {
7171
struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
7272
};
7373

74+
#ifdef Py_GIL_DISABLED
75+
76+
// Type attribute lookup cache which is type-specific. Only compiled in
77+
// free-threaded builds, where a small additional per-type cache is kept
78+
// which can be read without any locking.
// Heap types reach their cache through _spec_cache.local_type_cache, while
// static builtin types embed one in managed_static_type_state.
79+
// Number of slots in local_type_cache.entries.
#define LOCAL_TYPE_CACHE_SIZE 32
80+
// Inserts are refused once cache_count reaches this value, so some slots
// always stay empty and a probe sequence can stop at the first empty slot.
#define LOCAL_TYPE_CACHE_MAX_ENTRIES 24
81+
// Stride between slots while probing; HASH_NAME also uses it as the number
// of low pointer bits to shift away.
#define LOCAL_TYPE_CACHE_PROBE 3
82+
83+
// One slot of the cache. A slot is empty while `name` is NULL.
struct local_type_cache_entry {
84+
PyObject *name; // strong reference to exactly a str, or NULL for an empty slot
85+
PyObject *value; // cached lookup result, NULL caches a miss. NOTE(review): stored without an incref and never decref'd on clear, i.e. borrowed rather than owned -- confirm intent
86+
};
87+
88+
struct local_type_cache {
89+
// Version tag reported to callers on a cache hit.
unsigned int tp_version_tag;
90+
// Number of entries inserted so far; compared against
// LOCAL_TYPE_CACHE_MAX_ENTRIES before inserting.
unsigned int cache_count;
91+
struct local_type_cache_entry entries[LOCAL_TYPE_CACHE_SIZE];
92+
};
93+
94+
#endif
95+
7496
typedef struct {
7597
PyTypeObject *type;
7698
int isbuiltin;
@@ -85,6 +107,9 @@ typedef struct {
85107
are also some diagnostic uses for the list of weakrefs,
86108
so we still keep it. */
87109
PyObject *tp_weaklist;
110+
#ifdef Py_GIL_DISABLED
111+
struct local_type_cache local_cache;
112+
#endif
88113
} managed_static_type_state;
89114

90115
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */

Lib/test/test_sys.py

+2
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def delx(self): del self.__x
17311731
s = vsize(fmt)
17321732
check(int, s)
17331733
typeid = 'n' if support.Py_GIL_DISABLED else ''
1734+
local_type_cache = 'P' if support.Py_GIL_DISABLED else ''
17341735
# class
17351736
s = vsize(fmt + # PyTypeObject
17361737
'4P' # PyAsyncMethods
@@ -1741,6 +1742,7 @@ def delx(self): del self.__x
17411742
'7P'
17421743
'1PIP' # Specializer cache
17431744
+ typeid # heap type id (free-threaded only)
1745+
+ local_type_cache # local type cache (free-threaded only)
17441746
)
17451747
class newstyleclass(object): pass
17461748
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Objects/typeobject.c

+173-14
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255255
assert(state->type == NULL);
256256
state->type = self;
257257
state->isbuiltin = isbuiltin;
258+
#ifdef Py_GIL_DISABLED
259+
state->local_cache.tp_version_tag = self->tp_version_tag;
260+
#endif
258261

259262
/* state->tp_subclasses is left NULL until init_subclasses() sets it. */
260263
/* state->tp_weaklist is left NULL until insert_head() or insert_after()
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290293
assert(state->type != NULL);
291294
state->type = NULL;
292295
assert(state->tp_weaklist == NULL); // It was already cleared out.
296+
#ifdef Py_GIL_DISABLED
297+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
298+
Py_CLEAR(state->local_cache.entries[i].name);
299+
state->local_cache.entries[i].value = NULL;
300+
}
301+
#endif
293302

294303
(void)_Py_atomic_add_int64(
295304
&_PyRuntime.types.managed_static.types[full_index].interp_count, -1);
@@ -1021,6 +1030,33 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
10211030
#endif
10221031
}
10231032

1033+
static void
1034+
clear_spec_cache(PyTypeObject *type)
1035+
{
1036+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1037+
// This field *must* be invalidated if the type is modified (see the
1038+
// comment on struct _specialization_cache):
1039+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
1040+
FT_ATOMIC_STORE_PTR_RELAXED(
1041+
heap_type->_spec_cache.getitem, NULL);
1042+
#ifdef Py_GIL_DISABLED
1043+
struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
1044+
if (cache != NULL) {
1045+
FT_ATOMIC_STORE_PTR_RELAXED(
1046+
heap_type->_spec_cache.local_type_cache, NULL);
1047+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
1048+
PyObject *name = _Py_atomic_load_ptr_relaxed(&cache->entries[i].name);
1049+
if (name != NULL) {
1050+
_Py_atomic_store_ptr_release(&cache->entries[i].name, NULL);
1051+
Py_DECREF(name);
1052+
}
1053+
}
1054+
_PyMem_FreeDelayed(cache);
1055+
}
1056+
#endif
1057+
}
1058+
}
1059+
10241060
static void
10251061
type_modified_unlocked(PyTypeObject *type)
10261062
{
@@ -1083,12 +1119,7 @@ type_modified_unlocked(PyTypeObject *type)
10831119
}
10841120

10851121
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
1086-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1087-
// This field *must* be invalidated if the type is modified (see the
1088-
// comment on struct _specialization_cache):
1089-
FT_ATOMIC_STORE_PTR_RELAXED(
1090-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1091-
}
1122+
clear_spec_cache(type);
10921123
}
10931124

10941125
void
@@ -1165,12 +1196,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
11651196
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
11661197
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
11671198
type->tp_versions_used = _Py_ATTR_CACHE_UNUSED;
1168-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1169-
// This field *must* be invalidated if the type is modified (see the
1170-
// comment on struct _specialization_cache):
1171-
FT_ATOMIC_STORE_PTR_RELAXED(
1172-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1173-
}
1199+
clear_spec_cache(type);
11741200
}
11751201

11761202
/*
@@ -5542,6 +5568,119 @@ _PyTypes_AfterFork(void)
55425568
#endif
55435569
}
55445570

5571+
#ifdef Py_GIL_DISABLED
5572+
5573+
static bool
5574+
can_cache_locally(PyTypeObject *type, PyObject *name) {
5575+
// We don't cache types in l1 for anything which is a custom get attr, it's likely
5576+
// to have many dynamic attributes (think modules and metaclasses).
5577+
// We also only cache interned or immortal strings.
5578+
return type->tp_getattro == PyObject_GenericGetAttr &&
5579+
(PyUnicode_CHECK_INTERNED(name) != SSTATE_NOT_INTERNED || _Py_IsImmortal(name));
5580+
}
5581+
5582+
static struct local_type_cache *
5583+
get_local_type_cache(PyTypeObject *type, unsigned int assigned_version)
5584+
{
5585+
unsigned long flags = FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags);
5586+
5587+
if (flags & Py_TPFLAGS_HEAPTYPE) {
5588+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
5589+
struct local_type_cache *local_cache = _Py_atomic_load_ptr_acquire(&heap_type->_spec_cache.local_type_cache);
5590+
if (local_cache == NULL && assigned_version) {
5591+
local_cache = PyMem_Calloc(1, sizeof(struct local_type_cache));
5592+
local_cache->tp_version_tag = assigned_version;
5593+
_Py_atomic_store_ptr_release(&heap_type->_spec_cache.local_type_cache, local_cache);
5594+
}
5595+
return local_cache;
5596+
} else if (flags & _Py_TPFLAGS_STATIC_BUILTIN) {
5597+
PyInterpreterState *interp = _PyInterpreterState_GET();
5598+
managed_static_type_state *state = managed_static_type_state_get(interp, type);
5599+
return &state->local_cache;
5600+
}
5601+
return NULL;
5602+
}
5603+
5604+
// Hash a name by its address, dropping the low LOCAL_TYPE_CACHE_PROBE bits.
// The pointer goes through uintptr_t rather than a signed type so that an
// address with the top bit set still hashes to a non-negative value; callers
// reduce the result with `% LOCAL_TYPE_CACHE_SIZE` and use it as an array
// index, which must never be negative.
#define HASH_NAME(name) (((uintptr_t)(name)) >> LOCAL_TYPE_CACHE_PROBE)
5605+
5606+
static bool
5607+
try_local_cache_lookup(PyTypeObject *type, PyObject *name, PyObject **value, unsigned int *version)
5608+
{
5609+
if (!can_cache_locally(type, name)) {
5610+
return false;
5611+
}
5612+
5613+
struct local_type_cache *local_cache = get_local_type_cache(type, 0);
5614+
if (local_cache == NULL) {
5615+
return false;
5616+
}
5617+
5618+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5619+
Py_ssize_t cur = index;
5620+
do {
5621+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5622+
PyObject *entry_name = _Py_atomic_load_ptr_acquire(&entry->name);
5623+
if (entry_name == name) {
5624+
// Value is set as maybe weakref'd, and the per-type cache never replaces
5625+
// values so we get away w/ a simple incref here.
5626+
PyObject *entry_value = _Py_atomic_load_ptr_relaxed(&entry->value);
5627+
Py_XINCREF(entry_value);
5628+
*value = entry_value;
5629+
5630+
if (version) {
5631+
*version = local_cache->tp_version_tag;
5632+
}
5633+
5634+
return true;
5635+
}
5636+
else if (entry_name == NULL) {
5637+
break;
5638+
}
5639+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5640+
} while(cur != index);
5641+
return false;
5642+
}
5643+
5644+
static bool
5645+
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
5646+
PyObject *res, unsigned int assigned_version)
5647+
{
5648+
if (!can_cache_locally(type, name) ||
5649+
type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
5650+
return false;
5651+
}
5652+
5653+
struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
5654+
if (local_cache == NULL ||
5655+
local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
5656+
return false;
5657+
}
5658+
5659+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5660+
Py_ssize_t cur = index;
5661+
do {
5662+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5663+
PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
5664+
if (entry_name == NULL) {
5665+
if (res != NULL) {
5666+
// Reads from other threads can proceed lock-free.
5667+
_PyObject_SetMaybeWeakref(res);
5668+
}
5669+
5670+
// Value is written first, then name, so when name is read the
5671+
// value is always present.
5672+
_Py_atomic_store_ptr_relaxed(&entry->value, res);
5673+
_Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
5674+
local_cache->cache_count++;
5675+
return true;
5676+
}
5677+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5678+
} while (cur != index);
5679+
return false;
5680+
}
5681+
5682+
#endif
5683+
55455684
/* Internal API to look for a name through the MRO.
55465685
This returns a strong reference, and doesn't set an exception!
55475686
If nonzero, version is set to the value of type->tp_version at the time of
@@ -5551,13 +5690,22 @@ PyObject *
55515690
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
55525691
{
55535692
PyObject *res;
5693+
5694+
#ifdef Py_GIL_DISABLED
5695+
// Free-threaded, try a completely lock-free per-type L1 cache first
5696+
if (try_local_cache_lookup(type, name, &res, version)) {
5697+
return res;
5698+
}
5699+
#endif
5700+
55545701
int error;
55555702
PyInterpreterState *interp = _PyInterpreterState_GET();
5556-
55575703
unsigned int h = MCACHE_HASH_METHOD(type, name);
55585704
struct type_cache *cache = get_type_cache();
55595705
struct type_cache_entry *entry = &cache->hashtable[h];
5706+
55605707
#ifdef Py_GIL_DISABLED
5708+
// Fall back to global L2 cache which requires sequence locks
55615709
// synchronize-with other writing threads by doing an acquire load on the sequence
55625710
while (1) {
55635711
uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
@@ -5574,6 +5722,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
55745722
if (version != NULL) {
55755723
*version = entry_version;
55765724
}
5725+
55775726
return value;
55785727
}
55795728
Py_XDECREF(value);
@@ -5612,12 +5761,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56125761

56135762
int has_version = 0;
56145763
unsigned int assigned_version = 0;
5764+
5765+
bool locally_cached = false;
56155766
BEGIN_TYPE_LOCK();
5767+
56165768
res = find_name_in_mro(type, name, &error);
56175769
if (MCACHE_CACHEABLE_NAME(name)) {
56185770
has_version = assign_version_tag(interp, type);
56195771
assigned_version = type->tp_version_tag;
56205772
}
5773+
5774+
#ifdef Py_GIL_DISABLED
5775+
locally_cached = has_version && !error &&
5776+
cache_local_type_lookup(type, name, res, assigned_version);
5777+
#endif
56215778
END_TYPE_LOCK();
56225779

56235780
/* Only put NULL results into cache if there was no error. */
@@ -5640,9 +5797,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56405797
return NULL;
56415798
}
56425799

5643-
if (has_version) {
5800+
if (has_version && !locally_cached) {
56445801
#if Py_GIL_DISABLED
56455802
update_cache_gil_disabled(entry, name, assigned_version, res);
5803+
56465804
#else
56475805
PyObject *old_value = update_cache(entry, name, assigned_version, res);
56485806
Py_DECREF(old_value);
@@ -6164,6 +6322,7 @@ type_dealloc(PyObject *self)
61646322
}
61656323
Py_XDECREF(et->ht_module);
61666324
PyMem_Free(et->_ht_tpname);
6325+
clear_spec_cache(type);
61676326
#ifdef Py_GIL_DISABLED
61686327
assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
61696328
#endif

0 commit comments

Comments
 (0)