Skip to content

Commit a99d214

Browse files
committed
gh-115103: Implement delayed memory reclamation (QSBR)
This adds a safe memory reclamation scheme based on FreeBSD's "GUS" and quiescent state based reclamation (QSBR). The API provides a mechanism for callers to detect when it is safe to free memory that may be concurrently accessed by readers.
1 parent fedbf77 commit a99d214

18 files changed

+565
-0
lines changed

Doc/license.rst

+32
Original file line numberDiff line numberDiff line change
@@ -1095,3 +1095,35 @@ which is distributed under the MIT license::
10951095
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
10961096
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
10971097
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1098+
1099+
1100+
Global Unbounded Sequences (GUS)
1101+
--------------------------------
1102+
1103+
The file :file:`Python/qsbr.c` is adapted from FreeBSD's "Global Unbounded
1104+
Sequences" safe memory reclamation scheme in
1105+
`subr_smr.c <https://github.com/freebsd/freebsd-src/blob/main/sys/kern/subr_smr.c>`_.
1106+
The file is distributed under the 2-Clause BSD License::
1107+
1108+
Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>
1109+
1110+
Redistribution and use in source and binary forms, with or without
1111+
modification, are permitted provided that the following conditions
1112+
are met:
1113+
1. Redistributions of source code must retain the above copyright
1114+
notice unmodified, this list of conditions, and the following
1115+
disclaimer.
1116+
2. Redistributions in binary form must reproduce the above copyright
1117+
notice, this list of conditions and the following disclaimer in the
1118+
documentation and/or other materials provided with the distribution.
1119+
1120+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1121+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1122+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1123+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
1124+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1125+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1126+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1127+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1128+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
1129+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Include/cpython/pyatomic.h

+6
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,12 @@ _Py_atomic_store_int_release(int *obj, int value);
469469
static inline int
470470
_Py_atomic_load_int_acquire(const int *obj);
471471

472+
static inline void
473+
_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value);
474+
475+
static inline uint64_t
476+
_Py_atomic_load_uint64_acquire(const uint64_t *obj);
477+
472478

473479
// --- _Py_atomic_fence ------------------------------------------------------
474480

Include/cpython/pyatomic_gcc.h

+8
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,14 @@ static inline int
495495
_Py_atomic_load_int_acquire(const int *obj)
496496
{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); }
497497

498+
static inline void
499+
_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value)
500+
{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); }
501+
502+
static inline uint64_t
503+
_Py_atomic_load_uint64_acquire(const uint64_t *obj)
504+
{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); }
505+
498506

499507
// --- _Py_atomic_fence ------------------------------------------------------
500508

Include/cpython/pyatomic_msc.h

+26
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,32 @@ _Py_atomic_load_int_acquire(const int *obj)
938938
#endif
939939
}
940940

941+
static inline void
942+
_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value)
943+
{
944+
#if defined(_M_X64) || defined(_M_IX86)
945+
*(uint64_t volatile *)obj = value;
946+
#elif defined(_M_ARM64)
947+
_Py_atomic_ASSERT_ARG_TYPE(unsigned __int64);
948+
__stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value);
949+
#else
950+
# error "no implementation of _Py_atomic_store_uint64_release"
951+
#endif
952+
}
953+
954+
static inline uint64_t
955+
_Py_atomic_load_uint64_acquire(const uint64_t *obj)
956+
{
957+
#if defined(_M_X64) || defined(_M_IX86)
958+
return *(uint64_t volatile *)obj;
959+
#elif defined(_M_ARM64)
960+
_Py_atomic_ASSERT_ARG_TYPE(__int64);
961+
return (uint64_t)__ldar64((unsigned __int64 volatile *)obj);
962+
#else
963+
# error "no implementation of _Py_atomic_load_uint64_acquire"
964+
#endif
965+
}
966+
941967

942968
// --- _Py_atomic_fence ------------------------------------------------------
943969

Include/cpython/pyatomic_std.h

+16
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,22 @@ _Py_atomic_load_int_acquire(const int *obj)
870870
memory_order_acquire);
871871
}
872872

873+
static inline void
874+
_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value)
875+
{
876+
_Py_USING_STD;
877+
atomic_store_explicit((_Atomic(uint64_t)*)obj, value,
878+
memory_order_release);
879+
}
880+
881+
static inline uint64_t
882+
_Py_atomic_load_uint64_acquire(const uint64_t *obj)
883+
{
884+
_Py_USING_STD;
885+
return atomic_load_explicit((const _Atomic(uint64_t)*)obj,
886+
memory_order_acquire);
887+
}
888+
873889

874890

875891
// --- _Py_atomic_fence ------------------------------------------------------

Include/internal/pycore_interp.h

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ extern "C" {
3131
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
3232
#include "pycore_object_state.h" // struct _py_object_state
3333
#include "pycore_obmalloc.h" // struct _obmalloc_state
34+
#include "pycore_qsbr.h" // struct _qsbr_shared
3435
#include "pycore_tstate.h" // _PyThreadStateImpl
3536
#include "pycore_tuple.h" // struct _Py_tuple_state
3637
#include "pycore_typeobject.h" // struct types_state
@@ -198,6 +199,7 @@ struct _is {
198199
struct _warnings_runtime_state warnings;
199200
struct atexit_state atexit;
200201
struct _stoptheworld_state stoptheworld;
202+
struct _qsbr_shared qsbr;
201203

202204
#if defined(Py_GIL_DISABLED)
203205
struct _mimalloc_interp_state mimalloc;

Include/internal/pycore_qsbr.h

+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
2+
// the free-threaded build to safely reclaim memory when there may be
3+
// concurrent accesses.
4+
//
5+
// Many operations in the free-threaded build are protected by locks. However,
6+
// in some cases, we want to allow reads to happen concurrently with updates.
7+
// In this case, we need to delay freeing ("reclaiming") any memory that may be
8+
// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
9+
#ifndef Py_INTERNAL_QSBR_H
10+
#define Py_INTERNAL_QSBR_H
11+
12+
#include <stdbool.h>
13+
#include <stdint.h>
14+
#include "pycore_lock.h" // PyMutex
15+
16+
#ifdef __cplusplus
17+
extern "C" {
18+
#endif
19+
20+
#ifndef Py_BUILD_CORE
21+
# error "this header requires Py_BUILD_CORE define"
22+
#endif
23+
24+
struct _qsbr_shared;
25+
struct _PyThreadStateImpl; // forward declare to avoid circular dependency
26+
27+
// Per-thread state
28+
struct _qsbr_thread_state {
29+
// Last observed write sequence (or 0 if detached)
30+
uint64_t seq;
31+
32+
// Shared (per-interpreter) QSBR state
33+
struct _qsbr_shared *shared;
34+
35+
// Thread state (or NULL)
36+
PyThreadState *tstate;
37+
38+
// Used to defer advancing write sequence a fixed number of times
39+
int deferrals;
40+
41+
// Is this thread state allocated?
42+
bool allocated;
43+
struct _qsbr_thread_state *freelist_next;
44+
};
45+
46+
// Padding to avoid false sharing
47+
struct _qsbr_pad {
48+
struct _qsbr_thread_state qsbr;
49+
char __padding[64 - sizeof(struct _qsbr_thread_state)];
50+
};
51+
52+
// Per-interpreter state
53+
struct _qsbr_shared {
54+
// Write sequence: always odd, incremented by two
55+
uint64_t wr_seq;
56+
57+
// Minimum observed read sequence of all QSBR thread states
58+
uint64_t rd_seq;
59+
60+
// Array of QSBR thread states.
61+
struct _qsbr_pad *array;
62+
Py_ssize_t size;
63+
64+
// Freelist of unused _qsbr_thread_states (protected by mutex)
65+
PyMutex mutex;
66+
struct _qsbr_thread_state *freelist;
67+
};
68+
69+
static inline uint64_t
70+
_Py_qsbr_shared_current(struct _qsbr_shared *shared)
71+
{
72+
return _Py_atomic_load_uint64_acquire(&shared->wr_seq);
73+
}
74+
75+
// Reports a quiescent state: the caller no longer holds any pointer to shared
76+
// data not protected by locks or reference counts.
77+
static inline void
78+
_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
79+
{
80+
uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
81+
_Py_atomic_store_uint64_release(&qsbr->seq, seq);
82+
}
83+
84+
// Advance the write sequence and return the new goal. This should be called
85+
// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
86+
// determine when it is safe to reclaim (free) the memory.
87+
extern uint64_t
88+
_Py_qsbr_advance(struct _qsbr_shared *shared);
89+
90+
// Batches requests to advance the write sequence. This advances the write
91+
// sequence every N calls, which reduces overhead but increases time to
92+
// reclamation. Returns the new goal.
93+
extern uint64_t
94+
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);
95+
96+
// Have the read sequences advanced to the given goal? If this returns true,
97+
// it is safe to reclaim any memory tagged with the goal (or earlier goal).
98+
extern bool
99+
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);
100+
101+
// Called when thread attaches to interpreter
102+
extern void
103+
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);
104+
105+
// Called when thread detaches from interpreter
106+
extern void
107+
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);
108+
109+
// Reserves (allocates) a QSBR state and returns its index.
110+
extern Py_ssize_t
111+
_Py_qsbr_reserve(PyInterpreterState *interp);
112+
113+
// Associates a PyThreadState with the QSBR state at the given index
114+
extern void
115+
_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
116+
PyInterpreterState *interp, Py_ssize_t index);
117+
118+
// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
119+
extern void
120+
_Py_qsbr_unregister(struct _PyThreadStateImpl *tstate);
121+
122+
extern void
123+
_Py_qsbr_fini(PyInterpreterState *interp);
124+
125+
extern void
126+
_Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate);
127+
128+
#ifdef __cplusplus
129+
}
130+
#endif
131+
#endif /* !Py_INTERNAL_QSBR_H */

Include/internal/pycore_runtime_init.h

+4
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ extern PyTypeObject _PyExc_MemoryError;
169169
{ .threshold = 10, }, \
170170
}, \
171171
}, \
172+
.qsbr = { \
173+
.wr_seq = 1, \
174+
.rd_seq = 1, \
175+
}, \
172176
.object_state = _py_object_state_INIT(INTERP), \
173177
.dtoa = _dtoa_state_INIT(&(INTERP)), \
174178
.dict_state = _dict_state_INIT, \

Include/internal/pycore_tstate.h

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extern "C" {
1010

1111
#include "pycore_freelist.h" // struct _Py_freelist_state
1212
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
13+
#include "pycore_qsbr.h" // struct _qsbr_thread_state
1314

1415

1516
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
@@ -20,6 +21,7 @@ typedef struct _PyThreadStateImpl {
2021
PyThreadState base;
2122

2223
#ifdef Py_GIL_DISABLED
24+
struct _qsbr_thread_state *qsbr;
2325
struct _mimalloc_thread_state mimalloc;
2426
struct _Py_freelist_state freelist_state;
2527
#endif

Makefile.pre.in

+2
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ PYTHON_OBJS= \
455455
Python/pystate.o \
456456
Python/pythonrun.o \
457457
Python/pytime.o \
458+
Python/qsbr.o \
458459
Python/bootstrap_hash.o \
459460
Python/specialize.o \
460461
Python/structmember.o \
@@ -1158,6 +1159,7 @@ PYTHON_HEADERS= \
11581159
$(srcdir)/Include/internal/pycore_pystats.h \
11591160
$(srcdir)/Include/internal/pycore_pythonrun.h \
11601161
$(srcdir)/Include/internal/pycore_pythread.h \
1162+
$(srcdir)/Include/internal/pycore_qsbr.h \
11611163
$(srcdir)/Include/internal/pycore_range.h \
11621164
$(srcdir)/Include/internal/pycore_runtime.h \
11631165
$(srcdir)/Include/internal/pycore_runtime_init.h \

Modules/posixmodule.c

+4
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,10 @@ PyOS_AfterFork_Child(void)
637637
tstate->native_thread_id = PyThread_get_thread_native_id();
638638
#endif
639639

640+
#ifdef Py_GIL_DISABLED
641+
_Py_qsbr_after_fork((_PyThreadStateImpl *)tstate);
642+
#endif
643+
640644
status = _PyEval_ReInitThreads(tstate);
641645
if (_PyStatus_EXCEPTION(status)) {
642646
goto fatal_error;

PCbuild/_freeze_module.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
<ClCompile Include="..\Python\pythonrun.c" />
253253
<ClCompile Include="..\Python\Python-tokenize.c" />
254254
<ClCompile Include="..\Python\pytime.c" />
255+
<ClCompile Include="..\Python\qsbr.c" />
255256
<ClCompile Include="..\Python\specialize.c" />
256257
<ClCompile Include="..\Python\structmember.c" />
257258
<ClCompile Include="..\Python\suggestions.c" />

PCbuild/_freeze_module.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@
373373
<ClCompile Include="..\Python\pytime.c">
374374
<Filter>Source Files</Filter>
375375
</ClCompile>
376+
<ClCompile Include="..\Python\qsbr.c">
377+
<Filter>Source Files</Filter>
378+
</ClCompile>
376379
<ClCompile Include="..\Objects\rangeobject.c">
377380
<Filter>Source Files</Filter>
378381
</ClCompile>

PCbuild/pythoncore.vcxproj

+2
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@
274274
<ClInclude Include="..\Include\internal\pycore_pystats.h" />
275275
<ClInclude Include="..\Include\internal\pycore_pythonrun.h" />
276276
<ClInclude Include="..\Include\internal\pycore_pythread.h" />
277+
<ClInclude Include="..\Include\internal\pycore_qsbr.h" />
277278
<ClInclude Include="..\Include\internal\pycore_range.h" />
278279
<ClInclude Include="..\Include\internal\pycore_runtime.h" />
279280
<ClInclude Include="..\Include\internal\pycore_runtime_init.h" />
@@ -611,6 +612,7 @@
611612
<ClCompile Include="..\Python\pystrcmp.c" />
612613
<ClCompile Include="..\Python\pystrhex.c" />
613614
<ClCompile Include="..\Python\pystrtod.c" />
615+
<ClCompile Include="..\Python\qsbr.c" />
614616
<ClCompile Include="..\Python\dtoa.c" />
615617
<ClCompile Include="..\Python\Python-ast.c" />
616618
<ClCompile Include="..\Python\Python-tokenize.c" />

PCbuild/pythoncore.vcxproj.filters

+6
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,9 @@
747747
<ClInclude Include="..\Include\internal\pycore_pythread.h">
748748
<Filter>Include\internal</Filter>
749749
</ClInclude>
750+
<ClInclude Include="..\Include\internal\pycore_qsbr.h">
751+
<Filter>Include\internal</Filter>
752+
</ClInclude>
750753
<ClInclude Include="..\Include\internal\pycore_range.h">
751754
<Filter>Include\internal</Filter>
752755
</ClInclude>
@@ -1412,6 +1415,9 @@
14121415
<ClCompile Include="..\Python\pystrtod.c">
14131416
<Filter>Python</Filter>
14141417
</ClCompile>
1418+
<ClCompile Include="..\Python\qsbr.c">
1419+
<Filter>Python</Filter>
1420+
</ClCompile>
14151421
<ClCompile Include="..\Python\dtoa.c">
14161422
<Filter>Python</Filter>
14171423
</ClCompile>

Python/ceval_macros.h

+7
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@
8686
#define PRE_DISPATCH_GOTO() ((void)0)
8787
#endif
8888

89+
#ifdef Py_GIL_DISABLED
90+
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
91+
#else
92+
#define QSBR_QUIESCENT_STATE(tstate)
93+
#endif
94+
8995

9096
/* Do interpreter dispatch accounting for tracing and instrumentation */
9197
#define DISPATCH() \
@@ -117,6 +123,7 @@
117123

118124
#define CHECK_EVAL_BREAKER() \
119125
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
126+
QSBR_QUIESCENT_STATE(tstate); \
120127
if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \
121128
if (_Py_HandlePending(tstate) != 0) { \
122129
GOTO_ERROR(error); \

0 commit comments

Comments
 (0)