// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
// the free-threaded build to safely reclaim memory when there may be
// concurrent accesses.
//
// Many operations in the free-threaded build are protected by locks. However,
// in some cases, we want to allow reads to happen concurrently with updates.
// In this case, we need to delay freeing ("reclaiming") any memory that may be
// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
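//
// A minimal sketch of the writer-side pattern (illustrative only: `table`,
// `entry`, `remove_entry()` and `free_entry()` are hypothetical names, and
// the sketch assumes the current thread's `_qsbr_thread_state` is reachable
// as `tstate->qsbr`): unlink the object while holding the structure's lock,
// obtain a goal from `_Py_qsbr_advance()`, and free the object only once
// `_Py_qsbr_poll()` reports that the goal has been reached:
//
//      remove_entry(table, entry);      // unlink, but do not free yet
//      uint64_t goal = _Py_qsbr_advance(tstate->qsbr->shared);
//      ...
//      // later, e.g. when processing a queue of delayed frees
//      if (_Py_qsbr_poll(tstate->qsbr, goal)) {
//          free_entry(entry);           // no reader can still hold a pointer
//      }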
#ifndef Py_INTERNAL_QSBR_H
#define Py_INTERNAL_QSBR_H

#include <stdbool.h>
#include <stdint.h>
#include "pycore_lock.h"        // PyMutex

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

struct _qsbr_shared;
struct _PyThreadStateImpl;  // forward declare to avoid circular dependency

// Per-thread state
struct _qsbr_thread_state {
    // Last observed write sequence (or 0 if detached)
    uint64_t seq;

    // Shared (per-interpreter) QSBR state
    struct _qsbr_shared *shared;

    // Thread state (or NULL)
    PyThreadState *tstate;

    // Used to defer advancing write sequence a fixed number of times
    int deferrals;

    // Is this thread state allocated?
    bool allocated;
    struct _qsbr_thread_state *freelist_next;
};

// Padding to avoid false sharing
struct _qsbr_pad {
    struct _qsbr_thread_state qsbr;
    char __padding[64 - sizeof(struct _qsbr_thread_state)];
};

// Per-interpreter state
struct _qsbr_shared {
    // Write sequence: always odd, incremented by two
    uint64_t wr_seq;

    // Minimum observed read sequence of all QSBR thread states
    uint64_t rd_seq;

    // Array of QSBR thread states.
    struct _qsbr_pad *array;
    Py_ssize_t size;

    // Freelist of unused _qsbr_thread_states (protected by mutex)
    PyMutex mutex;
    struct _qsbr_thread_state *freelist;
};
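
// A worked sketch of how the sequence numbers interact (the exact initial
// value is an implementation detail; 1 is assumed here for illustration):
// `wr_seq` is always odd and each advance adds two, so it moves 1 -> 3 -> 5.
// When a thread reports a quiescent state it copies the current `wr_seq`
// into its own `seq`; a detached thread's `seq` is 0 and does not hold back
// reclamation. `rd_seq` caches the minimum of the attached threads' `seq`
// values, so a goal G handed out by `_Py_qsbr_advance()` is considered
// reached once `rd_seq` has caught up to G, i.e. every attached thread has
// passed through a quiescent state since the corresponding removal.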

static inline uint64_t
_Py_qsbr_shared_current(struct _qsbr_shared *shared)
{
    return _Py_atomic_load_uint64_acquire(&shared->wr_seq);
}

// Reports a quiescent state: the caller no longer holds any pointer to shared
// data not protected by locks or reference counts.
static inline void
_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
{
    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
    _Py_atomic_store_uint64_release(&qsbr->seq, seq);
}
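
// A sketch of the reader side (illustrative; it assumes the thread's
// `_qsbr_thread_state` is reachable as `tstate->qsbr`): a thread that reads
// shared data without holding the protecting lock does nothing special per
// read. It only has to report, at points where it holds no such borrowed
// pointers (for example once per pass through an outer loop), that it is
// quiescent:
//
//      _Py_qsbr_quiescent_state(tstate->qsbr);
//
// Memory removed by writers is reclaimed only after every attached thread
// has made such a report (or detached).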

// Advance the write sequence and return the new goal. This should be called
// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
// determine when it is safe to reclaim (free) the memory.
extern uint64_t
_Py_qsbr_advance(struct _qsbr_shared *shared);

// Batches requests to advance the write sequence. This advances the write
// sequence every N calls, which reduces overhead but increases time to
// reclamation. Returns the new goal.
extern uint64_t
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);
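
// In the writer-side sketch above, `_Py_qsbr_deferred_advance()` can be used
// in place of `_Py_qsbr_advance()` when removals are frequent and small:
// consecutive calls may hand out the same goal, trading a later reclamation
// time for fewer updates of the shared write sequence (a sketch of the
// trade-off, not a usage requirement).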

// Have the read sequences advanced to the given goal? If this returns true,
// it is safe to reclaim any memory tagged with the goal (or earlier goal).
extern bool
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);

// Called when thread attaches to interpreter
extern void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);

// Called when thread detaches from interpreter
extern void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);

// Reserves (allocates) a QSBR state and returns its index.
extern Py_ssize_t
_Py_qsbr_reserve(PyInterpreterState *interp);

// Associates a PyThreadState with the QSBR state at the given index
extern void
_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
                  PyInterpreterState *interp, Py_ssize_t index);

// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
extern void
_Py_qsbr_unregister(struct _PyThreadStateImpl *tstate);
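
// A sketch of the expected per-thread lifecycle (illustrative; the real call
// sites are in the thread-state creation/teardown paths, and `tstate_impl`
// here is a hypothetical `struct _PyThreadStateImpl *` whose QSBR state is
// assumed to be reachable as `tstate_impl->qsbr`):
//
//      Py_ssize_t idx = _Py_qsbr_reserve(interp);     // pick a free slot
//      _Py_qsbr_register(tstate_impl, interp, idx);   // bind thread to slot
//      _Py_qsbr_attach(tstate_impl->qsbr);            // start participating
//      ... run, periodically reporting quiescent states ...
//      _Py_qsbr_detach(tstate_impl->qsbr);            // stop participating
//      _Py_qsbr_unregister(tstate_impl);              // release the slot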

// Frees the shared (per-interpreter) QSBR state.
extern void
_Py_qsbr_fini(PyInterpreterState *interp);

// Updates the QSBR state after fork() in the child process (only the calling
// thread survives a fork).
extern void
_Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate);

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_QSBR_H */