Skip to content

GH-126491: GC: Mark objects reachable from roots before doing cycle collection #126502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 35 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2ec8d8a
GC experiment: mark almost all reachable objects before doing collect…
markshannon Nov 4, 2024
1fdf00e
Add stats for objects marked
markshannon Nov 4, 2024
5e813c5
Start with mark phase
markshannon Nov 4, 2024
8bd7606
Add stats for visits during marking
markshannon Nov 5, 2024
3513da2
Visit new frames before each increment
markshannon Nov 5, 2024
ab1faec
Redo stats
markshannon Nov 6, 2024
9e2d93c
Fix freezing and GC untracking
markshannon Nov 6, 2024
3c18fc8
Don't untrack dicts
markshannon Nov 6, 2024
94da963
Remove lazy dict tracking from no-gil build
markshannon Nov 6, 2024
659fd1e
Remove unused variable
markshannon Nov 6, 2024
4cfbc4f
Add news
markshannon Nov 6, 2024
8c92ca6
Fix use after free
markshannon Nov 6, 2024
12d7f7c
Attempt more careful fix of use-after-free
markshannon Nov 7, 2024
1f619d7
Typo
markshannon Nov 7, 2024
b55fe37
Fix use of uninitialized variable
markshannon Nov 7, 2024
73b7f52
Fix compiler warnings
markshannon Nov 7, 2024
33f6386
Tweak test
markshannon Nov 7, 2024
8574d00
Add section to internal docs
markshannon Nov 11, 2024
70007b0
Rephrase new docs
markshannon Nov 11, 2024
f043080
Use symbolic constant
markshannon Nov 13, 2024
db2e173
Update section on untracking
markshannon Nov 13, 2024
6a50c2f
Merge branch 'main' into mark-first-gc
markshannon Nov 14, 2024
b9467ec
Update docs
markshannon Nov 14, 2024
14ae8d7
A few more edits
markshannon Nov 14, 2024
3337512
Update comment
markshannon Nov 14, 2024
3ae87fa
Address doc review comments
markshannon Nov 14, 2024
a2d9e3e
Merge branch 'main' into mark-first-gc
markshannon Nov 15, 2024
1452378
Avoid repeated collection of the young gen
markshannon Nov 15, 2024
595b14c
Clearer calculation of work to do.
markshannon Nov 15, 2024
278059b
Make sure tuples are untracked and avoid quadratic time validation
markshannon Nov 15, 2024
f186b4a
Update InternalDocs/garbage_collector.md
markshannon Nov 18, 2024
5f6d04e
Remove unused variable
markshannon Nov 18, 2024
9cfb5f0
Tweak work to do calculation
markshannon Nov 18, 2024
c7683a4
Explain work to do calculation
markshannon Nov 18, 2024
170ea6d
Initialize field to prevent code analyzer warning.
markshannon Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Include/cpython/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ typedef struct _gc_stats {
uint64_t collections;
uint64_t object_visits;
uint64_t objects_collected;
uint64_t objects_transitively_reachable;
uint64_t objects_not_transitively_reachable;
} GCStats;

typedef struct _uop_stats {
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ typedef struct _PyInterpreterFrame {
_PyStackRef *stackpointer;
uint16_t return_offset; /* Only relevant during a function call */
char owner;
char visited;
/* Locals and stack */
_PyStackRef localsplus[1];
} _PyInterpreterFrame;
Expand Down Expand Up @@ -165,6 +166,7 @@ _PyFrame_Initialize(
frame->instr_ptr = _PyCode_CODE(code);
frame->return_offset = 0;
frame->owner = FRAME_OWNED_BY_THREAD;
frame->visited = 0;

for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
frame->localsplus[i] = PyStackRef_NULL;
Expand Down Expand Up @@ -341,6 +343,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
frame->frame_obj = NULL;
frame->instr_ptr = _PyCode_CODE(code);
frame->owner = FRAME_OWNED_BY_THREAD;
frame->visited = 0;
frame->return_offset = 0;

#ifdef Py_GIL_DISABLED
Expand Down
6 changes: 6 additions & 0 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,11 @@ struct gc_generation_stats {
Py_ssize_t uncollectable;
};

/* The two phases the garbage collector can be in.
 * NOTE(review): presumably these are the values stored in the `phase`
 * member of struct _gc_runtime_state -- confirm against the collector. */
enum _GCPhase {
    GC_PHASE_MARK    = 0,   /* numerically zero */
    GC_PHASE_COLLECT = 1,
};

struct _gc_runtime_state {
/* List of objects that still need to be cleaned up, singly linked
* via their gc headers' gc_prev pointers. */
Expand Down Expand Up @@ -329,6 +334,7 @@ struct _gc_runtime_state {
Py_ssize_t work_to_do;
/* Which of the old spaces is the visited space */
int visited_space;
int phase;

#ifdef Py_GIL_DISABLED
/* This is the number of objects that survived the last full
Expand Down
4 changes: 2 additions & 2 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,8 @@ static inline void _PyObject_GC_TRACK(
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
/* Young objects will be moved into the visited space during GC, so set the bit here */
gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
generation0->_gc_prev = (uintptr_t)gc;
#endif
}
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ extern PyTypeObject _PyExc_MemoryError;
{ .threshold = 0, }, \
}, \
.work_to_do = -5000, \
.phase = 0, \
}, \
.qsbr = { \
.wr_seq = QSBR_INITIAL, \
Expand Down
109 changes: 0 additions & 109 deletions Lib/test/test_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,115 +880,6 @@ class C(object):
gc.collect()
self.assertIs(ref(), None, "Cycle was not collected")

# Helper assertion: run gc.collect() twice, then fail (using `t` as the
# failure message) if gc.is_tracked(t) is still true.
def _not_tracked(self, t):
# Nested containers can take several collections to untrack
gc.collect()
gc.collect()
self.assertFalse(gc.is_tracked(t), t)

# Helper assertion: `t` must be reported by gc.is_tracked() both before and
# after two gc.collect() calls; `t` doubles as the failure message.
def _tracked(self, t):
self.assertTrue(gc.is_tracked(t), t)
# Collect twice, then re-check that tracking was kept.
gc.collect()
gc.collect()
self.assertTrue(gc.is_tracked(t), t)

# Builds dicts whose keys are all str and stores the dict inside itself
# under "foo", which creates a reference cycle through a str-keyed dict.
# There are no assertions; the test only exercises the code path.
def test_string_keys_can_track_values(self):
# Test that this doesn't leak.
for i in range(10):
d = {}
for j in range(10):
d[str(j)] = j
d["foo"] = d

# CPython-only check of GC tracking for dict displays: the first group of
# literals is expected to be untracked, the second group to be tracked.
@support.cpython_only
def test_track_literals(self):
# Test GC-optimization of dict literals
x, y, z, w = 1.5, "a", (1, None), []

# Literals asserted to be untracked.
self._not_tracked({})
self._not_tracked({x:(), y:x, z:1})
self._not_tracked({1: "a", "b": 2})
self._not_tracked({1: 2, (None, True, False, ()): int})
self._not_tracked({1: object()})

# Dicts with mutable elements are always tracked, even if those
# elements are not tracked right now.
self._tracked({1: []})
self._tracked({1: ([],)})
self._tracked({1: {}})
self._tracked({1: set()})

# CPython-only check of GC tracking for dicts built at runtime: item
# assignment, copy(), nested dicts, dict.fromkeys(), update(), and the
# dict() constructor with keyword and iterable-of-pairs arguments.
@support.cpython_only
def test_track_dynamic(self):
# Test GC-optimization of dynamically-created dicts
class MyObject(object):
pass
x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()

# Item assignment: the dict is asserted untracked until the list `w` is
# stored, and untracked again once that value is replaced by None.
d = dict()
self._not_tracked(d)
d[1] = "a"
self._not_tracked(d)
d[y] = 2
self._not_tracked(d)
d[z] = 3
self._not_tracked(d)
self._not_tracked(d.copy())
d[4] = w
self._tracked(d)
self._tracked(d.copy())
d[4] = None
self._not_tracked(d)
self._not_tracked(d.copy())

# dd isn't tracked right now, but it may mutate and therefore d
# which contains it must be tracked.
d = dict()
dd = dict()
d[1] = dd
self._not_tracked(dd)
self._tracked(d)
dd[1] = d
self._tracked(dd)

# dict.fromkeys() and update() from another dict.
d = dict.fromkeys([x, y, z])
self._not_tracked(d)
dd = dict()
dd.update(d)
self._not_tracked(dd)
d = dict.fromkeys([x, y, z, o])
self._tracked(d)
dd = dict()
dd.update(d)
self._tracked(dd)

# Keyword-argument construction and keyword update().
d = dict(x=x, y=y, z=z)
self._not_tracked(d)
d = dict(x=x, y=y, z=z, w=w)
self._tracked(d)
d = dict()
d.update(x=x, y=y, z=z)
self._not_tracked(d)
d.update(w=w)
self._tracked(d)

# Construction and update() from an iterable of (key, value) pairs.
d = dict([(x, y), (z, 1)])
self._not_tracked(d)
d = dict([(x, y), (z, w)])
self._tracked(d)
d = dict()
d.update([(x, y), (z, 1)])
self._not_tracked(d)
d.update([(x, y), (z, w)])
self._tracked(d)

# CPython-only check that an empty instance of a dict subclass is tracked.
@support.cpython_only
def test_track_subtypes(self):
# Dict subtypes are always tracked
class MyDict(dict):
pass
self._tracked(MyDict())

def make_shared_key_dict(self, n):
class C:
pass
Expand Down
7 changes: 5 additions & 2 deletions Lib/test/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1284,7 +1284,8 @@ def test_refcount_errors(self):
from test.support import gc_collect, SuppressCrashReport

a = [1, 2, 3]
b = [a]
b = [a, a]
a.append(b)

# Avoid coredump when Py_FatalError() calls abort()
SuppressCrashReport().__enter__()
Expand All @@ -1294,6 +1295,8 @@ def test_refcount_errors(self):
# (to avoid deallocating it):
import ctypes
ctypes.pythonapi.Py_DecRef(ctypes.py_object(a))
del a
del b

# The garbage collector should now have a fatal error
# when it reaches the broken object
Expand Down Expand Up @@ -1322,7 +1325,7 @@ def test_refcount_errors(self):
self.assertRegex(stderr,
br'object type name: list')
self.assertRegex(stderr,
br'object repr : \[1, 2, 3\]')
br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]')


class GCTogglingTests(unittest.TestCase):
Expand Down
92 changes: 4 additions & 88 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,7 @@ new_dict(PyInterpreterState *interp,
mp->ma_used = used;
mp->_ma_watcher_tag = 0;
ASSERT_CONSISTENT(mp);
_PyObject_GC_TRACK(mp);
return (PyObject *)mp;
}

Expand Down Expand Up @@ -1578,64 +1579,6 @@ _PyDict_HasOnlyStringKeys(PyObject *dict)
return 1;
}

/* Start GC-tracking `mp` when it is not tracked yet and either `key` or
 * `value` may be tracked.  Expands to a single statement.  `mp` appears
 * twice in the expansion, so pass an expression without side effects. */
#define MAINTAIN_TRACKING(mp, key, value) \
    do { \
        if (!_PyObject_GC_IS_TRACKED(mp) && \
            (_PyObject_GC_MAY_BE_TRACKED(key) || \
             _PyObject_GC_MAY_BE_TRACKED(value))) \
        { \
            _PyObject_GC_TRACK(mp); \
        } \
    } while(0)

/* Untrack `op` from the GC when nothing it stores may be tracked.
 *
 * Returns without doing anything unless `op` is an exact dict that is
 * currently tracked.  Otherwise every entry with a non-NULL value is
 * examined, and the function returns early (leaving `op` tracked) as soon
 * as one examined object may be tracked:
 *   - split table:          values only
 *   - unicode-keyed table:  values only
 *   - general table:        values and keys
 * If the scan completes, `op` is untracked.
 */
void
_PyDict_MaybeUntrack(PyObject *op)
{
    ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op);

    if (!(PyDict_CheckExact(op) && _PyObject_GC_IS_TRACKED(op))) {
        return;
    }

    PyDictObject *dict = (PyDictObject *)op;
    ASSERT_CONSISTENT(dict);
    Py_ssize_t nentries = dict->ma_keys->dk_nentries;

    if (_PyDict_HasSplitTable(dict)) {
        for (Py_ssize_t slot = 0; slot < nentries; slot++) {
            PyObject *stored = dict->ma_values->values[slot];
            if (stored != NULL && _PyObject_GC_MAY_BE_TRACKED(stored)) {
                return;
            }
        }
    }
    else if (DK_IS_UNICODE(dict->ma_keys)) {
        PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
        for (Py_ssize_t slot = 0; slot < nentries; slot++) {
            PyObject *stored = entries[slot].me_value;
            if (stored != NULL && _PyObject_GC_MAY_BE_TRACKED(stored)) {
                return;
            }
        }
    }
    else {
        PyDictKeyEntry *entries = DK_ENTRIES(dict->ma_keys);
        for (Py_ssize_t slot = 0; slot < nentries; slot++) {
            PyObject *stored = entries[slot].me_value;
            if (stored == NULL) {
                /* Unused entry: neither value nor key is examined. */
                continue;
            }
            if (_PyObject_GC_MAY_BE_TRACKED(stored) ||
                _PyObject_GC_MAY_BE_TRACKED(entries[slot].me_key))
            {
                return;
            }
        }
    }

    _PyObject_GC_UNTRACK(op);
}

void
_PyDict_EnablePerThreadRefcounting(PyObject *op)
{
Expand Down Expand Up @@ -1761,7 +1704,6 @@ insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key,
{
assert(PyUnicode_CheckExact(key));
ASSERT_DICT_LOCKED(mp);
MAINTAIN_TRACKING(mp, key, value);
PyObject *old_value = mp->ma_values->values[ix];
if (old_value == NULL) {
_PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value);
Expand Down Expand Up @@ -1818,8 +1760,6 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
if (ix == DKIX_ERROR)
goto Fail;

MAINTAIN_TRACKING(mp, key, value);

if (ix == DKIX_EMPTY) {
assert(!_PyDict_HasSplitTable(mp));
/* Insert into new slot. */
Expand Down Expand Up @@ -1878,8 +1818,6 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
/* We don't decref Py_EMPTY_KEYS here because it is immortal. */
assert(mp->ma_values == NULL);

MAINTAIN_TRACKING(mp, key, value);

size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1);
dictkeys_set_index(newkeys, hashpos, 0);
if (unicode) {
Expand Down Expand Up @@ -3770,11 +3708,6 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
STORE_USED(mp, other->ma_used);
ASSERT_CONSISTENT(mp);

if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(mp);
}

return 0;
}
}
Expand Down Expand Up @@ -4024,8 +3957,7 @@ copy_lock_held(PyObject *o)
split_copy->ma_used = mp->ma_used;
split_copy->_ma_watcher_tag = 0;
dictkeys_incref(mp->ma_keys);
if (_PyObject_GC_IS_TRACKED(mp))
_PyObject_GC_TRACK(split_copy);
_PyObject_GC_TRACK(split_copy);
return (PyObject *)split_copy;
}

Expand Down Expand Up @@ -4060,10 +3992,6 @@ copy_lock_held(PyObject *o)

new->ma_used = mp->ma_used;
ASSERT_CONSISTENT(new);
if (_PyObject_GC_IS_TRACKED(mp)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(new);
}

return (PyObject *)new;
}
Expand Down Expand Up @@ -4350,8 +4278,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
*result = NULL;
}
}

MAINTAIN_TRACKING(mp, key, value);
STORE_USED(mp, mp->ma_used + 1);
assert(mp->ma_keys->dk_usable >= 0);
ASSERT_CONSISTENT(mp);
Expand Down Expand Up @@ -4801,15 +4727,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
d->ma_values = NULL;
ASSERT_CONSISTENT(d);

if (type != &PyDict_Type) {
// Don't track if a subclass tp_alloc is PyType_GenericAlloc()
if (!_PyObject_GC_IS_TRACKED(d)) {
_PyObject_GC_TRACK(d);
}
}
else {
// _PyType_AllocNoTrack() does not track the created object
assert(!_PyObject_GC_IS_TRACKED(d));
if (!_PyObject_GC_IS_TRACKED(d)) {
_PyObject_GC_TRACK(d);
}
return self;
}
Expand Down Expand Up @@ -6755,9 +6674,6 @@ make_dict_from_instance_attributes(PyInterpreterState *interp,
}
}
PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0);
if (track && res) {
_PyObject_GC_TRACK(res);
}
return res;
}

Expand Down
2 changes: 0 additions & 2 deletions Objects/moduleobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ static void
track_module(PyModuleObject *m)
{
/* Sets up the module `m` and its dict `m->md_dict` for refcounting and
 * GC tracking, dict first and then the module object itself. */
_PyDict_EnablePerThreadRefcounting(m->md_dict);
/* NOTE(review): the hunk header (-107,8 +107,6) reports two deleted lines;
 * this call and the blank line after it look like the removed pair --
 * confirm against the actual diff before relying on this call. */
PyObject_GC_Track(m->md_dict);

/* Enable deferred refcounting on the module, then hand it to the GC. */
_PyObject_SetDeferredRefcount((PyObject *)m);
PyObject_GC_Track(m);
}
Expand Down
Loading
Loading