Skip to content

Commit 2ac5c48

Browse files
committed
Revert "pythonGH-126491: GC: Mark objects reachable from roots before doing cycle collection (pythonGH-126502)"
This reverts commit b0fcc2c.
1 parent 4cd1076 commit 2ac5c48

21 files changed

+330
-332
lines changed

Include/cpython/pystats.h

-2
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,6 @@ typedef struct _gc_stats {
9999
uint64_t collections;
100100
uint64_t object_visits;
101101
uint64_t objects_collected;
102-
uint64_t objects_transitively_reachable;
103-
uint64_t objects_not_transitively_reachable;
104102
} GCStats;
105103

106104
typedef struct _uop_stats {

Include/internal/pycore_dict.h

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ extern int _PyDict_Next(
4343

4444
extern int _PyDict_HasOnlyStringKeys(PyObject *mp);
4545

46+
extern void _PyDict_MaybeUntrack(PyObject *mp);
47+
4648
// Export for '_ctypes' shared extension
4749
PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);
4850

Include/internal/pycore_frame.h

-3
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ typedef struct _PyInterpreterFrame {
7575
_PyStackRef *stackpointer;
7676
uint16_t return_offset; /* Only relevant during a function call */
7777
char owner;
78-
char visited;
7978
/* Locals and stack */
8079
_PyStackRef localsplus[1];
8180
} _PyInterpreterFrame;
@@ -208,7 +207,6 @@ _PyFrame_Initialize(
208207
#endif
209208
frame->return_offset = 0;
210209
frame->owner = FRAME_OWNED_BY_THREAD;
211-
frame->visited = 0;
212210

213211
for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
214212
frame->localsplus[i] = PyStackRef_NULL;
@@ -391,7 +389,6 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
391389
frame->instr_ptr = _PyCode_CODE(code);
392390
#endif
393391
frame->owner = FRAME_OWNED_BY_THREAD;
394-
frame->visited = 0;
395392
frame->return_offset = 0;
396393

397394
#ifdef Py_GIL_DISABLED

Include/internal/pycore_gc.h

+2-9
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ extern "C" {
1010

1111
/* GC information is stored BEFORE the object structure. */
1212
typedef struct {
13-
// Tagged pointer to next object in the list.
13+
// Pointer to next object in the list.
1414
// 0 means the object is not tracked
1515
uintptr_t _gc_next;
1616

17-
// Tagged pointer to previous object in the list.
17+
// Pointer to previous object in the list.
1818
// Lowest two bits are used for flags documented later.
1919
uintptr_t _gc_prev;
2020
} PyGC_Head;
@@ -302,11 +302,6 @@ struct gc_generation_stats {
302302
Py_ssize_t uncollectable;
303303
};
304304

305-
enum _GCPhase {
306-
GC_PHASE_MARK = 0,
307-
GC_PHASE_COLLECT = 1
308-
};
309-
310305
struct _gc_runtime_state {
311306
/* List of objects that still need to be cleaned up, singly linked
312307
* via their gc headers' gc_prev pointers. */
@@ -330,12 +325,10 @@ struct _gc_runtime_state {
330325
/* a list of callbacks to be invoked when collection is performed */
331326
PyObject *callbacks;
332327

333-
Py_ssize_t prior_heap_size;
334328
Py_ssize_t heap_size;
335329
Py_ssize_t work_to_do;
336330
/* Which of the old spaces is the visited space */
337331
int visited_space;
338-
int phase;
339332

340333
#ifdef Py_GIL_DISABLED
341334
/* This is the number of objects that survived the last full

Include/internal/pycore_object.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,8 @@ static inline void _PyObject_GC_TRACK(
466466
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
467467
_PyGCHead_SET_NEXT(last, gc);
468468
_PyGCHead_SET_PREV(gc, last);
469-
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
470-
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
469+
/* Young objects will be moved into the visited space during GC, so set the bit here */
470+
gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
471471
generation0->_gc_prev = (uintptr_t)gc;
472472
#endif
473473
}

Include/internal/pycore_runtime_init.h

-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ extern PyTypeObject _PyExc_MemoryError;
134134
{ .threshold = 0, }, \
135135
}, \
136136
.work_to_do = -5000, \
137-
.phase = GC_PHASE_MARK, \
138137
}, \
139138
.qsbr = { \
140139
.wr_seq = QSBR_INITIAL, \

InternalDocs/garbage_collector.md

+15-48
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,6 @@ follows these steps in order:
351351
the reference counts fall to 0, triggering the destruction of all unreachable
352352
objects.
353353

354-
355354
Optimization: incremental collection
356355
====================================
357356

@@ -485,46 +484,6 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
485484
```
486485

487486

488-
Optimization: visiting reachable objects
489-
========================================
490-
491-
An object cannot be garbage if it can be reached.
492-
493-
To avoid having to identify reference cycles across the whole heap, we can
494-
reduce the amount of work done considerably by first moving most reachable objects
495-
to the `visited` space. Empirically, most reachable objects can be reached from a
496-
small set of global objects and local variables.
497-
This step does much less work per object, so reduces the time spent
498-
performing garbage collection by at least half.
499-
500-
> [!NOTE]
501-
> Objects that are not determined to be reachable by this pass are not necessarily
502-
> unreachable. We still need to perform the main algorithm to determine which objects
503-
> are actually unreachable.
504-
505-
We use the same technique of forming a transitive closure as the incremental
506-
collector does to find reachable objects, seeding the list with some global
507-
objects and the currently executing frames.
508-
509-
This phase moves objects to the `visited` space, as follows:
510-
511-
1. All objects directly referred to by any builtin class, the `sys` module, the `builtins`
512-
module and all objects directly referred to from stack frames are added to a working
513-
set of reachable objects.
514-
2. Until this working set is empty:
515-
1. Pop an object from the set and move it to the `visited` space
516-
2. For each object directly reachable from that object:
517-
* If it is not already in `visited` space and it is a GC object,
518-
add it to the working set
519-
520-
521-
Before each increment of collection is performed, the stacks are scanned
522-
to check for any new stack frames that have been created since the last
523-
increment. All objects directly referred to from those stack frames are
524-
added to the working set.
525-
Then the above algorithm is repeated, starting from step 2.
526-
527-
528487
Optimization: reusing fields to save memory
529488
===========================================
530489

@@ -573,8 +532,8 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section:
573532
currently in. Instead, when that's needed, ad hoc tricks (like the
574533
`NEXT_MASK_UNREACHABLE` flag) are employed.
575534

576-
Optimization: delayed untracking of containers
577-
==============================================
535+
Optimization: delay tracking containers
536+
=======================================
578537

579538
Certain types of containers cannot participate in a reference cycle, and so do
580539
not need to be tracked by the garbage collector. Untracking these objects
@@ -589,8 +548,8 @@ a container:
589548
As a general rule, instances of atomic types aren't tracked and instances of
590549
non-atomic types (containers, user-defined objects...) are. However, some
591550
type-specific optimizations can be present in order to suppress the garbage
592-
collector footprint of simple instances. Historically, both dictionaries and
593-
tuples were untracked during garbage collection. Now it is only tuples:
551+
collector footprint of simple instances. Some examples of native types that
552+
benefit from delayed tracking:
594553

595554
- Tuples containing only immutable objects (integers, strings etc,
596555
and recursively, tuples of immutable objects) do not need to be tracked. The
@@ -599,8 +558,14 @@ tuples were untracked during garbage collection. Now it is only tuples:
599558
tuples at creation time. Instead, all tuples except the empty tuple are tracked
600559
when created. During garbage collection it is determined whether any surviving
601560
tuples can be untracked. A tuple can be untracked if all of its contents are
602-
already not tracked. Tuples are examined for untracking when moved from the
603-
young to the old generation.
561+
already not tracked. Tuples are examined for untracking in all garbage collection
562+
cycles. It may take more than one cycle to untrack a tuple.
563+
564+
- Dictionaries containing only immutable objects also do not need to be tracked.
565+
Dictionaries are untracked when created. If a tracked item is inserted into a
566+
dictionary (either as a key or value), the dictionary becomes tracked. During a
567+
full garbage collection (all generations), the collector will untrack any dictionaries
568+
whose contents are not tracked.
604569

605570
The garbage collector module provides the Python function `is_tracked(obj)`, which returns
606571
the current tracking status of the object. Subsequent garbage collections may change the
@@ -613,9 +578,11 @@ tracking status of the object.
613578
False
614579
>>> gc.is_tracked([])
615580
True
616-
>>> gc.is_tracked(("a", 1))
581+
>>> gc.is_tracked({})
617582
False
618583
>>> gc.is_tracked({"a": 1})
584+
False
585+
>>> gc.is_tracked({"a": []})
619586
True
620587
```
621588

Lib/test/test_dict.py

+109
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,115 @@ class C(object):
880880
gc.collect()
881881
self.assertIs(ref(), None, "Cycle was not collected")
882882

883+
def _not_tracked(self, t):
884+
# Nested containers can take several collections to untrack
885+
gc.collect()
886+
gc.collect()
887+
self.assertFalse(gc.is_tracked(t), t)
888+
889+
def _tracked(self, t):
890+
self.assertTrue(gc.is_tracked(t), t)
891+
gc.collect()
892+
gc.collect()
893+
self.assertTrue(gc.is_tracked(t), t)
894+
895+
def test_string_keys_can_track_values(self):
896+
# Test that this doesn't leak.
897+
for i in range(10):
898+
d = {}
899+
for j in range(10):
900+
d[str(j)] = j
901+
d["foo"] = d
902+
903+
@support.cpython_only
904+
def test_track_literals(self):
905+
# Test GC-optimization of dict literals
906+
x, y, z, w = 1.5, "a", (1, None), []
907+
908+
self._not_tracked({})
909+
self._not_tracked({x:(), y:x, z:1})
910+
self._not_tracked({1: "a", "b": 2})
911+
self._not_tracked({1: 2, (None, True, False, ()): int})
912+
self._not_tracked({1: object()})
913+
914+
# Dicts with mutable elements are always tracked, even if those
915+
# elements are not tracked right now.
916+
self._tracked({1: []})
917+
self._tracked({1: ([],)})
918+
self._tracked({1: {}})
919+
self._tracked({1: set()})
920+
921+
@support.cpython_only
922+
def test_track_dynamic(self):
923+
# Test GC-optimization of dynamically-created dicts
924+
class MyObject(object):
925+
pass
926+
x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
927+
928+
d = dict()
929+
self._not_tracked(d)
930+
d[1] = "a"
931+
self._not_tracked(d)
932+
d[y] = 2
933+
self._not_tracked(d)
934+
d[z] = 3
935+
self._not_tracked(d)
936+
self._not_tracked(d.copy())
937+
d[4] = w
938+
self._tracked(d)
939+
self._tracked(d.copy())
940+
d[4] = None
941+
self._not_tracked(d)
942+
self._not_tracked(d.copy())
943+
944+
# dd isn't tracked right now, but it may mutate and therefore d
945+
# which contains it must be tracked.
946+
d = dict()
947+
dd = dict()
948+
d[1] = dd
949+
self._not_tracked(dd)
950+
self._tracked(d)
951+
dd[1] = d
952+
self._tracked(dd)
953+
954+
d = dict.fromkeys([x, y, z])
955+
self._not_tracked(d)
956+
dd = dict()
957+
dd.update(d)
958+
self._not_tracked(dd)
959+
d = dict.fromkeys([x, y, z, o])
960+
self._tracked(d)
961+
dd = dict()
962+
dd.update(d)
963+
self._tracked(dd)
964+
965+
d = dict(x=x, y=y, z=z)
966+
self._not_tracked(d)
967+
d = dict(x=x, y=y, z=z, w=w)
968+
self._tracked(d)
969+
d = dict()
970+
d.update(x=x, y=y, z=z)
971+
self._not_tracked(d)
972+
d.update(w=w)
973+
self._tracked(d)
974+
975+
d = dict([(x, y), (z, 1)])
976+
self._not_tracked(d)
977+
d = dict([(x, y), (z, w)])
978+
self._tracked(d)
979+
d = dict()
980+
d.update([(x, y), (z, 1)])
981+
self._not_tracked(d)
982+
d.update([(x, y), (z, w)])
983+
self._tracked(d)
984+
985+
@support.cpython_only
986+
def test_track_subtypes(self):
987+
# Dict subtypes are always tracked
988+
class MyDict(dict):
989+
pass
990+
self._tracked(MyDict())
991+
883992
def make_shared_key_dict(self, n):
884993
class C:
885994
pass

0 commit comments

Comments
 (0)