Skip to content

Commit

Permalink
gc: make the garbage collector non-generational
Browse files Browse the repository at this point in the history
This is going to be important once the GIL is removed for a few reasons:

 - We won't be maintaining the GC linked list. Scanning the heap is
   O(N), best to do it in proportion to the number of live objects.

 - Frequent stop-the-world pauses will become a bottleneck in
   multi-threaded programs

 - Python programs don't really adhere to the generational hypothesis,
   so there isn't much benefit to a generational collector. Most objects
   seen by the GC *don't* die young! (Most survive the GC cycles.)
  • Loading branch information
colesbury committed Apr 23, 2023
1 parent e8c13ea commit 654be8f
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 336 deletions.
23 changes: 18 additions & 5 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ static inline void _PyGC_SET_FINALIZED(PyObject *op) {

/* If we change this, we need to change the default value in the
signature of gc.collect. */
#define NUM_GENERATIONS 3
#define NUM_GENERATIONS 1
/*
NOTE: about untracking of mutable objects.
Expand Down Expand Up @@ -176,17 +176,24 @@ struct _gc_runtime_state {
int enabled;
int debug;
/* linked lists of container objects */
struct gc_generation generations[NUM_GENERATIONS];
PyGC_Head *generation0;
PyGC_Head head;
/* a permanent generation which won't be collected */
struct gc_generation permanent_generation;
struct gc_generation_stats generation_stats[NUM_GENERATIONS];
struct gc_generation_stats stats;
/* true if we are currently running the collector */
int collecting;
/* list of uncollectable objects */
PyObject *garbage;
/* a list of callbacks to be invoked when collection is performed */
PyObject *callbacks;
/* the number of live GC objects */
Py_ssize_t gc_live;
/* the threshold at which to trigger a collection */
Py_ssize_t gc_threshold;
/* The ratio used to compute gc_threshold:
gc_threshold = (1 + gc_scale/100) * gc_live
A value of 100 means to collect every time the number of live
objects doubles. */
int gc_scale;
/* This is the number of objects that survived the last full
collection. It approximates the number of long lived objects
tracked by the GC.
Expand All @@ -205,6 +212,12 @@ extern void _PyGC_InitState(struct _gc_runtime_state *);

extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate);

/* Decide whether an automatic collection should be started now.

   Returns 1 only when every one of these conditions holds, else 0:
     - the live-object count (read with a relaxed atomic load) has
       reached gc_threshold,
     - the collector is enabled,
     - gc_threshold is non-zero,
     - the collector is not already running. */
static inline int
_PyGC_ShouldCollect(struct _gc_runtime_state *gcstate)
{
    Py_ssize_t live_count = _Py_atomic_load_ssize_relaxed(&gcstate->gc_live);

    /* Not enough live objects yet. */
    if (live_count < gcstate->gc_threshold) {
        return 0;
    }
    if (!gcstate->enabled) {
        return 0;
    }
    /* A zero threshold never triggers a collection from here. */
    if (!gcstate->gc_threshold) {
        return 0;
    }
    /* Avoid starting a collection while one is in progress. */
    return !gcstate->collecting;
}

// Functions to clear types free lists
extern void _PyTuple_ClearFreeList(PyInterpreterState *interp);
Expand Down
8 changes: 4 additions & 4 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,12 @@ static inline void _PyObject_GC_TRACK(
filename, lineno, __func__);

PyInterpreterState *interp = _PyInterpreterState_GET();
PyGC_Head *generation0 = interp->gc.generation0;
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
PyGC_Head *head = &interp->gc.head;
PyGC_Head *last = (PyGC_Head*)(head->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
_PyGCHead_SET_NEXT(gc, generation0);
generation0->_gc_prev = (uintptr_t)gc;
_PyGCHead_SET_NEXT(gc, head);
head->_gc_prev = (uintptr_t)gc;
}

/* Tell the GC to stop tracking this object.
Expand Down
6 changes: 0 additions & 6 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,6 @@ extern "C" {
}, \
.gc = { \
.enabled = 1, \
.generations = { \
/* .head is set in _PyGC_InitState(). */ \
{ .threshold = 700, }, \
{ .threshold = 10, }, \
{ .threshold = 10, }, \
}, \
}, \
.static_objects = { \
.singletons = { \
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/audit-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def hook(event, args):

sys.addaudithook(hook)

gc.get_objects(generation=1)
gc.get_objects()

x = object()
y = [x]
Expand Down
127 changes: 26 additions & 101 deletions Lib/test/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,47 +356,25 @@ def __del__(self):
gc.disable()
gc.set_threshold(*thresholds)

# The following two tests are fragile:
# They precisely count the number of allocations,
# which is highly implementation-dependent.
# For example, disposed tuples are not freed, but reused.
# To minimize variations, though, we first store the get_count() results
# and check them at the end.
# The following test is implementation-dependent because it
# counts the number of allocations.
@refcount_test
def test_get_count(self):
gc.collect()
a, b, c = gc.get_count()
x = []
# Note: get_count() isn't precise. To enable faster allocations,
# it is only updated when mimalloc "pages" become full or are no
# longer full.
tmp = []
for _ in range(2048):
tmp.append([''])
d, e, f = gc.get_count()
self.assertEqual((b, c), (0, 0))
self.assertEqual((e, f), (0, 0))
# This is less fragile than asserting that a equals 0.
self.assertLess(a, 5)
# Between the two calls to get_count(), at least one object was
# created (the list).
# Between the two calls to get_count(), enough objects were
# created to increase the count.
self.assertGreater(d, a)

@refcount_test
def test_collect_generations(self):
gc.collect()
# This object will "trickle" into generation N + 1 after
# each call to collect(N)
x = []
gc.collect(0)
# x is now in gen 1
a, b, c = gc.get_count()
gc.collect(1)
# x is now in gen 2
d, e, f = gc.get_count()
gc.collect(2)
# x is now in gen 3
g, h, i = gc.get_count()
# We don't check a, d, g since their exact values depends on
# internal implementation details of the interpreter.
self.assertEqual((b, c), (1, 0))
self.assertEqual((e, f), (0, 1))
self.assertEqual((h, i), (0, 0))

def test_trashcan(self):
class Ouch:
n = 0
Expand Down Expand Up @@ -784,7 +762,7 @@ def __del__(self):

def test_get_stats(self):
stats = gc.get_stats()
self.assertEqual(len(stats), 3)
self.assertEqual(len(stats), 1)
for st in stats:
self.assertIsInstance(st, dict)
self.assertEqual(set(st),
Expand All @@ -797,20 +775,14 @@ def test_get_stats(self):
self.addCleanup(gc.enable)
gc.disable()
old = gc.get_stats()
gc.collect(0)
new = gc.get_stats()
self.assertEqual(new[0]["collections"], old[0]["collections"] + 1)
self.assertEqual(new[1]["collections"], old[1]["collections"])
self.assertEqual(new[2]["collections"], old[2]["collections"])
gc.collect(2)
gc.collect()
new = gc.get_stats()
self.assertEqual(new[0]["collections"], old[0]["collections"] + 1)
self.assertEqual(new[1]["collections"], old[1]["collections"])
self.assertEqual(new[2]["collections"], old[2]["collections"] + 1)

def test_freeze(self):
# freeze no longer does anything, so count is always zero :(
gc.freeze()
self.assertGreater(gc.get_freeze_count(), 0)
self.assertEqual(gc.get_freeze_count(), 0)
gc.unfreeze()
self.assertEqual(gc.get_freeze_count(), 0)

Expand All @@ -819,52 +791,17 @@ def test_get_objects(self):
l = []
l.append(l)
self.assertTrue(
any(l is element for element in gc.get_objects(generation=0))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=1))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=0)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
)
self.assertTrue(
any(l is element for element in gc.get_objects(generation=1))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=1)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=1))
)
self.assertTrue(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=2)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=1))
any(l is element for element in gc.get_objects())
)
gc.collect()
self.assertTrue(
any(l is element for element in gc.get_objects(generation=2))
any(l is element for element in gc.get_objects())
)
del l
gc.collect()

def test_get_objects_arguments(self):
gc.collect()
self.assertEqual(len(gc.get_objects()),
len(gc.get_objects(generation=None)))

self.assertGreater(len(gc.get_objects()), 0)
self.assertRaises(ValueError, gc.get_objects, 1000)
self.assertRaises(ValueError, gc.get_objects, -1000)
self.assertRaises(TypeError, gc.get_objects, "1")
Expand Down Expand Up @@ -1147,10 +1084,10 @@ def test_collect(self):

def test_collect_generation(self):
self.preclean()
gc.collect(2)
gc.collect(0)
for v in self.visit:
info = v[2]
self.assertEqual(info["generation"], 2)
self.assertEqual(info["generation"], 0)

@cpython_only
def test_collect_garbage(self):
Expand Down Expand Up @@ -1304,15 +1241,9 @@ def callback(ignored):

# We want to let gc happen "naturally", to preserve the distinction
# between generations.
junk = []
i = 0
detector = GC_Detector()
while not detector.gc_happened:
i += 1
if i > 10000:
self.fail("gc didn't happen after 10000 iterations")
self.assertEqual(len(ouch), 0)
junk.append([]) # this will eventually trigger gc
# TODO(sgross): revisit this. no guaranteed "natural" collection, so trigger
# a collection manually.
gc.collect()

self.assertEqual(len(ouch), 1) # else the callback wasn't invoked
for x in ouch:
Expand Down Expand Up @@ -1371,15 +1302,9 @@ def __del__(self):

# We want to let gc happen "naturally", to preserve the distinction
# between generations.
detector = GC_Detector()
junk = []
i = 0
while not detector.gc_happened:
i += 1
if i > 10000:
self.fail("gc didn't happen after 10000 iterations")
self.assertEqual(len(ouch), 0)
junk.append([]) # this will eventually trigger gc
# TODO(sgross): revisit this. no guaranteed "natural" collection, so trigger
# a collection manually.
gc.collect()

self.assertEqual(len(ouch), 1) # else __del__ wasn't invoked
for x in ouch:
Expand Down
Loading

0 comments on commit 654be8f

Please sign in to comment.