Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport promote objects more eagerly (#49644) #80

Merged
merged 1 commit into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -546,14 +546,6 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high)
// Make sure the sweep rebuilds the freelist
pg->has_marked = 1;
pg->has_young = 1;
// Find the age bit
char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
int obj_id = (((char*)tag) - page_begin) / osize;
uint8_t *ages = pg->ages + obj_id / 8;
// Force this to be a young object to save some memory
// (especially on 32bit where it's more likely to have pointer-like
// bit patterns)
*ages &= ~(1 << (obj_id % 8));
memset(tag, 0xff, osize);
// set mark to GC_MARKED (young and marked)
tag->bits.gc = GC_MARKED;
Expand Down
1 change: 0 additions & 1 deletion src/gc-pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
// return a page to the freemap allocator
void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
{
free(pg->ages);
void *p = pg->data;
gc_alloc_map_set((char*)p, GC_PAGE_FREED);
// tell the OS we don't need these pages right now
Expand Down
125 changes: 24 additions & 101 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ static int mark_reset_age = 0;
*
* <-[(quick)sweep]-
* |
* ----> GC_OLD <--[(quick)sweep && age>promotion]--
* ----> GC_OLD <--[(quick)sweep]-------------------
* | | |
* | | GC_MARKED (in remset) |
* | | ^ | |
Expand All @@ -820,9 +820,9 @@ static int mark_reset_age = 0;
* ========= above this line objects are old ========= |
* |
* ----[new]------> GC_CLEAN ------[mark]-----------> GC_MARKED
* | ^ |
* <-[(quick)sweep]--- | |
* --[(quick)sweep && age<=promotion]---
* |
* <-[(quick)sweep]---
*
*/

// A quick sweep is a sweep where `!sweep_full`
Expand All @@ -836,20 +836,11 @@ static int mark_reset_age = 0;
// When a write barrier triggers, the offending marked object is both queued,
// so as not to trigger the barrier again, and put in the remset.


#define PROMOTE_AGE 1
// this cannot be increased as is without changing :
// - sweep_page which is specialized for 1bit age
// - the size of the age storage in jl_gc_pagemeta_t


static int64_t scanned_bytes; // young bytes scanned while marking
static int64_t perm_scanned_bytes; // old bytes scanned while marking
int prev_sweep_full = 1;
int current_sweep_full = 0;

#define inc_sat(v,s) v = (v) >= s ? s : (v)+1

// Full collection heuristics
static int64_t pool_live_bytes = 0;
static int64_t live_bytes = 0;
Expand Down Expand Up @@ -954,9 +945,8 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
// We can't easily tell if the object is old or being promoted
// from the gc bits but if the `age` is `0` then the object
// must be already on a young list.
if (mark_reset_age && hdr->age) {
if (mark_reset_age) {
// Reset the object as if it was just allocated
hdr->age = 0;
gc_queue_big_marked(ptls, hdr, 1);
}
}
Expand All @@ -981,10 +971,6 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
ptls->gc_cache.scanned_bytes += page->osize;
if (mark_reset_age) {
page->has_young = 1;
char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET;
int obj_id = (((char*)o) - page_begin) / page->osize;
uint8_t *ages = page->ages + obj_id / 8;
jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8)));
}
}
objprofile_count(jl_typeof(jl_valueof(o)),
Expand Down Expand Up @@ -1035,37 +1021,6 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL
gc_setmark_buf_(ptls, o, mark_mode, minsz);
}

// Force the object `v` into the GC_OLD_MARKED state.
//
// Sets the gc bits on the object's tag to GC_OLD_MARKED, works out the size
// to report for the object based on its type, calls gc_setmark() with that
// size, and finally queues `v` via jl_gc_queue_root() when its layout
// declares at least one pointer field.
//
// Returns immediately if the object is already GC_OLD_MARKED.
void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT
{
jl_taggedvalue_t *o = jl_astaggedvalue(v);
jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v);
// Default size comes from the datatype; specific types override it below.
size_t dtsz = jl_datatype_size(dt);
// Already old and marked: nothing to do.
if (o->bits.gc == GC_OLD_MARKED)
return;
o->bits.gc = GC_OLD_MARKED;
if (dt == jl_simplevector_type) {
// Size is the svec header plus one pointer-sized slot per element.
size_t l = jl_svec_len(v);
dtsz = l * sizeof(void*) + sizeof(jl_svec_t);
}
else if (dt->name == jl_array_typename) {
jl_array_t *a = (jl_array_t*)v;
// Non-pooled arrays report a size just above GC_MAX_SZCLASS.
// NOTE(review): presumably this steers gc_setmark() onto its
// big-object path -- confirm against gc_setmark().
if (!a->flags.pooled)
dtsz = GC_MAX_SZCLASS + 1;
}
else if (dt == jl_module_type) {
dtsz = sizeof(jl_module_t);
}
else if (dt == jl_task_type) {
dtsz = sizeof(jl_task_t);
}
else if (dt == jl_symbol_type) {
// Symbols stop here: the gc bits were already set above, but
// gc_setmark() and the root queueing below are both skipped.
return;
}
gc_setmark(ptls, o, GC_OLD_MARKED, dtsz);
// Only objects whose layout has pointer fields are queued as roots.
if (dt->layout->npointers != 0)
jl_gc_queue_root(v);
}

STATIC_INLINE void maybe_collect(jl_ptls_t ptls)
{
if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) {
Expand Down Expand Up @@ -1161,7 +1116,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
memset(v, 0xee, allocsz);
#endif
v->sz = allocsz;
v->age = 0;
gc_big_object_link(v, &ptls->heap.big_objects);
return jl_valueof(&v->header);
}
Expand Down Expand Up @@ -1199,16 +1153,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
int old_bits = bits;
if (gc_marked(bits)) {
pv = &v->next;
int age = v->age;
if (age >= PROMOTE_AGE || bits == GC_OLD_MARKED) {
if (sweep_full || bits == GC_MARKED) {
bits = GC_OLD;
}
}
else {
inc_sat(age, PROMOTE_AGE);
v->age = age;
bits = GC_CLEAN;
if (sweep_full || bits == GC_MARKED) {
bits = GC_OLD;
}
v->bits.gc = bits;
}
Expand Down Expand Up @@ -1386,12 +1332,13 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
assert(GC_PAGE_OFFSET >= sizeof(void*));
pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
pg->pool_n = p - ptls2->heap.norm_pools;
memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1);
jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
pg->has_young = 0;
pg->has_marked = 0;
pg->fl_begin_offset = -1;
pg->fl_end_offset = -1;
pg->prev_nold = 0;
pg->nold = 0;
pg->fl_begin_offset = UINT16_MAX;
pg->fl_end_offset = UINT16_MAX;
return beg;
}

Expand All @@ -1414,7 +1361,6 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
pg = jl_gc_alloc_page();
}
pg->osize = p->osize;
pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1);
pg->thread_n = ptls->tid;
set_page_metadata(pg);
push_lf_back(&ptls->page_metadata_allocd, pg);
Expand Down Expand Up @@ -1522,7 +1468,6 @@ static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pag
jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
{
char *data = pg->data;
uint8_t *ages = pg->ages;
jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
char *lim = data + GC_PAGE_SZ - osize;
char *lim_newpages = data + GC_PAGE_SZ;
Expand Down Expand Up @@ -1570,43 +1515,25 @@ static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pag
jl_taggedvalue_t *fl = NULL;
jl_taggedvalue_t **pfl = &fl;
jl_taggedvalue_t **pfl_begin = NULL;
uint8_t msk = 1; // mask for the age bit in the current age byte
while ((char*)v <= lim) {
int bits = v->bits.gc;
// if an object is past `lim_newpages` then we can guarantee it's garbage
if (!gc_marked(bits) || (char*)v >= lim_newpages) {
*pfl = v;
pfl = &v->next;
pfl_begin = pfl_begin ? pfl_begin : pfl;
pfl_begin = (pfl_begin != NULL) ? pfl_begin : pfl;
pg_nfree++;
*ages &= ~msk;
}
else { // marked young or old
if (*ages & msk || bits == GC_OLD_MARKED) { // old enough
// `!age && bits == GC_OLD_MARKED` is possible for
// non-first-class objects like `jl_binding_t`
if (current_sweep_full || bits == GC_MARKED) {
bits = v->bits.gc = GC_OLD; // promote
}
prev_nold++;
}
else {
assert(bits == GC_MARKED);
bits = v->bits.gc = GC_CLEAN; // unmark
has_young = 1;
if (current_sweep_full || bits == GC_MARKED) { // old enough
bits = v->bits.gc = GC_OLD; // promote
}
prev_nold++;
has_marked |= gc_marked(bits);
*ages |= msk;
freedall = 0;
}
v = (jl_taggedvalue_t*)((char*)v + osize);
msk <<= 1;
if (!msk) {
msk = 1;
ages++;
}
}

assert(!freedall);
pg->has_marked = has_marked;
pg->has_young = has_young;
Expand All @@ -1615,8 +1542,8 @@ static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pag
pg->fl_end_offset = (char*)pfl - data;
}
else {
pg->fl_begin_offset = -1;
pg->fl_end_offset = -1;
pg->fl_begin_offset = UINT16_MAX;
pg->fl_end_offset = UINT16_MAX;
}

pg->nfree = pg_nfree;
Expand Down Expand Up @@ -1666,7 +1593,7 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT

static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
{
assert(pg->fl_begin_offset != (uint16_t)-1);
assert(pg->fl_begin_offset != UINT16_MAX);
char *cur_pg = gc_page_data(last);
// Fast path for page that has no allocation
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
Expand Down Expand Up @@ -1775,7 +1702,7 @@ static void gc_sweep_pool(void)
pfl[t_i * JL_GC_N_POOLS + i] = &p->freelist;

last = p->newpages;
if (last) {
if (last != NULL) {
char *last_p = (char*)last;
jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last_p - 1));
assert(last_p - gc_page_data(last_p - 1) >= GC_PAGE_OFFSET);
Expand Down Expand Up @@ -4008,7 +3935,6 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
// old pointer.
bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0);
newbig->sz = allocsz;
newbig->age = 0;
gc_big_object_link(newbig, &ptls->heap.big_objects);
jl_value_t *snew = jl_valueof(&newbig->header);
*(size_t*)snew = sz;
Expand Down Expand Up @@ -4182,7 +4108,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
{
p = (char *) p - 1;
jl_gc_pagemeta_t *meta = page_metadata(p);
if (meta && meta->ages) {
if (meta) {
char *page = gc_page_data(p);
// offset within page.
size_t off = (char *)p - page;
Expand Down Expand Up @@ -4217,7 +4143,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
char *data = gc_page_data(newpages);
if (data != meta->data) {
// Pages on newpages form a linked list where only the
// first one is allocated from (see reset_page()).
// first one is allocated from (see gc_reset_page()).
// All other pages are empty.
return NULL;
}
Expand Down Expand Up @@ -4245,19 +4171,16 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
// entries and 1 for live objects. The above subcases arise
// because allocating a cell will not update the age bit, so we
// need extra logic for pages that have been allocated from.
unsigned obj_id = (off - off2) / osize;
// We now distinguish between the second and third subcase.
// Freelist entries are consumed in ascending order. Anything
// before the freelist pointer was either live during the last
// sweep or has been allocated since.
if (gc_page_data(cell) == gc_page_data(pool->freelist)
&& (char *)cell < (char *)pool->freelist)
goto valid_object;
// We know now that the age bit reflects liveness status during
// the last sweep and that the cell has not been reused since.
if (!(meta->ages[obj_id / 8] & (1 << (obj_id % 8)))) {
return NULL;
}
// already skipped marked or old objects above, so here
// the age bits are 0, thus the object is on the freelist
return NULL;
// Not a freelist entry, therefore a valid object.
valid_object:
// We have to treat objects with type `jl_buff_tag` differently,
Expand Down
8 changes: 2 additions & 6 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,7 @@ typedef struct _jl_gc_chunk_t {
JL_EXTENSION typedef struct _bigval_t {
struct _bigval_t *next;
struct _bigval_t **prev; // pointer to the next field of the prev entry
union {
size_t sz;
uintptr_t age : 2;
};
size_t sz;
#ifdef _P64 // Add padding so that the value is 64-byte aligned
// (8 pointers of 8 bytes each) - (4 other pointers in struct)
void *_padding[8 - 4];
Expand Down Expand Up @@ -177,12 +174,11 @@ typedef struct _jl_gc_pagemeta_t {
// number of free objects in this page.
// invalid if pool that owns this page is allocating objects from this page.
uint16_t nfree;
uint16_t osize; // size of each object in this page
uint16_t osize; // size of each object in this page
uint16_t fl_begin_offset; // offset of first free object in this page
uint16_t fl_end_offset; // offset of last free object in this page
uint16_t thread_n; // thread id of the heap that owns this page
char *data;
uint8_t *ages;
} jl_gc_pagemeta_t;

extern jl_gc_page_stack_t global_page_pool_lazily_freed;
Expand Down
1 change: 0 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,6 @@ void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
unsigned align, unsigned offset) JL_NOTSAFEPOINT;
void *jl_gc_perm_alloc(size_t sz, int zero,
unsigned align, unsigned offset) JL_NOTSAFEPOINT;
void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v);
void gc_sweep_sysimg(void);


Expand Down