From 21941a393c02fc4a25432d5ca045a8a72460cd4f Mon Sep 17 00:00:00 2001 From: d-netto Date: Mon, 17 Jul 2023 15:21:37 -0300 Subject: [PATCH 1/3] Revert "Don't use exchange in the hot path of the GC (#50021)" This reverts commit 0a2d6fc552e3675c2485cc56820af40df47461d6. --- src/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gc.c b/src/gc.c index 46754d3223f1c..0761e29092e89 100644 --- a/src/gc.c +++ b/src/gc.c @@ -799,7 +799,7 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr, FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { assert(gc_marked(mark_mode)); - uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header); + uintptr_t tag = o->header; if (gc_marked(tag)) return 0; if (mark_reset_age) { @@ -813,9 +813,9 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N tag = tag | mark_mode; assert((tag & 0x3) == mark_mode); } - jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than - verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag. - return 1; + tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); + verify_val(jl_valueof(o)); + return !gc_marked(tag); } // This function should be called exactly once during marking for each big From 3725a53f4a6908a52304c741e34612e8eb574d39 Mon Sep 17 00:00:00 2001 From: d-netto Date: Mon, 17 Jul 2023 15:22:30 -0300 Subject: [PATCH 2/3] Revert "relax assertion involving pg->nold to reflect that it may be a bit inaccurate with parallel marking (#50466)" This reverts commit 8e877cbb01d2c26a3fdcb7b9302f4d3ead229f9f. --- src/gc.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/gc.c b/src/gc.c index 0761e29092e89..7f8c046de23d1 100644 --- a/src/gc.c +++ b/src/gc.c @@ -854,7 +854,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += page->osize; static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); - page->nold++; + jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1); } else { ptls->gc_cache.scanned_bytes += page->osize; @@ -1377,27 +1377,20 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; goto done; } - // note that `pg->nold` may not be accurate with multithreaded marking since - // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`. - // We're basically losing a bit of precision in the sweep phase at the cost of - // making the mark phase considerably cheaper. - // See issue #50419 - if (jl_n_markthreads == 0) { - // For quick sweep, we might be able to skip the page if the page doesn't - // have any young live cell before marking. - if (!sweep_full && !pg->has_young) { - assert(!prev_sweep_full || pg->prev_nold >= pg->nold); - if (!prev_sweep_full || pg->prev_nold == pg->nold) { - // the position of the freelist begin/end in this page - // is stored in its metadata - if (pg->fl_begin_offset != (uint16_t)-1) { - *pfl = page_pfl_beg(pg); - pfl = (jl_taggedvalue_t**)page_pfl_end(pg); - } - freedall = 0; - nfree = pg->nfree; - goto done; + // For quick sweep, we might be able to skip the page if the page doesn't + // have any young live cell before marking. + if (!sweep_full && !pg->has_young) { + assert(!prev_sweep_full || pg->prev_nold >= pg->nold); + if (!prev_sweep_full || pg->prev_nold == pg->nold) { + // the position of the freelist begin/end in this page + // is stored in its metadata + if (pg->fl_begin_offset != (uint16_t)-1) { + *pfl = page_pfl_beg(pg); + pfl = (jl_taggedvalue_t**)page_pfl_end(pg); } + freedall = 0; + nfree = pg->nfree; + goto done; } } From 509eb5c847fea0a0da47b394195c896cc97211d3 Mon Sep 17 00:00:00 2001 From: d-netto Date: Mon, 17 Jul 2023 21:41:43 -0300 Subject: [PATCH 3/3] suggestion from Gabriel's review --- src/gc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gc.c b/src/gc.c index 7f8c046de23d1..ec6d6b42cc71e 100644 --- a/src/gc.c +++ b/src/gc.c @@ -813,6 +813,10 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N tag = tag | mark_mode; assert((tag & 0x3) == mark_mode); } + // XXX: note that marking not only sets the GC bits but also updates the + // page metadata for pool allocated objects. + // The second step is **not** idempotent, so we need a compare exchange here + // (instead of a pair of load&store) to avoid marking an object twice tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); verify_val(jl_valueof(o)); return !gc_marked(tag);