From e8ba639f5f994db0f5759079498dd8a6dd5a353b Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 1 Feb 2023 08:56:50 +1100
Subject: [PATCH 001/116] Update/julia master (#2)

This PR updates the binding to the latest Julia master (up to this
commit: 134f3e7dfaa04511a2f81f4a40cdc85f4e433706).
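For reviewers: the new lowering in llvm-final-gc-lowering.cpp below emits a
thread-local bump-pointer fastpath for small allocations. In C terms, the
emitted IR is roughly the following (an illustrative sketch only, not code in
this patch; `cursor` and `limit` are the new TLS fields added to
julia_threads.h in this series):

    uintptr_t cursor = (uintptr_t)ptls->cursor;
    // Align so that the object (cursor + 8-byte tag) is 16-byte aligned.
    uintptr_t delta = (-(cursor + 8)) & 15;
    uintptr_t result = cursor + delta;
    uintptr_t new_cursor = result + osize;
    if (new_cursor > (uintptr_t)ptls->limit) {
        // Slowpath: fall back to the runtime pool-allocation call.
        v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty);
    }
    else {
        ptls->cursor = (void*)new_cursor;   // bump the cursor
        ptls->gc_num.allocd += osize;       // per-thread allocation accounting
        v = (jl_value_t*)(result + sizeof(jl_taggedvalue_t));
    }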
---
 Makefile                       |   3 +
 src/Makefile                   |  58 ++++++++++++
 src/array.c                    |  10 +++
 src/gc.c                       | 159 +++++++++++++++++++++++++++++++--
 src/init.c                     |  10 +++
 src/julia.h                    |   6 ++
 src/julia_internal.h           |  18 ++++
 src/julia_threads.h            |  10 +++
 src/llvm-final-gc-lowering.cpp |  68 ++++++++++++++
 src/llvm-pass-helpers.cpp      |   5 ++
 src/threading.c                |   4 +
 11 files changed, 346 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index c080f0d144cf6..bc24f9272b060 100644
--- a/Makefile
+++ b/Makefile
@@ -621,6 +621,9 @@ testall: check-whitespace $(JULIA_BUILD_MODE)
 testall1: check-whitespace $(JULIA_BUILD_MODE)
 	@env JULIA_CPU_THREADS=1 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
+testall3: check-whitespace $(JULIA_BUILD_MODE)
+	@env JULIA_CPU_THREADS=3 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
+
 test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE
 	@([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \
 	printf '\033[93m    HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true

diff --git a/src/Makefile b/src/Makefile
index 0baa34fedf877..d113eea5422a5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,6 +4,17 @@ BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 include $(JULIAHOME)/deps/llvm-ver.make
 
+ifeq ($(USE_MMTK), 1)
+CFLAGS = -DMMTKHEAP
+CPPFLAGS = -DMMTKHEAP
+MMTK_BUILD_TYPE = ${MMTK_BUILD}
+MMTK_DIR = ${MMTK_JULIA_DIR}
+MMTK_API_DIR_INCLUDE = $(MMTK_DIR)/api
+MMTK_JULIA_DIR_INCLUDE = $(MMTK_DIR)/../julia
+MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/ -lmmtk_julia
+LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/
+endif
+
 JCFLAGS += $(CFLAGS)
 JCXXFLAGS += $(CXXFLAGS)
 JCPPFLAGS += $(CPPFLAGS)
@@ -18,6 +29,11 @@ FLAGS := \
 	-I$(SRCDIR)/flisp -I$(SRCDIR)/support \
 	-I$(LIBUV_INC) -I$(build_includedir) \
 	-I$(JULIAHOME)/deps/valgrind
+
+ifeq ($(USE_MMTK), 1)
+FLAGS += -I$(MMTK_API_DIR_INCLUDE) -I$(MMTK_JULIA_DIR_INCLUDE)
+endif
+
 FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
 	-Wno-comment -Wpointer-arith -Wundef
 ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result))
@@ -159,6 +175,12 @@ endif
 COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
 RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS)
 CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS)
+
+ifeq ($(USE_MMTK), 1)
+CG_LIBS += $(MMTK_LIB)
+RT_LIBS += $(MMTK_LIB)
+endif
+
 RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
 CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
 RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS)
@@ -167,6 +189,12 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
+ifeq ($(USE_MMTK), 1)
+MMTK_SRCS := mmtk_julia
+MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o)
+MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o)
+endif
+
 CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
 CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
@@ -234,6 +262,13 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.d
 $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
 	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 
+ifeq ($(USE_MMTK), 1)
+$(MMTK_JULIA_DIR_INCLUDE)/%.o: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
+$(MMTK_JULIA_DIR_INCLUDE)/%.dbg.obj: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+endif
+
 # public header rules
 $(eval $(call dir_target,$(build_includedir)/julia))
 define public_header_target
@@ -363,6 +398,19 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 
 CXXLD = $(CXX) -shared
 
+ifeq ($(USE_MMTK), 1)
+$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
+
+$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
+else
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
@@ -374,6 +422,7 @@ $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
+endif
 
 ifneq ($(OS), WINNT)
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \
@@ -415,11 +464,20 @@ libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SH
 libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
 libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS)
 
+ifeq ($(USE_MMTK), 1)
+clean:
+	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
+	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc
+	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a
+	-rm -f $(BUILDDIR)/julia_version.h
+	-rm -fr $(MMTK_JULIA_DIR_INCLUDE)/*.o
+else
 clean:
 	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
 	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
 	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
 	-rm -f $(BUILDDIR)/julia_version.h
+endif
 
 clean-flisp:
 	-$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)'

diff --git a/src/array.c b/src/array.c
index ae89087502627..f515f5d26c024 100644
--- a/src/array.c
+++ b/src/array.c
@@ -497,17 +497,27 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     jl_ptls_t ptls = ct->ptls;
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
+#ifndef MMTKHEAP
         int pool_id = jl_gc_szclass_align8(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
         // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
         // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
         s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+#else
+        int pool_id = jl_gc_szclass_align8(allocsz);
+        int osize = jl_gc_sizeclasses[pool_id];
+        s = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, jl_string_type);
+#endif
     }
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
             jl_throw(jl_memory_exception);
+#ifndef MMTKHEAP
         s = jl_gc_big_alloc_noinline(ptls, allocsz);
+#else
+        s = jl_mmtk_gc_alloc_big(ptls, allocsz);
+#endif
     }
     jl_set_typeof(s, jl_string_type);
     maybe_record_alloc_to_profile(s, len, jl_string_type);

diff --git a/src/gc.c b/src/gc.c
index fc2a4041910f5..7eb05fbb12251 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -7,6 +7,10 @@
 #include <malloc.h> // for malloc_trim
 #endif
 
+#ifdef MMTKHEAP
+#include "mmtk_julia.h"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -244,6 +248,9 @@ STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
 #else
 STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
 {
+#ifdef MMTKHEAP
+    return mmtk_malloc_aligned(sz, align);
+#endif
 #if defined(_P64) || defined(__APPLE__)
     if (align <= 16)
         return malloc(sz);
@@ -256,6 +263,14 @@ STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
 STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
                                        size_t align)
 {
+#ifdef MMTKHEAP
+    void *res = jl_malloc_aligned(sz, align);
+    if (res != NULL) {
+        memcpy(res, d, oldsz > sz ? sz : oldsz);
+        mmtk_free_aligned(d);
+    }
+    return res;
+#endif
 #if defined(_P64) || defined(__APPLE__)
     if (align <= 16)
         return realloc(d, sz);
@@ -269,7 +284,11 @@ STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
 }
 STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
 {
+#ifdef MMTKHEAP
+    mmtk_free_aligned(p);
+#else
     free(p);
+#endif
 }
 #endif
 #define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
@@ -284,7 +303,10 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
     jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
 }
 
-static void run_finalizer(jl_task_t *ct, void *o, void *ff)
+#ifndef MMTKHEAP
+static
+#endif
+void run_finalizer(jl_task_t *ct, void *o, void *ff)
 {
     int ptr_finalizer = gc_ptr_tag(o, 1);
     o = gc_ptr_clear_tag(o, 3);
@@ -393,7 +415,10 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NO
     ct->sticky = sticky;
 }
 
-static uint64_t finalizer_rngState[4];
+#ifndef MMTKHEAP
+static
+#endif
+uint64_t finalizer_rngState[4];
 
 void jl_rng_split(uint64_t to[4], uint64_t from[4]) JL_NOTSAFEPOINT;
 
@@ -404,6 +429,10 @@ JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
 
 static void run_finalizers(jl_task_t *ct)
 {
+#ifdef MMTKHEAP
+    mmtk_jl_run_finalizers(ct->ptls);
+    return;
+#endif
     // Racy fast path:
     // The race here should be OK since the race can only happen if
     // another thread is writing to it with the lock held. In such case,
@@ -442,6 +471,10 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
 {
     if (ct == NULL)
         ct = jl_current_task;
+#ifdef MMTKHEAP
+    mmtk_jl_run_pending_finalizers(ct->ptls);
+    return;
+#endif
     jl_ptls_t ptls = ct->ptls;
     if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
         run_finalizers(ct);
@@ -532,6 +565,10 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
 
 void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
+#ifdef MMTKHEAP
+    register_finalizer(v, f, 0);
+    return;
+#endif
     assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
     arraylist_t *a = &ptls->finalizers;
     // This acquire load and the release store at the end are used to
@@ -560,14 +597,20 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
 {
+#ifndef MMTKHEAP
     jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
+#else
+    register_finalizer(v, f, 1);
+#endif
 }
 
 // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
 JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
 {
+#ifndef MMTKHEAP
     assert(!gc_ptr_tag(v, 3));
     jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
+#endif
 }
 
 JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
@@ -582,6 +625,10 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct
 
 JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
 {
+#ifdef MMTKHEAP
+    run_finalizers_for_obj(o);
+    return;
+#endif
     JL_LOCK_NOGC(&finalizers_lock);
     // Copy the finalizers into a temporary list so that code in the finalizer
     // won't change the list as we loop through them.
@@ -955,12 +1002,16 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT
 
 static inline void maybe_collect(jl_ptls_t ptls)
 {
+#ifndef MMTKHEAP
     if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) {
         jl_gc_collect(JL_GC_AUTO);
     }
     else {
         jl_gc_safepoint_(ptls);
     }
+#else
+    mmtk_gc_poll(ptls);
+#endif
 }
 
 // weak references
@@ -971,7 +1022,11 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
     jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type);
     wr->value = value;  // NOTE: wb not needed here
+#ifdef MMTKHEAP
+    mmtk_add_weak_candidate(wr);
+#else
     arraylist_push(&ptls->heap.weak_refs, wr);
+#endif
     return wr;
 }
@@ -1219,14 +1274,25 @@ size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT
     return sz;
 }
 
-static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT
+#ifndef MMTKHEAP
+static
+#endif
+void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT
 {
     if (a->flags.how == 2) {
         char *d = (char*)a->data - a->offset*a->elsize;
+#ifndef MMTKHEAP
         if (a->flags.isaligned)
             jl_free_aligned(d);
         else
             free(d);
+#else
+        if (a->flags.isaligned)
+            mmtk_free_aligned(d);
+        else {
+            mmtk_free(d);
+        }
+#endif
         gc_num.freed += jl_array_nbytes(a);
         gc_num.freecall++;
     }
@@ -1703,6 +1769,7 @@ static void gc_sweep_perm_alloc(void)
 
 JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
 {
+#ifndef MMTKHEAP
     jl_ptls_t ptls = jl_current_task->ptls;
     jl_taggedvalue_t *o = jl_astaggedvalue(ptr);
     // The modification of the `gc_bits` is not atomic but it
@@ -1712,6 +1779,7 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
     o->bits.gc = GC_MARKED;
     arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
     ptls->heap.remset_nptr++; // conservative
+#endif
 }
 
 void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
@@ -3066,9 +3134,15 @@ JL_DLLEXPORT int jl_gc_enable(int on)
         if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
             gc_num.allocd += gc_num.deferred_alloc;
             gc_num.deferred_alloc = 0;
+#ifdef MMTKHEAP
+            enable_collection();
+#endif
         }
     }
     else if (prev && !on) {
+#ifdef MMTKHEAP
+        disable_collection();
+#endif
         // enable -> disable
         jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
         // check if the GC is running and wait for it to finish
@@ -3134,7 +3208,10 @@ JL_DLLEXPORT int64_t jl_gc_live_bytes(void)
     return live_bytes;
 }
 
-static void jl_gc_premark(jl_ptls_t ptls2)
+#ifndef MMTKHEAP
+static
+#endif
+void jl_gc_premark(jl_ptls_t ptls2)
 {
     arraylist_t *remset = ptls2->heap.remset;
     ptls2->heap.remset = ptls2->heap.last_remset;
@@ -3465,6 +3542,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
         jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
     }
+#ifdef MMTKHEAP
+    handle_user_collection_request(ptls);
+    return;
+#endif
     jl_gc_debug_print();
 
     int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
@@ -3593,6 +3674,10 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
     jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+
+#ifdef MMTKHEAP
+    MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid);
+    ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator);
+#endif
 }
 
 // System-wide initializations
@@ -3632,6 +3717,50 @@ void jl_gc_init(void)
 
     if (high_water_mark < max_total_memory)
         max_total_memory = high_water_mark;
 
+#ifdef MMTKHEAP
+    long long min_heap_size;
+    long long max_heap_size;
+    char* min_size_def = getenv("MMTK_MIN_HSIZE");
+    char* min_size_gb = getenv("MMTK_MIN_HSIZE_G");
+
+    char* max_size_def = getenv("MMTK_MAX_HSIZE");
+    char* max_size_gb = getenv("MMTK_MAX_HSIZE_G");
+
+    // default min heap currently set as Julia's default_collect_interval
+    if (min_size_def != NULL) {
+        char *p;
+        double min_size = strtod(min_size_def, &p);
+        min_heap_size = (long) 1024 * 1024 * min_size;
+    } else if (min_size_gb != NULL) {
+        char *p;
+        double min_size = strtod(min_size_gb, &p);
+        min_heap_size = (long) 1024 * 1024 * 1024 * min_size;
+    } else {
+        min_heap_size = default_collect_interval;
+    }
+
+    // default max heap currently set as 70% the free memory in the system
+    if (max_size_def != NULL) {
+        char *p;
+        double max_size = strtod(max_size_def, &p);
+        max_heap_size = (long) 1024 * 1024 * max_size;
+    } else if (max_size_gb != NULL) {
+        char *p;
+        double max_size = strtod(max_size_gb, &p);
+        max_heap_size = (long) 1024 * 1024 * 1024 * max_size;
+    } else {
+        max_heap_size = uv_get_free_memory() * 70 / 100;
+    }
+
+    // if only max size is specified initialize MMTk with a fixed size heap
+    if ((max_size_def != NULL || max_size_gb != NULL) && (min_size_def == NULL && min_size_gb == NULL)) {
+        gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
+    } else {
+        gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
+    }
+#endif
+
     jl_gc_mark_sp_t sp = {NULL, NULL, NULL, NULL};
     gc_mark_loop(NULL, sp);
     t_start = jl_hrtime();
@@ -3664,6 +3793,9 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
                                 jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
         jl_atomic_store_relaxed(&ptls->gc_num.malloc,
                                 jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+#ifdef MMTKHEAP
+        return mmtk_counted_malloc(sz);
+#endif
     }
     return malloc(sz);
 }
@@ -3679,6 +3811,9 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
                                 jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
         jl_atomic_store_relaxed(&ptls->gc_num.malloc,
                                 jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+#ifdef MMTKHEAP
+        return mmtk_counted_calloc(nm, sz);
+#endif
     }
     return calloc(nm, sz);
 }
@@ -3687,14 +3822,18 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
     jl_gcframe_t **pgcstack = jl_get_pgcstack();
     jl_task_t *ct = jl_current_task;
-    free(p);
     if (pgcstack && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         jl_atomic_store_relaxed(&ptls->gc_num.freed,
                                 jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz);
        jl_atomic_store_relaxed(&ptls->gc_num.freecall,
                                 jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1);
+#ifdef MMTKHEAP
+        mmtk_free_with_size(p, sz);
+        return;
+#endif
     }
+    free(p);
 }
 
@@ -3712,6 +3851,9 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
                                 jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old));
         jl_atomic_store_relaxed(&ptls->gc_num.realloc,
                                 jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
+#ifdef MMTKHEAP
+        return mmtk_realloc_with_old_size(p, sz, old);
+#endif
     }
     return realloc(p, sz);
 }
@@ -3853,6 +3995,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
 
 jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
 {
+#ifndef MMTKHEAP
     size_t len = jl_string_len(s);
     if (sz <= len) return s;
     jl_taggedvalue_t *v = jl_astaggedvalue(s);
@@ -3886,6 +4029,12 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
     jl_value_t *snew = jl_valueof(&newbig->header);
     *(size_t*)snew = sz;
     return snew;
+#else
+    size_t len = jl_string_len(s);
+    jl_value_t *snew = jl_alloc_string(sz);
+    memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len);
+    return snew;
+#endif
 }
 
 // Perm gen allocator

diff --git a/src/init.c b/src/init.c
index 0651d3b274f24..45d6b8ee98873 100644
--- a/src/init.c
+++ b/src/init.c
@@ -295,8 +295,12 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
     JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
     JL_STDERR = (uv_stream_t*) STDERR_FILENO;
 
+#ifndef MMTKHEAP
     if (ct)
         jl_gc_run_all_finalizers(ct);
+#else
+    mmtk_jl_gc_run_all_finalizers();
+#endif
 
     uv_loop_t *loop = jl_global_event_loop();
     if (loop != NULL) {
@@ -806,6 +810,12 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     arraylist_new(&jl_image_relocs, 0);
 
     jl_ptls_t ptls = jl_init_threadtls(0);
+
+#ifdef MMTKHEAP
+    // start MMTk's GC
+    initialize_collection((void*) ptls);
+#endif
+
 #pragma GCC diagnostic push
 #if defined(_COMPILER_GCC_) && __GNUC__ >= 12
 #pragma GCC diagnostic ignored "-Wdangling-pointer"

diff --git a/src/julia.h b/src/julia.h
index 03efa773d026c..2bc1a97b681ed 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -932,22 +932,27 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t
 
 STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
+#ifndef MMTKHEAP
     // parent and ptr isa jl_value_t*
     if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset
                    (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young
         jl_gc_queue_root((jl_value_t*)parent);
+#endif
 }
 
 STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
 {
+#ifndef MMTKHEAP
     // if ptr is old
     if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) {
         jl_gc_queue_root((jl_value_t*)ptr);
     }
+#endif
 }
 
 STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
 {
+#ifndef MMTKHEAP
     // ptr is an immutable object
     if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
         return; // parent is young or in remset
@@ -957,6 +962,7 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
     const jl_datatype_layout_t *ly = dt->layout;
     if (ly->npointers)
         jl_gc_queue_multiroot((jl_value_t*)parent, ptr);
+#endif
 }
 
 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);

diff --git a/src/julia_internal.h b/src/julia_internal.h
index 7565967b0a270..adf0c0c3fdd67 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -329,6 +329,10 @@ JL_DLLEXPORT extern const char *jl_filename;
 jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize);
 jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
+#ifdef MMTKHEAP
+JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty);
+JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
+#endif
 JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
 extern uv_mutex_t gc_perm_lock;
 void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
@@ -452,17 +456,27 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
     jl_value_t *v;
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
+#ifndef MMTKHEAP
         int pool_id = jl_gc_szclass(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
         // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
         // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
         v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+#else
+        int pool_id = jl_gc_szclass(allocsz);
+        int osize = jl_gc_sizeclasses[pool_id];
+        v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty);
+#endif
     }
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
             jl_throw(jl_memory_exception);
+#ifndef MMTKHEAP
        v = jl_gc_big_alloc_noinline(ptls, allocsz);
+#else
+        v = jl_mmtk_gc_alloc_big(ptls, allocsz);
+#endif
     }
     jl_set_typeof(v, ty);
     maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
@@ -564,16 +578,20 @@ void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT;
 
 STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
 {
+#ifndef MMTKHEAP
     jl_gc_wb(bnd, val);
+#endif
 }
 
 STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
 {
+#ifndef MMTKHEAP
     // if parent is marked and buf is not
     if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) {
         jl_task_t *ct = jl_current_task;
         gc_setmark_buf(ct->ptls, bufptr, 3, minsz);
     }
+#endif
 }
 
 void jl_gc_debug_print_status(void);

diff --git a/src/julia_threads.h b/src/julia_threads.h
index 5874225c12eac..4d6284562120b 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,6 +4,10 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
+#ifdef MMTKHEAP
+#include "mmtkMutator.h"
+#endif
+
 #include "julia_atomics.h"
 #ifndef _OS_WINDOWS_
 #include "pthread.h"
@@ -282,6 +286,12 @@ typedef struct _jl_tls_states_t {
         uint64_t sleep_leave;
     )
 
+#ifdef MMTKHEAP
+    MMTkMutatorContext* mmtk_mutator_ptr;
+    void* cursor;
+    void* limit;
+#endif
+
     // some hidden state (usually just because we don't have the type's size declaration)
 #ifdef LIBRARY_EXPORTS
     uv_mutex_t sleep_lock;

diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 3b8533c6d0115..3e2eb3bcdf6ed 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -224,10 +224,78 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
         derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*));
     }
     else {
+#ifndef MMTKHEAP
         auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
         auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
         newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
         derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
+#else
+        auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+        auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
+        auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor));
+        auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, limit));
+
+        auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
+        auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
+        auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
+
+        auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
+        auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
+        auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
+        auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
+        auto result = builder.CreateNSWAdd(cursor, delta, "result");
+
+        auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
+
+        auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
+        auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
+        auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
+
+        auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
+
+        auto current_block = target->getParent();
+        builder.SetInsertPoint(target->getNextNode());
+        auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
+        auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");
+
+        auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
+        auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);
+
+        auto next_br = current_block->getTerminator();
+        next_br->eraseFromParent();
+        builder.SetInsertPoint(current_block);
+        builder.CreateCondBr(gt_limit, slowpath, fastpath);
+
+        // slowpath
+        builder.SetInsertPoint(slowpath);
+        auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+        auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
+        new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
+        builder.CreateBr(top_cont);
+
+        // fastpath
+        builder.SetInsertPoint(fastpath);
+        builder.CreateStore(new_cursor, cursor_ptr);
+
+        // ptls->gc_num.allocd += osize;
+        auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
+        auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
+        auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
+        auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
+        auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
+        builder.CreateStore(pool_allocd_total, pool_alloc_tls);
+
+        auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
+        auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
+        builder.CreateBr(top_cont);
+
+        phiNode->addIncoming(new_call, slowpath);
+        phiNode->addIncoming(v_as_ptr, fastpath);
+        phiNode->takeName(target);
+
+        return phiNode;
+#endif
     }
     newI->setAttributes(newI->getCalledFunction()->getAttributes());
     newI->addRetAttr(derefAttr);

diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index ea390f01010fd..c46228f13490b 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -226,8 +226,13 @@ namespace jl_intrinsics {
 }
 
 namespace jl_well_known {
+#ifndef MMTKHEAP
     static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
     static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
+#else
+    static const char *GC_BIG_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_big);
+    static const char *GC_POOL_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_default_llvm);
+#endif
     static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
 
     using jl_intrinsics::addGCAllocAttributes;
diff --git a/src/threading.c b/src/threading.c
index db9df0bad0dde..52b3fc2d8c06d 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -347,6 +347,10 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     ptls->rngseed = jl_rand();
     if (tid == 0)
         ptls->disable_gc = 1;
+#ifdef MMTKHEAP
+    if (tid == 0)
+        disable_collection();
+#endif
 #ifdef _OS_WINDOWS_
     if (tid == 0) {
         if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),

From 72a275233012a80dfd5c5ac1c83afdf9aff0a87a Mon Sep 17 00:00:00 2001
From: Kiran
Date: Thu, 16 Mar 2023 19:14:52 -0400
Subject: [PATCH 002/116] Refactor MMTk changes to Julia (#5)

In preparation for upstreaming these changes. This ended up being a
pretty large set of changes, but I think most of this would have been
necessary for the upstream PR anyway.

Summary of the changes:
- Build-related, to more closely match how Julia adds optional
  libraries. More has to be done here, for automation and for
  BinaryBuilder.
- Reduced the `#ifdef MMTK_GC`s.
- Documented the GC interface in `gc-interface.h`.
- Moved code that is common to Julia's GC and MMTk into `gc-common.c`.
- Exclude `gc-debug.c` and `gc-pages.c` entirely for MMTk.
- Reorganized `gc.h` into 3 parts: common, MMTk-specific, and Julia
  GC-specific.
- Reorganized changes to `julia.h` and `julia_internal.h`.
- Removed `#ifdef MMTK_GC` in `llvm-pass-helpers.cpp`.

We can now start working on resolving the TODOs.
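For orientation while reviewing: the entry points documented in
gc-interface.h are the ones the rest of the runtime already calls; an
illustrative excerpt (signatures taken from elsewhere in this series, not
the full header):

    jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty);   // object allocation
    void jl_gc_collect(jl_gc_collection_t collection);              // collection request
    jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); // weak refs
    void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f); // finalizers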
---
 Make.inc                       |  26 ++
 contrib/refresh_checksums.mk   |   2 +-
 src/Makefile                   |  77 +--
 src/array.c                    |   4 +-
 src/gc-common.c                | 732 +++++++++++++++++++++++++++++
 src/gc-debug.c                 |  47 +-
 src/gc-pages.c                 |   4 +
 src/gc.c                       | 832 +--------------------------------
 src/gc.h                       | 243 ++++++----
 src/init.c                     |  13 +-
 src/julia.h                    |  22 +-
 src/julia_internal.h           |  49 +-
 src/julia_threads.h            |   4 +-
 src/llvm-final-gc-lowering.cpp |   6 +-
 src/llvm-pass-helpers.cpp      |   5 -
 src/mmtk-gc.c                  | 487 +++++++++++++++++++
 src/threading.c                |   6 +-
 17 files changed, 1515 insertions(+), 1044 deletions(-)
 create mode 100644 src/gc-common.c
 create mode 100644 src/mmtk-gc.c

diff --git a/Make.inc b/Make.inc
index bb1922c32bc44..7c1ca6a5db7a8 100644
--- a/Make.inc
+++ b/Make.inc
@@ -86,6 +86,9 @@ HAVE_SSP := 0
 WITH_GC_VERIFY := 0
 WITH_GC_DEBUG_ENV := 0
 
+# MMTk GC
+WITH_MMTK ?= 0
+
 # Enable DTrace support
 WITH_DTRACE := 0
 
@@ -709,6 +712,29 @@ JCXXFLAGS += -DGC_DEBUG_ENV
 JCFLAGS += -DGC_DEBUG_ENV
 endif
 
+ifeq ($(WITH_MMTK), 1)
+ifeq (${MMTK_JULIA_DIR},)
+$(error MMTK_JULIA_DIR must be set to use MMTk)
+endif
+JCXXFLAGS += -DMMTK_GC
+JCFLAGS += -DMMTK_GC
+ifeq (${MMTK_BUILD},)
+ifeq (debug,$(findstring debug,$(MAKECMDGOALS)))
+MMTK_BUILD = debug
+else
+MMTK_BUILD = release
+endif
+endif
+MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk
+MMTK_API_INC = $(MMTK_DIR)/api
+MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia
+MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD) -lmmtk_julia
+LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD)/
+else
+MMTK_JULIA_INC :=
+MMTK_LIB :=
+endif
+
 ifeq ($(WITH_DTRACE), 1)
 JCXXFLAGS += -DUSE_DTRACE
 JCFLAGS += -DUSE_DTRACE

diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk
index fc632728e9a9e..664a1e4b038e0 100644
--- a/contrib/refresh_checksums.mk
+++ b/contrib/refresh_checksums.mk
@@ -24,7 +24,7 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS))
 NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS))
 
 # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded:
-BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline
+BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libmmtk_julia
 BB_GCC_EXPANDED_PROJECTS=openblas csl
 BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld
 # These are non-BB source-only deps

diff --git a/src/Makefile b/src/Makefile
index d113eea5422a5..2e976282015d6 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,17 +4,6 @@ BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 include $(JULIAHOME)/deps/llvm-ver.make
 
-ifeq ($(USE_MMTK), 1)
-CFLAGS = -DMMTKHEAP
-CPPFLAGS = -DMMTKHEAP
-MMTK_BUILD_TYPE = ${MMTK_BUILD}
-MMTK_DIR = ${MMTK_JULIA_DIR}
-MMTK_API_DIR_INCLUDE = $(MMTK_DIR)/api
-MMTK_JULIA_DIR_INCLUDE = $(MMTK_DIR)/../julia
-MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/ -lmmtk_julia
-LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/
-endif
-
 JCFLAGS += $(CFLAGS)
 JCXXFLAGS += $(CXXFLAGS)
 JCPPFLAGS += $(CPPFLAGS)
@@ -30,10 +19,6 @@ FLAGS := \
 	-I$(LIBUV_INC) -I$(build_includedir) \
 	-I$(JULIAHOME)/deps/valgrind
 
-ifeq ($(USE_MMTK), 1)
-FLAGS += -I$(MMTK_API_DIR_INCLUDE) -I$(MMTK_JULIA_DIR_INCLUDE)
-endif
-
 FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
 	-Wno-comment -Wpointer-arith -Wundef
 ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result))
@@ -45,6 +30,10 @@ ifeq ($(USECLANG),1)
 FLAGS += -Wno-return-type-c-linkage
 endif
 
+ifeq ($(WITH_MMTK), 1)
+FLAGS += -I$(MMTK_API_INC) -I$(MMTK_JULIA_INC)
+endif
+
 FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"'
 ifeq ($(OS),WINNT)
 FLAGS += -DJL_BUILD_UNAME='"NT"'
@@ -60,9 +49,10 @@ SRCS := \
 	jltypes gf typemap smallintset ast builtins module interpreter symbol \
 	dlload sys init task array staticdata toplevel jl_uv datatype \
 	simplevector runtime_intrinsics precompile jloptions \
-	threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
-	jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
-	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall
+	threading partr stackwalk gc-common gc gc-debug gc-pages gc-stacks gc-alloc-profiler \
+	mmtk-gc method jlapi signal-handling safepoint timing subtype rtutils \
+	gc-heap-snapshot crc32c APInt-C processor ircode opaque_closure codegen-stubs \
+	coverage runtime_ccall
 
 RT_LLVMLINK :=
 CG_LLVMLINK :=
@@ -173,13 +163,8 @@ LIBJULIA_PATH_REL := libjulia
 endif
 
 COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
-RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS)
-CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS)
-
-ifeq ($(USE_MMTK), 1)
-CG_LIBS += $(MMTK_LIB)
-RT_LIBS += $(MMTK_LIB)
-endif
-
+RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(MMTK_LIB)
+CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(MMTK_LIB)
 RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
 CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
@@ -189,10 +174,13 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
-ifeq ($(USE_MMTK), 1)
+ifeq ($(WITH_MMTK), 1)
 MMTK_SRCS := mmtk_julia
-MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o)
-MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o)
+MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o)
+MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj)
+else
+MMTK_OBJS :=
+MMTK_DOBJS :=
 endif
 
 CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
 CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
@@ -262,10 +250,10 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.d
 $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
 	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 
-ifeq ($(USE_MMTK), 1)
-$(MMTK_JULIA_DIR_INCLUDE)/%.o: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE)
+ifeq ($(WITH_MMTK), 1)
+$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
-$(MMTK_JULIA_DIR_INCLUDE)/%.dbg.obj: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE)
+$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
 endif
@@ -333,6 +321,8 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)
 $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
 $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
 $(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/mmtk-gc.o $(BUILDDIR)/mmtk-gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-common.o $(BUILDDIR)/gc-common.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
 $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h
 $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h
 $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
@@ -398,7 +388,6 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 
 CXXLD = $(CXX) -shared
 
-ifeq ($(USE_MMTK), 1)
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
@@ -410,19 +399,6 @@ $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
-else
-$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
-	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
-	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
-	$(DSYMUTIL) $@
-
-$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
-	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
-	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
-	$(DSYMUTIL) $@
-endif
 
 ifneq ($(OS), WINNT)
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \
@@ -464,20 +440,11 @@ libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SH
 libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
 libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS)
 
-ifeq ($(USE_MMTK), 1)
 clean:
 	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
 	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc
 	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a
-	-rm -f $(BUILDDIR)/julia_version.h
-	-rm -fr $(MMTK_JULIA_DIR_INCLUDE)/*.o
-else
-clean:
-	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
-	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
-	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
-	-rm -f $(BUILDDIR)/julia_version.h
-endif
+	-rm -f $(BUILDDIR)/julia_version.h $(MMTK_OBJS) $(MMTK_DOBJS)
 
 clean-flisp:
 	-$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)'

diff --git a/src/array.c b/src/array.c
index f515f5d26c024..c6cefbebceb20 100644
--- a/src/array.c
+++ b/src/array.c
@@ -497,7 +497,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     jl_ptls_t ptls = ct->ptls;
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
-#ifndef MMTKHEAP
+#ifndef MMTK_GC
         int pool_id = jl_gc_szclass_align8(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
@@ -513,7 +513,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
             jl_throw(jl_memory_exception);
-#ifndef MMTKHEAP
+#ifndef MMTK_GC
         s = jl_gc_big_alloc_noinline(ptls, allocsz);
 #else
         s = jl_mmtk_gc_alloc_big(ptls, allocsz);

diff --git a/src/gc-common.c b/src/gc-common.c
new file mode 100644
index 0000000000000..f5636c97fe32a
--- /dev/null
+++ b/src/gc-common.c
@@ -0,0 +1,732 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "gc.h"
+
+jl_gc_num_t gc_num = {0};
+size_t last_long_collect_interval;
+int gc_n_threads;
+jl_ptls_t* gc_all_tls_states;
+
+int64_t live_bytes = 0;
+
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
+
+// mutex for gc-heap-snapshot.
+jl_mutex_t heapsnapshot_lock;
+
+const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void)
+{
+    return jl_buff_tag;
+}
+
+// GC knobs and self-measurement variables
+
+int64_t last_gc_total_bytes = 0;
+
+// max_total_memory is a suggestion. We try very hard to stay
+// under this limit, but we will go above it rather than halting.
+#ifdef _P64
+typedef uint64_t memsize_t;
+const size_t default_collect_interval = 5600 * 1024 * sizeof(void*);
+const size_t max_collect_interval = 1250000000UL;
+size_t total_mem;
+// We expose this to the user/ci as jl_gc_set_max_memory
+memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
+#else
+typedef uint32_t memsize_t;
+const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
+const size_t max_collect_interval = 500000000UL;
+// Work really hard to stay within 2GB
+// Alternative is to risk running out of address space
+// on 32 bit architectures.
+memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024;
+#endif
+
+// finalizers
+// ---
+uint64_t finalizer_rngState[4];
+
+JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
+{
+    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
+}
+
+void run_finalizer(jl_task_t *ct, void *o, void *ff)
+{
+    int ptr_finalizer = gc_ptr_tag(o, 1);
+    o = gc_ptr_clear_tag(o, 3);
+    if (ptr_finalizer) {
+        ((void (*)(void*))ff)((void*)o);
+        return;
+    }
+    JL_TRY {
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1);
+        ct->world_age = last_age;
+    }
+    JL_CATCH {
+        jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        jlbacktrace(); // written to STDERR_FILENO
+    }
+}
+
+JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
+{
+    if (ptls == NULL)
+        ptls = jl_current_task->ptls;
+    return ptls->finalizers_inhibited;
+}
+
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    ptls->finalizers_inhibited++;
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
+{
+    jl_task_t *ct = jl_current_task;
+#ifdef NDEBUG
+    ct->ptls->finalizers_inhibited--;
+#else
+    jl_gc_enable_finalizers(ct, 1);
+#endif
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    int old_val = ptls->finalizers_inhibited;
+    int new_val = old_val + (on ? -1 : 1);
+    if (new_val < 0) {
+        JL_TRY {
+            jl_error(""); // get a backtrace
+        }
+        JL_CATCH {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
+            // Only print the backtrace once, to avoid spamming the logs
+            static int backtrace_printed = 0;
+            if (backtrace_printed == 0) {
+                backtrace_printed = 1;
+                jlbacktrace(); // written to STDERR_FILENO
+            }
+        }
+        return;
+    }
+    ptls->finalizers_inhibited = new_val;
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
+        jl_gc_run_pending_finalizers(ct);
+    }
+}
+
+// allocation
+// ---
+
+JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    return jl_gc_alloc_(ptls, sz, ty);
+}
+
+// Instrumented version of jl_gc_big_alloc_inner, called into by
+// LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
+{
+    jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
+    maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being
+// inlined into its callers. We provide an external-facing interface for
+// callers, and inline `jl_gc_big_alloc_inner` into this. (See
+// https://github.com/JuliaLang/julia/pull/43868 for more details.)
+jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz)
+{
+    return jl_gc_big_alloc_inner(ptls, sz);
+}
+
+// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, int osize)
+{
+    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+    maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline
+// `jl_gc_pool_alloc_inner` into this. (See https://github.com/JuliaLang/julia/pull/43868
+// for more details.)
+jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize)
+{
+    return jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+}
+
+int jl_gc_classify_pools(size_t sz, int *osize)
+{
+    if (sz > GC_MAX_SZCLASS)
+        return -1;
+    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    int klass = jl_gc_szclass(allocsz);
+    *osize = jl_gc_sizeclasses[klass];
+    return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]);
+}
+
+// TODO: jl_gc_track_malloced_array needed? Eliminate heap.mallocarrays,
+// heap.mafreelist, mallocarray_t?
+void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT
+{
+    // This is **NOT** a GC safe point.
+    mallocarray_t *ma;
+    if (ptls->heap.mafreelist == NULL) {
+        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
+    }
+    else {
+        ma = ptls->heap.mafreelist;
+        ptls->heap.mafreelist = ma->next;
+    }
+    ma->a = a;
+    ma->next = ptls->heap.mallocarrays;
+    ptls->heap.mallocarrays = ma;
+}
+
+void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+                            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+}
+
+// GCNum, statistics manipulation
+// ---
+void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls) {
+            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval);
+            dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed);
+            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc);
+            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc);
+            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc);
+            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc);
+            dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall);
+        }
+    }
+}
+
+void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls) {
+            memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
+            jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        }
+    }
+}
+
+void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
+{
+    combine_thread_gc_counts(&gc_num);
+    live_bytes += (gc_num.deferred_alloc + gc_num.allocd);
+    gc_num.allocd = 0;
+    gc_num.deferred_alloc = 0;
+    reset_thread_gc_counts();
+}
+
+size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT
+{
+    size_t sz = 0;
+    int isbitsunion = jl_array_isbitsunion(a);
+    if (jl_array_ndims(a) == 1)
+        sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0);
+    else
+        sz = a->elsize * jl_array_len(a);
+    if (isbitsunion)
+        // account for isbits Union array selector bytes
+        sz += jl_array_len(a);
+    return sz;
+}
+
+void gc_premark(jl_ptls_t ptls2)
+{
+    arraylist_t *remset = ptls2->heap.remset;
+    ptls2->heap.remset = ptls2->heap.last_remset;
+    ptls2->heap.last_remset = remset;
+    ptls2->heap.remset->len = 0;
+    ptls2->heap.remset_nptr = 0;
+    // avoid counting remembered objects
+    // in `perm_scanned_bytes`
+    size_t len = remset->len;
+    void **items = remset->items;
+    for (size_t i = 0; i < len; i++) {
+        jl_value_t *item = (jl_value_t *)items[i];
+        objprofile_count(jl_typeof(item), 2, 0);
+        jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
+    }
+}
+
+// GC control
+// ---
+
+_Atomic(uint32_t) jl_gc_disable_counter = 1;
+
+JL_DLLEXPORT int jl_gc_enable(int on)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    int prev = !ptls->disable_gc;
+    ptls->disable_gc = (on == 0);
+    if (on && !prev) {
+        // disable -> enable
+        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
+            gc_num.allocd += gc_num.deferred_alloc;
+            gc_num.deferred_alloc = 0;
+            enable_collection();
+        }
+    }
+    else if (prev && !on) {
+        disable_collection();
+        // enable -> disable
+        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
+        // check if the GC is running and wait for it to finish
+        jl_gc_safepoint_(ptls);
+    }
+    return prev;
+}
+
+JL_DLLEXPORT int jl_gc_is_enabled(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return !ptls->disable_gc;
+}
+
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
+{
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num);
+    // Sync this logic with `base/util.jl:GC_Diff`
+    *bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
+}
+
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
+{
+    return gc_num.total_time;
+}
+
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
+{
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num);
+    return num;
+}
+
+JL_DLLEXPORT void jl_gc_reset_stats(void)
+{
+    gc_num.max_pause = 0;
+    gc_num.max_memory = 0;
+    gc_num.max_time_to_safepoint = 0;
+}
+
+// TODO: these were supposed to be thread local
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT
+{
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT
+{
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb - offset;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void)
+{
+    return live_bytes;
+}
+
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem)
+{
+    if (max_mem > 0 && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1))
+        max_total_memory = max_mem;
+}
+
+// callback for passing OOM errors from gmp
+JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
+{
+    jl_throw(jl_memory_exception);
+}
+
+// allocation wrappers that save the size of allocations, to allow using
+// jl_gc_counted_* functions with a libc-compatible API.
+
+JL_DLLEXPORT void *jl_malloc(size_t sz)
+{
+    int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
+    if (p == NULL)
+        return NULL;
+    p[0] = sz;
+    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+}
+
+//_unchecked_calloc does not check for potential overflow of nm*sz
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
+    size_t nmsz = nm*sz;
+    int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
+    if (p == NULL)
+        return NULL;
+    p[0] = nmsz;
+    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+}
+
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+{
+    if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
+        return NULL;
+    return _unchecked_calloc(nm, sz);
+}
+
+JL_DLLEXPORT void jl_free(void *p)
+{
+    if (p != NULL) {
+        int64_t *pp = (int64_t *)p - 2;
+        size_t sz = pp[0];
+        jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
+    }
+}
+
+JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
+{
+    int64_t *pp;
+    size_t szold;
+    if (p == NULL) {
+        pp = NULL;
+        szold = 0;
+    }
+    else {
+        pp = (int64_t *)p - 2;
+        szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
+    }
+    int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
+    if (pnew == NULL)
+        return NULL;
+    pnew[0] = sz;
+    return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+}
+
+// allocating blocks for Arrays and Strings
+
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    maybe_collect(ptls);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz) // overflow in adding offs, size was "negative"
+        jl_throw(jl_memory_exception);
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+                            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+                            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *b = malloc_cache_align(allocsz);
+    if (b == NULL)
+        jl_throw(jl_memory_exception);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
+    return b;
+}
+
+void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
+                          int isaligned, jl_value_t *owner, int8_t can_collect)
+{
+    if (can_collect)
+        maybe_collect(ptls);
+
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz) // overflow in adding offs, size was "negative"
+        jl_throw(jl_memory_exception);
+
+    // TODO: not needed? gc_cache.*?
+    if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) {
+        ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
+        live_bytes += allocsz - oldsz;
+    }
+    else if (allocsz < oldsz)
+        jl_atomic_store_relaxed(&ptls->gc_num.freed,
+                                jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz));
+    else
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+                                jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
+    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
+                            jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *b;
+    if (isaligned)
+        b = realloc_cache_align(d, allocsz, oldsz);
+    else
+        b = realloc(d, allocsz);
+    if (b == NULL)
+        jl_throw(jl_memory_exception);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag);
+    return b;
+}
+
+JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
+                                         int isaligned, jl_value_t *owner)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1);
+}
+
+// Perm gen allocator
+// 2M pool
+#define GC_PERM_POOL_SIZE (2 * 1024 * 1024)
+// 20k limit for pool allocation. At most 1% fragmentation
+#define GC_PERM_POOL_LIMIT (20 * 1024)
+uv_mutex_t gc_perm_lock;
+static uintptr_t gc_perm_pool = 0;
+static uintptr_t gc_perm_end = 0;
+
+static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT
+{
+    // `align` must be power of two
+    assert(offset == 0 || offset < align);
+    const size_t malloc_align = sizeof(void*) == 8 ? 16 : 4;
+    if (align > 1 && (offset != 0 || align > malloc_align))
+        sz += align - 1;
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *base = zero ? calloc(1, sz) : malloc(sz);
+    if (base == NULL)
+        jl_throw(jl_memory_exception);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    jl_may_leak(base);
+    assert(align > 0);
+    unsigned diff = (offset - (uintptr_t)base) % align;
+    return (void*)((char*)base + diff);
+}
+
+STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT
+{
+    uintptr_t pool = LLT_ALIGN(gc_perm_pool + offset, (uintptr_t)align) - offset;
+    uintptr_t end = pool + sz;
+    if (end > gc_perm_end)
+        return NULL;
+    gc_perm_pool = end;
+    return (void*)jl_assume(pool);
+}
+
+// **NOT** a safepoint
+void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
+{
+    // The caller should have acquired `gc_perm_lock`
+    assert(align < GC_PERM_POOL_LIMIT);
+#ifndef MEMDEBUG
+    if (__unlikely(sz > GC_PERM_POOL_LIMIT))
+#endif
+        return gc_perm_alloc_large(sz, zero, align, offset);
+    void *ptr = gc_try_perm_alloc_pool(sz, align, offset);
+    if (__likely(ptr))
+        return ptr;
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+    void *pool = VirtualAlloc(NULL, GC_PERM_POOL_SIZE, MEM_COMMIT, PAGE_READWRITE);
+    SetLastError(last_error);
+    errno = last_errno;
+    if (__unlikely(pool == NULL))
+        return NULL;
+#else
+    void *pool = mmap(0, GC_PERM_POOL_SIZE, PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    errno = last_errno;
+    if (__unlikely(pool == MAP_FAILED))
+        return NULL;
+#endif
+    gc_perm_pool = (uintptr_t)pool;
+    gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE;
+    return gc_try_perm_alloc_pool(sz, align, offset);
+}
+
+// **NOT** a safepoint
+void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
+{
+    assert(align < GC_PERM_POOL_LIMIT);
+#ifndef MEMDEBUG
+    if (__unlikely(sz > GC_PERM_POOL_LIMIT))
+#endif
+        return gc_perm_alloc_large(sz, zero, align, offset);
+    uv_mutex_lock(&gc_perm_lock);
+    void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset);
+    uv_mutex_unlock(&gc_perm_lock);
+    return p;
+}
+
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_gc_add_finalizer_th(ptls, v, f);
+}
+
+JL_DLLEXPORT void jl_finalize(jl_value_t *o)
+{
+    jl_finalize_th(jl_current_task, o);
+}
+
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_new_weakref_th(ptls, value);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sz, NULL);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, 0, NULL);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sizeof(void*), NULL);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL);
+}
+
+JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void)
+{
+    // TODO: meaningful for MMTk?
+    return GC_MAX_SZCLASS;
+}
+ return GC_MAX_SZCLASS; +} + +JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) +{ + return sizeof(bigval_t); +} + + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +{ + // TODO: correct for MMTk? + arraylist_push(&ptls->sweep_objs, obj); +} + + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + else if (vt->name == jl_array_typename) { + jl_array_t *a = (jl_array_t*)obj; + start = (char*)a->data; + len = jl_array_len(a); + elsize = a->elsize; + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +static int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +#ifdef __cplusplus +} +#endif diff --git a/src/gc-debug.c b/src/gc-debug.c index a233b18d7dcfc..c5ab21a3fb3c1 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc.h" #include #include @@ -1231,43 +1233,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - else if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)obj; - start = (char*)a->data; - len = jl_array_len(a); - elsize = a->elsize; - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - // Print a backtrace from the `mq->start` of the mark queue up to `mq->current` // `offset` will be added to `mq->current` for convenience in the debugger. 
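The function is a debugging aid meant to be invoked by hand, typically from a debugger while the mark loop is stopped. A plausible invocation on the current thread's queue (illustrative only; this patch does not add such a call):

    // Dump the pending mark work for this thread (hypothetical usage):
    jl_ptls_t ptls = jl_current_task->ptls;
    // offset 0 prints from mq->start up to mq->current;
    // a negative offset stops short of the newest entries.
    gc_mark_loop_unwind(ptls, &ptls->mark_queue, 0);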
NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset) @@ -1292,12 +1257,6 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int off jl_set_safe_restore(old_buf); } -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; @@ -1312,3 +1271,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-pages.c b/src/gc-pages.c index d579eb0cd4fbb..e367334450863 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc.h" #ifndef _OS_WINDOWS_ # include @@ -335,3 +337,5 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc.c b/src/gc.c index cab7c37369450..e656fa331be38 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc.h" #include "julia_gcext.h" #include "julia_assert.h" @@ -7,10 +9,6 @@ #include // for malloc_trim #endif -#ifdef MMTKHEAP -#include "mmtk_julia.h" -#endif - #ifdef __cplusplus extern "C" { #endif @@ -123,9 +121,6 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre static jl_mutex_t finalizers_lock; static uv_mutex_t gc_cache_lock; -// mutex for gc-heap-snapshot. -jl_mutex_t heapsnapshot_lock; - // Flag that tells us whether we need to support conservative marking // of objects. static _Atomic(int) support_conservative_marking = 0; @@ -162,16 +157,6 @@ static _Atomic(int) support_conservative_marking = 0; * finalizers in unmanaged (GC safe) mode. */ -jl_gc_num_t gc_num = {0}; -static size_t last_long_collect_interval; -int gc_n_threads; -jl_ptls_t* gc_all_tls_states; -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) -{ - return jl_buff_tag; -} - pagetable_t memory_map; // List of marked big objects. Not per-thread. Accessed only by master thread. @@ -185,7 +170,6 @@ bigval_t *big_objects_marked = NULL; // `to_finalize` should not have tagged pointers. arraylist_t finalizer_list_marked; arraylist_t to_finalize; -JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0; NOINLINE uintptr_t gc_get_stack_ptr(void) { @@ -215,31 +199,26 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) } -void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads); - // malloc wrappers, aligned allocation #if defined(_OS_WINDOWS_) -STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) +inline void *jl_malloc_aligned(size_t sz, size_t align) { return _aligned_malloc(sz ? sz : 1, align); } -STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, +inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, size_t align) { (void)oldsz; return _aligned_realloc(p, sz ? 
sz : 1, align); } -STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT { _aligned_free(p); } #else -STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) +inline void *jl_malloc_aligned(size_t sz, size_t align) { -#ifdef MMTKHEAP - return mmtk_malloc_aligned(sz, align); -#endif #if defined(_P64) || defined(__APPLE__) if (align <= 16) return malloc(sz); @@ -249,17 +228,9 @@ STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) return NULL; return ptr; } -STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align) { -#ifdef MMTKHEAP - void *res = jl_malloc_aligned(sz, align); - if (res != NULL) { - memcpy(res, d, oldsz > sz ? sz : oldsz); - mmtk_free_aligned(d); - } - return res; -#endif #if defined(_P64) || defined(__APPLE__) if (align <= 16) return realloc(d, sz); @@ -271,17 +242,11 @@ STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, } return b; } -STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT { -#ifdef MMTKHEAP - mmtk_free_aligned(p); -#else free(p); -#endif } #endif -#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) -#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT { @@ -292,31 +257,6 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); } -#ifndef MMTKHEAP -static -#endif -void run_finalizer(jl_task_t *ct, void *o, void *ff) -{ - int ptr_finalizer = gc_ptr_tag(o, 1); - o = gc_ptr_clear_tag(o, 3); - if (ptr_finalizer) { - ((void (*)(void*))ff)((void*)o); - return; - } - JL_TRY { - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); - ct->world_age = last_age; - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } -} - // if `need_sync` is true, the `list` is the `finalizers` list of another // thread and we need additional synchronizations static void finalize_object(arraylist_t *list, jl_value_t *o, @@ -404,24 +344,8 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NO ct->sticky = sticky; } -#ifndef MMTKHEAP -static -#endif -uint64_t finalizer_rngState[4]; - -void jl_rng_split(uint64_t to[4], uint64_t from[4]) JL_NOTSAFEPOINT; - -JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) -{ - jl_rng_split(finalizer_rngState, jl_current_task->rngState); -} - static void run_finalizers(jl_task_t *ct) { -#ifdef MMTKHEAP - mmtk_jl_run_finalizers(ct->ptls); - return; -#endif // Racy fast path: // The race here should be OK since the race can only happen if // another thread is writing to it with the lock held. 
In such case, @@ -460,67 +384,12 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) { if (ct == NULL) ct = jl_current_task; -#ifdef MMTKHEAP - mmtk_jl_run_pending_finalizers(ct->ptls); - return; -#endif jl_ptls_t ptls = ct->ptls; if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { run_finalizers(ct); } } -JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) -{ - if (ptls == NULL) - ptls = jl_current_task->ptls; - return ptls->finalizers_inhibited; -} - -JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - ptls->finalizers_inhibited++; -} - -JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void) -{ - jl_task_t *ct = jl_current_task; -#ifdef NDEBUG - ct->ptls->finalizers_inhibited--; -#else - jl_gc_enable_finalizers(ct, 1); -#endif -} - -JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) -{ - if (ct == NULL) - ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - int old_val = ptls->finalizers_inhibited; - int new_val = old_val + (on ? -1 : 1); - if (new_val < 0) { - JL_TRY { - jl_error(""); // get a backtrace - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n"); - // Only print the backtrace once, to avoid spamming the logs - static int backtrace_printed = 0; - if (backtrace_printed == 0) { - backtrace_printed = 1; - jlbacktrace(); // written to STDERR_FILENO - } - } - return; - } - ptls->finalizers_inhibited = new_val; - if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) { - jl_gc_run_pending_finalizers(ct); - } -} - static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT { void **items = flist->items; @@ -537,6 +406,7 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT void jl_gc_run_all_finalizers(jl_task_t *ct) { + if (!ct) return; int gc_n_threads; jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); @@ -554,10 +424,6 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { -#ifdef MMTKHEAP - register_finalizer(v, f, 0); - return; -#endif assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); arraylist_t *a = &ptls->finalizers; // This acquire load and the release store at the end are used to @@ -586,20 +452,14 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); -#else - register_finalizer(v, f, 1); -#endif } // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP assert(!gc_ptr_tag(v, 3)); jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); -#endif } JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT @@ -614,10 +474,6 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) { -#ifdef MMTKHEAP - run_finalizers_for_obj(o); - return; -#endif JL_LOCK_NOGC(&finalizers_lock); // Copy the finalizers into a temporary list so that code in the finalizer // won't change the list as we loop through them. 
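All of the finalizer registration paths above rely on low-bit pointer tagging: Julia objects are aligned, so the two low bits of the stored object pointer are free to record what kind of callback was registered. A condensed sketch of the dispatch, mirroring the run_finalizer logic this patch relocates (gc_ptr_tag and gc_ptr_clear_tag are the mask helpers it moves into gc.h):

    // Finalizer entry tags (sketch):
    //   o | 1      -> f is a C function, called as ((void (*)(void*))f)(o)
    //   o | 3      -> quiescent callback registered via jl_gc_add_quiescent
    //   untagged o -> f is a Julia function, run through jl_apply_generic
    static void dispatch_finalizer(void *o, void *f)
    {
        int c_fin = gc_ptr_tag(o, 1) != 0;  // low bit selects the C path
        o = gc_ptr_clear_tag(o, 3);         // strip both tag bits
        if (c_fin)
            ((void (*)(void *))f)(o);
        // else: jl_apply_generic((jl_value_t*)f, (jl_value_t**)&o, 1);
    }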
@@ -677,28 +533,6 @@ static void gc_sweep_foreign_objs(void) } } -// GC knobs and self-measurement variables -static int64_t last_gc_total_bytes = 0; - -// max_total_memory is a suggestion. We try very hard to stay -// under this limit, but we will go above it rather than halting. -#ifdef _P64 -typedef uint64_t memsize_t; -static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 1250000000UL; -static size_t total_mem; -// We expose this to the user/ci as jl_gc_set_max_memory -static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; -#else -typedef uint32_t memsize_t; -static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 500000000UL; -// Work really hard to stay within 2GB -// Alternative is to risk running out of address space -// on 32 bit architectures. -static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; -#endif - // global variables for GC stats // Resetting the object to a young object, this is used when marking the @@ -761,7 +595,7 @@ int prev_sweep_full = 1; #define inc_sat(v,s) v = (v) >= s ? s : (v)+1 // Full collection heuristics -static int64_t live_bytes = 0; +extern int64_t live_bytes; static int64_t promoted_bytes = 0; static int64_t last_live_bytes = 0; // live_bytes at last collection static int64_t t_start = 0; // Time GC starts; @@ -977,18 +811,14 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT jl_gc_queue_root(v); } -STATIC_INLINE void maybe_collect(jl_ptls_t ptls) +inline void maybe_collect(jl_ptls_t ptls) { -#ifndef MMTKHEAP if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { jl_gc_safepoint_(ptls); } -#else - mmtk_gc_poll(ptls); -#endif } // weak references @@ -999,11 +829,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here -#ifdef MMTKHEAP - mmtk_add_weak_candidate(wr); -#else arraylist_push(&ptls->heap.weak_refs, wr); -#endif return wr; } @@ -1057,7 +883,7 @@ static void sweep_weak_refs(void) // big value list // Size includes the tag and the tag is not cleared!! -STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) +inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) { maybe_collect(ptls); size_t offs = offsetof(bigval_t, header); @@ -1085,21 +911,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) return jl_valueof(&v->header); } -// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. -JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) -{ - jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); - maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag); - return val; -} - -// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being inlined into -// its callers. We provide an external-facing interface for callers, and inline `jl_gc_big_alloc_inner` -// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) -jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) { - return jl_gc_big_alloc_inner(ptls, sz); -} - // Sweep list rooted at *pv, removing and freeing any unmarked objects. // Return pointer to last `next` field in the culled list. 
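The list being culled here is intrusive: each bigval_t's prev member points at the next field that references it (either the list head or the previous node's next), which lets the sweep below unlink a dead object without special-casing the head. A minimal sketch of that unlink step:

    // Splice v out of the big-object list (sketch):
    static void unlink_big(bigval_t *v)
    {
        bigval_t *nxt = v->next;
        *v->prev = nxt;           // whoever pointed at v now points past it
        if (nxt)
            nxt->prev = v->prev;  // fix the successor's back-link
    }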
static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT @@ -1166,108 +977,14 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Arrays with malloc'd storage - -void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT -{ - // This is **NOT** a GC safe point. - mallocarray_t *ma; - if (ptls->heap.mafreelist == NULL) { - ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t)); - } - else { - ma = ptls->heap.mafreelist; - ptls->heap.mafreelist = ma->next; - } - ma->a = a; - ma->next = ptls->heap.mallocarrays; - ptls->heap.mallocarrays = ma; -} - -void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT -{ - jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); -} - -static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT -{ - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls = gc_all_tls_states[i]; - if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); - } - } -} - -static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT -{ - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls = gc_all_tls_states[i]; - if (ptls != NULL) { - memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); - } - } -} - -void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT -{ - combine_thread_gc_counts(&gc_num); - live_bytes += (gc_num.deferred_alloc + gc_num.allocd); - gc_num.allocd = 0; - gc_num.deferred_alloc = 0; - reset_thread_gc_counts(); -} - -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT -{ - size_t sz = 0; - int isbitsunion = jl_array_isbitsunion(a); - if (jl_array_ndims(a) == 1) - sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0); - else - sz = a->elsize * jl_array_len(a); - if (isbitsunion) - // account for isbits Union array selector bytes - sz += jl_array_len(a); - return sz; -} - -#ifndef MMTKHEAP -static -#endif -void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT { if (a->flags.how == 2) { char *d = (char*)a->data - a->offset*a->elsize; -#ifndef MMTKHEAP if (a->flags.isaligned) jl_free_aligned(d); else free(d); -#else - if (a->flags.isaligned) - mmtk_free_aligned(d); - else { - mmtk_free(d); - } -#endif gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } @@ -1351,7 +1068,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT } // Size includes the tag and the tag is not cleared!! 
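The pool allocator below is keyed by the pool's byte offset inside jl_tls_states_t rather than by its address, so the same integer identifies the corresponding pool on every thread. A sketch of the round trip, assuming the allocator recovers the pool by adding the offset back onto ptls:

    // Pool selection by offset (sketch):
    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
    int klass = jl_gc_szclass(allocsz);                 // size class index
    jl_gc_pool_t *p = &ptls->heap.norm_pools[klass];    // this thread's pool
    int pool_offset = (int)((char *)p - (char *)ptls);  // thread-independent
    int osize = jl_gc_sizeclasses[klass];               // object size for that class
    // ... inside the allocator, the pool pointer comes back via:
    jl_gc_pool_t *pool = (jl_gc_pool_t *)((char *)ptls + pool_offset);

The version of jl_gc_classify_pools removed below computes the same offset by taking the address of heap.norm_pools[klass] within a null jl_ptls_t, the classic offsetof idiom.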
-STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, +inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) { // Use the pool offset instead of the pool address as the argument @@ -1409,32 +1126,6 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset return jl_valueof(v); } -// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code. -JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, - int osize) -{ - jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); - maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag); - return val; -} - -// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into -// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner` -// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) -jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) { - return jl_gc_pool_alloc_inner(ptls, pool_offset, osize); -} - -int jl_gc_classify_pools(size_t sz, int *osize) -{ - if (sz > GC_MAX_SZCLASS) - return -1; - size_t allocsz = sz + sizeof(jl_taggedvalue_t); - int klass = jl_gc_szclass(allocsz); - *osize = jl_gc_sizeclasses[klass]; - return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); -} - // sweep phase int64_t lazy_freed_pages = 0; @@ -1743,7 +1434,6 @@ static void gc_sweep_perm_alloc(void) JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { -#ifndef MMTKHEAP jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *o = jl_astaggedvalue(ptr); // The modification of the `gc_bits` is not atomic but it @@ -1753,7 +1443,6 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) o->bits.gc = GC_MARKED; arraylist_push(ptls->heap.remset, (jl_value_t*)ptr); ptls->heap.remset_nptr++; // conservative -#endif } void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT @@ -2639,27 +2328,6 @@ JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls) gc_drain_own_chunkqueue(ptls, &ptls->mark_queue); } -#ifndef MMTKHEAP -static -#endif -void gc_premark(jl_ptls_t ptls2) -{ - arraylist_t *remset = ptls2->heap.remset; - ptls2->heap.remset = ptls2->heap.last_remset; - ptls2->heap.last_remset = remset; - ptls2->heap.remset->len = 0; - ptls2->heap.remset_nptr = 0; - // avoid counting remembered objects - // in `perm_scanned_bytes` - size_t len = remset->len; - void **items = remset->items; - for (size_t i = 0; i < len; i++) { - jl_value_t *item = (jl_value_t *)items[i]; - objprofile_count(jl_typeof(item), 2, 0); - jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; - } -} - static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) { jl_task_t *task; @@ -2797,93 +2465,6 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -// collector entry point and control -static _Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; -#ifdef MMTKHEAP - enable_collection(); -#endif - } - } - else if (prev && !on) { -#ifdef MMTKHEAP - disable_collection(); -#endif - // enable -> disable - 
jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT -{ - jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); - // Sync this logic with `base/util.jl:GC_Diff` - *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); -} - -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - -JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) -{ - jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); - return num; -} - -JL_DLLEXPORT void jl_gc_reset_stats(void) -{ - gc_num.max_pause = 0; - gc_num.max_memory = 0; - gc_num.max_time_to_safepoint = 0; -} - -// TODO: these were supposed to be thread local -JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT -{ - int64_t oldtb = last_gc_total_bytes; - int64_t newtb; - jl_gc_get_total_bytes(&newtb); - last_gc_total_bytes = newtb; - return newtb - oldtb; -} - -JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT -{ - int64_t oldtb = last_gc_total_bytes; - int64_t newtb; - jl_gc_get_total_bytes(&newtb); - last_gc_total_bytes = newtb - offset; - return newtb - oldtb; -} - -JL_DLLEXPORT int64_t jl_gc_live_bytes(void) -{ - return live_bytes; -} - size_t jl_maxrss(void); // Only one thread should be running in this function @@ -3165,10 +2746,6 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } -#ifdef MMTKHEAP - handle_user_collection_request(ptls); - return; -#endif jl_gc_debug_print(); int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); @@ -3258,11 +2835,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) // allocator entry points -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3302,10 +2874,6 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); -#ifdef MMTKHEAP - MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid); - ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator); -#endif } // System-wide initializations @@ -3344,67 +2912,9 @@ void jl_gc_init(void) if (high_water_mark < max_total_memory) max_total_memory = high_water_mark; - -#ifdef MMTKHEAP - long long min_heap_size; - long long max_heap_size; - char* min_size_def = getenv("MMTK_MIN_HSIZE"); - char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); - - char* max_size_def = getenv("MMTK_MAX_HSIZE"); - char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); - - // default min heap currently set as Julia's default_collect_interval - if (min_size_def != NULL) { - char *p; - double min_size = strtod(min_size_def, &p); - min_heap_size = (long) 1024 * 1024 * min_size; - } else if (min_size_gb != NULL) { - char *p; - double min_size = strtod(min_size_gb, &p); - min_heap_size = (long) 1024 * 1024 * 1024 * min_size; - } else { - min_heap_size = default_collect_interval; - } - - // default max heap currently set as 70% the free memory in the system - if (max_size_def != NULL) { - char *p; - double max_size = strtod(max_size_def, &p); - max_heap_size = (long) 
1024 * 1024 * max_size; - } else if (max_size_gb != NULL) { - char *p; - double max_size = strtod(max_size_gb, &p); - max_heap_size = (long) 1024 * 1024 * 1024 * max_size; - } else { - max_heap_size = uv_get_free_memory() * 70 / 100; - } - - // if only max size is specified initialize MMTk with a fixed size heap - if (max_size_def != NULL || max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL)) { - gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); - } else { - gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); - } - -#endif t_start = jl_hrtime(); } -JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) -{ - if (max_mem > 0 - && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) { - max_total_memory = max_mem; - } -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} - // allocation wrappers that track allocation and let collection run JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) @@ -3418,9 +2928,6 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); -#ifdef MMTKHEAP - return mmtk_counted_malloc(sz); -#endif } return malloc(sz); } @@ -3436,9 +2943,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); -#ifdef MMTKHEAP - return mmtk_counted_calloc(nm, sz); -#endif } return calloc(nm, sz); } @@ -3447,18 +2951,14 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; + free(p); if (pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; jl_atomic_store_relaxed(&ptls->gc_num.freed, jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); jl_atomic_store_relaxed(&ptls->gc_num.freecall, jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); -#ifdef MMTKHEAP - mmtk_free_with_size(p, sz); - return; -#endif } - free(p); } JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) @@ -3476,151 +2976,12 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); -#ifdef MMTKHEAP - return mmtk_realloc_with_old_size(p, sz, old); -#endif } return realloc(p, sz); } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -// allocating blocks for Arrays and Strings - -JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - maybe_collect(ptls); - size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *b = malloc_cache_align(allocsz); - if (b == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - // jl_gc_managed_malloc is currently always used for allocating array buffers. 
- maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag); - return b; -} - -static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner, int8_t can_collect) -{ - if (can_collect) - maybe_collect(ptls); - - size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - - if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; - live_bytes += allocsz - oldsz; - } - else if (allocsz < oldsz) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); - else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *b; - if (isaligned) - b = realloc_cache_align(d, allocsz, oldsz); - else - b = realloc(d, allocsz); - if (b == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); - return b; -} - -JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); -} - jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) { -#ifndef MMTKHEAP size_t len = jl_string_len(s); if (sz <= len) return s; jl_taggedvalue_t *v = jl_astaggedvalue(s); @@ -3654,148 +3015,6 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) jl_value_t *snew = jl_valueof(&newbig->header); *(size_t*)snew = sz; return snew; -#else - size_t len = jl_string_len(s); - jl_value_t *snew = jl_alloc_string(sz); - memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len); - return snew; -#endif -} - -// Perm gen allocator -// 2M pool -#define GC_PERM_POOL_SIZE (2 * 1024 * 1024) -// 20k limit for pool allocation. At most 1% fragmentation -#define GC_PERM_POOL_LIMIT (20 * 1024) -uv_mutex_t gc_perm_lock; -static uintptr_t gc_perm_pool = 0; -static uintptr_t gc_perm_end = 0; - -static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT -{ - // `align` must be power of two - assert(offset == 0 || offset < align); - const size_t malloc_align = sizeof(void*) == 8 ? 16 : 4; - if (align > 1 && (offset != 0 || align > malloc_align)) - sz += align - 1; - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *base = zero ? 
calloc(1, sz) : malloc(sz); - if (base == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - jl_may_leak(base); - assert(align > 0); - unsigned diff = (offset - (uintptr_t)base) % align; - return (void*)((char*)base + diff); -} - -STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT -{ - uintptr_t pool = LLT_ALIGN(gc_perm_pool + offset, (uintptr_t)align) - offset; - uintptr_t end = pool + sz; - if (end > gc_perm_end) - return NULL; - gc_perm_pool = end; - return (void*)jl_assume(pool); -} - -// **NOT** a safepoint -void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) -{ - // The caller should have acquired `gc_perm_lock` - assert(align < GC_PERM_POOL_LIMIT); -#ifndef MEMDEBUG - if (__unlikely(sz > GC_PERM_POOL_LIMIT)) -#endif - return gc_perm_alloc_large(sz, zero, align, offset); - void *ptr = gc_try_perm_alloc_pool(sz, align, offset); - if (__likely(ptr)) - return ptr; - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); - void *pool = VirtualAlloc(NULL, GC_PERM_POOL_SIZE, MEM_COMMIT, PAGE_READWRITE); - SetLastError(last_error); - errno = last_errno; - if (__unlikely(pool == NULL)) - return NULL; -#else - void *pool = mmap(0, GC_PERM_POOL_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - errno = last_errno; - if (__unlikely(pool == MAP_FAILED)) - return NULL; -#endif - gc_perm_pool = (uintptr_t)pool; - gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE; - return gc_try_perm_alloc_pool(sz, align, offset); -} - -// **NOT** a safepoint -void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) -{ - assert(align < GC_PERM_POOL_LIMIT); -#ifndef MEMDEBUG - if (__unlikely(sz > GC_PERM_POOL_LIMIT)) -#endif - return gc_perm_alloc_large(sz, zero, align, offset); - uv_mutex_lock(&gc_perm_lock); - void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset); - uv_mutex_unlock(&gc_perm_lock); - return p; -} - -JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) -{ - jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_add_finalizer_th(ptls, v, f); -} - -JL_DLLEXPORT void jl_finalize(jl_value_t *o) -{ - jl_finalize_th(jl_current_task, o); -} - -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, 0, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*), NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL); } JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) @@ -3915,27 +3134,16 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) return NULL; } -JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) -{ - return GC_MAX_SZCLASS; -} - -JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) -{ - return sizeof(bigval_t); -} - - -JL_DLLEXPORT void * 
jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +// added for MMTk integration +void enable_collection(void) { - return jl_gc_alloc(ptls, sz, ty); } - -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void disable_collection(void) { - arraylist_push(&ptls->sweep_objs, obj); } #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc.h b/src/gc.h index 930f7f3c30594..1db0211eb6c68 100644 --- a/src/gc.h +++ b/src/gc.h @@ -4,6 +4,7 @@ allocation and garbage collection . non-moving, precise mark and sweep collector . pool-allocates small objects, keeps big objects on a simple list + MMTk alternative */ #ifndef JL_GC_H @@ -27,36 +28,48 @@ #include "gc-heap-snapshot.h" #include "gc-alloc-profiler.h" -#ifdef __cplusplus -extern "C" { +// interface from and to gc-common.c +extern void maybe_collect(jl_ptls_t ptls); +extern void run_finalizer(jl_task_t *ct, void *o, void *ff); +extern void *jl_malloc_aligned(size_t sz, size_t align); +extern void *jl_gc_counted_calloc(size_t nm, size_t sz); +extern void jl_gc_counted_free_with_size(void *p, size_t sz); +extern void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); +extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align); +extern void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f); +extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o); +extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); +extern jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz); +extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize); +extern void jl_rng_split(uint64_t to[4], uint64_t from[4]); +extern void gc_premark(jl_ptls_t ptls2); +extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, + int isaligned, jl_value_t *owner, int8_t can_collect); +extern size_t jl_array_nbytes(jl_array_t *a); +extern void objprofile_count(void *ty, int old, int sz); + +#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) +#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) + +// common types and globals +#ifdef _P64 +typedef uint64_t memsize_t; +#else +typedef uint32_t memsize_t; #endif -#define GC_PAGE_LG2 14 // log2(size of a page) -#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k -#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) - -#define jl_malloc_tag ((void*)0xdeadaa01) -#define jl_singleton_tag ((void*)0xdeadaa02) - -// Used by GC_DEBUG_ENV -typedef struct { - uint64_t num; - uint64_t next; - uint64_t min; - uint64_t interv; - uint64_t max; - unsigned short random[3]; -} jl_alloc_num_t; - -typedef struct { - int always_full; - int wait_for_debugger; - jl_alloc_num_t pool; - jl_alloc_num_t other; - jl_alloc_num_t print; -} jl_gc_debug_env_t; +extern const size_t default_collect_interval; +extern const size_t max_collect_interval; +extern size_t last_long_collect_interval; +extern size_t total_mem; +extern memsize_t max_total_memory; +extern _Atomic(uint32_t) jl_gc_disable_counter; +extern jl_mutex_t heapsnapshot_lock; +extern uint64_t finalizer_rngState[]; +extern int gc_n_threads; +extern jl_ptls_t* gc_all_tls_states; -// This struct must be kept in sync with the Julia type of the same name in base/timing.jl +// keep in sync with the Julia type of the same name in base/timing.jl typedef struct { int64_t allocd; int64_t deferred_alloc; @@ -82,29 +95,18 @@ typedef 
struct { uint64_t total_mark_time; } jl_gc_num_t; -typedef enum { - GC_empty_chunk, - GC_objary_chunk, - GC_ary8_chunk, - GC_ary16_chunk, - GC_finlist_chunk, -} gc_chunk_id_t; +extern jl_gc_num_t gc_num; -typedef struct _jl_gc_chunk_t { - gc_chunk_id_t cid; - struct _jl_value_t *parent; - struct _jl_value_t **begin; - struct _jl_value_t **end; - void *elem_begin; - void *elem_end; - uint32_t step; - uintptr_t nptr; -} jl_gc_chunk_t; +// data structure for tracking malloc'd arrays. +typedef struct _mallocarray_t { + jl_array_t *a; + struct _mallocarray_t *next; +} mallocarray_t; -#define MAX_REFS_AT_ONCE (1 << 16) +extern void combine_thread_gc_counts(jl_gc_num_t *dest); +extern void reset_thread_gc_counts(void); // layout for big (>2k) objects - JL_EXTENSION typedef struct _bigval_t { struct _bigval_t *next; struct _bigval_t **prev; // pointer to the next field of the prev entry @@ -129,12 +131,111 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd arrays. +STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +{ + return ((uintptr_t)v) & mask; +} -typedef struct _mallocarray_t { - jl_array_t *a; - struct _mallocarray_t *next; -} mallocarray_t; +STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +{ + return (void*)(((uintptr_t)v) & ~mask); +} + +STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT +{ + return (bits & GC_MARKED) != 0; +} + +#ifdef GC_VERIFY +#error "GC_VERIFY is unsupported with MMTk" +#endif + +#ifdef MEMFENCE +#error "MEMFENCE is unsupported with MMTk" +#endif + +#ifdef GC_DEBUG_ENV +#error "GC_DEBUG_ENV is unsupported with MMTk" +#endif + +#ifdef GC_FINAL_STATS +#error "GC_FINAL_STATS is currently unsupported with MMTk" +#endif + +#ifdef GC_TIME +#error "GC_TIME is currently unsupported with MMTk" +#endif + +#ifdef MEMPROFILE +#error "MEMPROFILE is not supported with MMTk" +#endif + +#ifdef OBJPROFILE +#ifdef MMTK_GC +#warning "OBJPROFILE is unsupported with MMTk; disabling" +#undef OBJPROFILE +#endif +#endif + + +#ifdef MMTK_GC +#include "mmtk.h" + +typedef struct { + char c; +} jl_gc_pagemeta_t; + +#else // !MMTK_GC + +#ifdef __cplusplus +extern "C" { +#endif + +#define GC_PAGE_LG2 14 // log2(size of a page) +#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k +#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) + +#define jl_malloc_tag ((void*)0xdeadaa01) +#define jl_singleton_tag ((void*)0xdeadaa02) + +// Used by GC_DEBUG_ENV +typedef struct { + uint64_t num; + uint64_t next; + uint64_t min; + uint64_t interv; + uint64_t max; + unsigned short random[3]; +} jl_alloc_num_t; + +typedef struct { + int always_full; + int wait_for_debugger; + jl_alloc_num_t pool; + jl_alloc_num_t other; + jl_alloc_num_t print; +} jl_gc_debug_env_t; + +typedef enum { + GC_empty_chunk, + GC_objary_chunk, + GC_ary8_chunk, + GC_ary16_chunk, + GC_finlist_chunk, +} gc_chunk_id_t; + +typedef struct _jl_gc_chunk_t { + gc_chunk_id_t cid; + struct _jl_value_t *parent; + struct _jl_value_t **begin; + struct _jl_value_t **end; + void *elem_begin; + void *elem_end; + uint32_t step; + uintptr_t nptr; +} jl_gc_chunk_t; + +#define MAX_REFS_AT_ONCE (1 << 16) // pool page metadata typedef struct { @@ -250,14 +351,11 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) } #endif -extern jl_gc_num_t gc_num; extern pagetable_t memory_map; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern 
arraylist_t to_finalize; extern int64_t lazy_freed_pages; -extern int gc_n_threads; -extern jl_ptls_t* gc_all_tls_states; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT { @@ -280,11 +378,6 @@ STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOIN return (jl_taggedvalue_t*)(p->data + p->fl_end_offset); } -STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT -{ - return (bits & GC_MARKED) != 0; -} - STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT { return (bits & GC_OLD) != 0; @@ -295,16 +388,6 @@ STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT return (tag & ~(uintptr_t)3) | bits; } -STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT -{ - return ((uintptr_t)v) & mask; -} - -STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT -{ - return (void*)(((uintptr_t)v) & ~mask); -} - NOINLINE uintptr_t gc_get_stack_ptr(void); STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT @@ -538,24 +621,6 @@ static inline void gc_scrub(void) } #endif -#ifdef OBJPROFILE -void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT; -void objprofile_printall(void); -void objprofile_reset(void); -#else -static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT -{ -} - -static inline void objprofile_printall(void) -{ -} - -static inline void objprofile_reset(void) -{ -} -#endif - #ifdef MEMPROFILE void gc_stats_all_pool(void); void gc_stats_big_obj(void); @@ -567,8 +632,6 @@ void gc_stats_big_obj(void); // For debugging void gc_count_pool(void); -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; - JL_DLLEXPORT void jl_enable_gc_logging(int enable); void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; @@ -576,4 +639,6 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect } #endif +#endif // !MMTK_GC + #endif diff --git a/src/init.c b/src/init.c index 45d6b8ee98873..2bfdebe00dfaf 100644 --- a/src/init.c +++ b/src/init.c @@ -295,12 +295,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; JL_STDERR = (uv_stream_t*) STDERR_FILENO; -#ifndef MMTKHEAP - if (ct) - jl_gc_run_all_finalizers(ct); -#else - mmtk_jl_gc_run_all_finalizers(); -#endif + jl_gc_run_all_finalizers(ct); uv_loop_t *loop = jl_global_event_loop(); if (loop != NULL) { @@ -811,11 +806,9 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_ptls_t ptls = jl_init_threadtls(0); -#ifdef MMTKHEAP - // start MMTk's GC - initialize_collection((void*) ptls); +#ifdef MMTK_GC + initialize_collection((void *)ptls); #endif - #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" diff --git a/src/julia.h b/src/julia.h index 2bc1a97b681ed..8a8624360fc7a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -930,29 +930,25 @@ JL_DLLEXPORT void jl_clear_malloc_data(void); JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT; +#ifndef MMTK_GC STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP // parent and ptr isa jl_value_t* if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young 
jl_gc_queue_root((jl_value_t*)parent); -#endif } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { -#ifndef MMTKHEAP // if ptr is old if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) { jl_gc_queue_root((jl_value_t*)ptr); } -#endif } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP // ptr is an immutable object if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) return; // parent is young or in remset @@ -962,9 +958,23 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ const jl_datatype_layout_t *ly = dt->layout; if (ly->npointers) jl_gc_queue_multiroot((jl_value_t*)parent, ptr); -#endif } +#else // MMTK_GC + +STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ +} + +STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* +{ +} + +STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ +} +#endif // MMTK_GC + JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner); diff --git a/src/julia_internal.h b/src/julia_internal.h index 5b60be740bfb8..b921c63444e86 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -326,13 +326,15 @@ extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; +void enable_collection(void); +void disable_collection(void); jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); -#ifdef MMTKHEAP +#ifdef MMTK_GC JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); -#endif +#endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, @@ -451,37 +453,50 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); +#ifndef MMTK_GC STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) { jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { -#ifndef MMTKHEAP int pool_id = jl_gc_szclass(allocsz); jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; int osize = jl_gc_sizeclasses[pool_id]; // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); -#else + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + +#else // MMTK_GC + +STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { int pool_id = jl_gc_szclass(allocsz); int osize = jl_gc_sizeclasses[pool_id]; v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty); -#endif } else { if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); -#ifndef MMTKHEAP - v = jl_gc_big_alloc_noinline(ptls, allocsz); -#else v = jl_mmtk_gc_alloc_big(ptls, allocsz); -#endif } jl_set_typeof(v, ty); maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); return v; } +#endif // MMTK_GC /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a @@ -576,24 +591,32 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; +#ifndef MMTK_GC STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { -#ifndef MMTKHEAP jl_gc_wb(bnd, val); -#endif } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* { -#ifndef MMTKHEAP // if parent is marked and buf is not if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { jl_task_t *ct = jl_current_task; gc_setmark_buf(ct->ptls, bufptr, 3, minsz); } -#endif } +#else // MMTK_GC + +STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* +{ +} + +STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* +{ +} +#endif // MMTK_GC + void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT; void jl_print_gc_stats(JL_STREAM *s); diff --git a/src/julia_threads.h b/src/julia_threads.h index c15f19e78966f..17d9d0857dc39 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -4,7 +4,7 @@ #ifndef JL_THREADS_H #define JL_THREADS_H -#ifdef MMTKHEAP +#ifdef MMTK_GC #include "mmtkMutator.h" #endif @@ -281,7 +281,7 @@ typedef struct _jl_tls_states_t { uint64_t sleep_leave; ) -#ifdef MMTKHEAP +#ifdef MMTK_GC MMTkMutatorContext* mmtk_mutator_ptr; void* cursor; void* limit; diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 3e2eb3bcdf6ed..5b8eeb49f60ad 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -224,12 +224,12 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*)); } else { -#ifndef MMTKHEAP +#ifndef MMTK_GC auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); - #else +#else // MMTK_GC auto pool_osize_i32 = 
ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor)); @@ -295,7 +295,7 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) phiNode->takeName(target); return phiNode; -#endif +#endif // MMTK_GC } newI->setAttributes(newI->getCalledFunction()->getAttributes()); newI->addRetAttr(derefAttr); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index c46228f13490b..ea390f01010fd 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -226,13 +226,8 @@ namespace jl_intrinsics { } namespace jl_well_known { -#ifndef MMTKHEAP static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); -#else - static const char *GC_BIG_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_big); - static const char *GC_POOL_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_default_llvm); -#endif static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); using jl_intrinsics::addGCAllocAttributes; diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c new file mode 100644 index 0000000000000..00cd54c9df920 --- /dev/null +++ b/src/mmtk-gc.c @@ -0,0 +1,487 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifdef MMTK_GC + +#include "gc.h" +#include "mmtk_julia.h" +#include "julia_gcext.h" + +// callbacks +// --- + +typedef void (*jl_gc_cb_func_t)(void); + +JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable) +{ +} + + +inline void maybe_collect(jl_ptls_t ptls) +{ + mmtk_gc_poll(ptls); +} + + +// malloc wrappers, aligned allocation +// --- + +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ + return mmtk_malloc_aligned(sz ? sz : 1, align); // XXX sz +} +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, + size_t align) +{ + void *res = jl_malloc_aligned(sz, align); + if (res != NULL) { + memcpy(res, d, oldsz > sz ? sz : oldsz); + mmtk_free_aligned(d); + } + return res; +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + mmtk_free_aligned(p); +} + + +// finalizers +// --- + +JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) +{ + if (ct == NULL) + ct = jl_current_task; + mmtk_jl_run_pending_finalizers(ct->ptls); +} + +JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT +{ + register_finalizer(v, f, 1); +} + +// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT +{ + /* TODO: unsupported? 
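+       (These callbacks are currently dropped: MMTk does not appear to
+       expose a matching quiescence hook yet.)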
*/ +} + +JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT +{ + if (__unlikely(jl_typeis(f, jl_voidpointer_type))) { + jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); + } + else { + register_finalizer(v, f, 0); + } +} + +JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) +{ + run_finalizers_for_obj(o); +} + +void jl_gc_run_all_finalizers(jl_task_t *ct) +{ + mmtk_jl_gc_run_all_finalizers(); +} + +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT +{ + register_finalizer(v, f, 0); +} + + +// weak references +// --- +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + wr->value = value; // NOTE: wb not needed here + mmtk_add_weak_candidate(wr); + return wr; +} + + +// big values +// --- + +// Size includes the tag and the tag is not cleared!! +inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) +{ + // TODO: assertion needed here? + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + // TODO: drop this okay? + // maybe_collect(ptls); + + jl_value_t *v = jl_mmtk_gc_alloc_big(ptls, sz); + // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_big; enable + // here when that's edited? + /* + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); + */ + // TODO: move to jl_mmtk_gc_alloc_big if needed. +/* +#ifdef MEMDEBUG + memset(v, 0xee, allocsz); +#endif +*/ + // TODO: need to set this? have to move to jl_mmtk_gc_alloc_big then. + // v->age = 0; + // TODO: dropping this; confirm okay? `sweep_big` no longer needed? + // gc_big_object_link(v, &ptls->heap.big_objects); + return v; +} + +// Size includes the tag and the tag is not cleared!! +inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); +#ifdef MEMDEBUG + return jl_gc_big_alloc(ptls, osize); +#endif + // TODO: drop this okay? + // maybe_collect(ptls); + + jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, pool_offset, osize, NULL); + // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable + // here when that's edited? + /* + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1); + */ + return v; +} + +void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +{ + if (a->flags.how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->flags.isaligned) + mmtk_free_aligned(d); + else + mmtk_free(d); + gc_num.freed += jl_array_nbytes(a); + gc_num.freecall++; + } +} + + +// roots +// --- + +JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) +{ + /* TODO: not needed? */ +} + +// TODO: exported, but not MMTk-specific? +JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ + /* TODO: confirm not needed? 
*/ +} + + +// marking +// --- + +JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) +{ + return 0; +} +JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, + jl_value_t **objs, size_t nobjs) +{ +} + + +// GC control +// --- + +JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + return; + } + handle_user_collection_request(ptls); +} + +// Per-thread initialization +// TODO: remove `norm_pools`, `weak_refs`, etc. from `heap`? +// TODO: remove `gc_cache`? +void jl_init_thread_heap(jl_ptls_t ptls) +{ + jl_thread_heap_t *heap = &ptls->heap; + jl_gc_pool_t *p = heap->norm_pools; + for (int i = 0; i < JL_GC_N_POOLS; i++) { + p[i].osize = jl_gc_sizeclasses[i]; + p[i].freelist = NULL; + p[i].newpages = NULL; + } + arraylist_new(&heap->weak_refs, 0); + arraylist_new(&heap->live_tasks, 0); + heap->mallocarrays = NULL; + heap->mafreelist = NULL; + heap->big_objects = NULL; + heap->remset = &heap->_remset[0]; + heap->last_remset = &heap->_remset[1]; + arraylist_new(heap->remset, 0); + arraylist_new(heap->last_remset, 0); + arraylist_new(&ptls->finalizers, 0); + arraylist_new(&ptls->sweep_objs, 0); + + jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; + gc_cache->perm_scanned_bytes = 0; + gc_cache->scanned_bytes = 0; + gc_cache->nbig_obj = 0; + + memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + + MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid); + ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator); +} + +// System-wide initialization +// TODO: remove locks? remove anything else? +void jl_gc_init(void) +{ + if (jl_options.heap_size_hint) + jl_gc_set_max_memory(jl_options.heap_size_hint); + + JL_MUTEX_INIT(&heapsnapshot_lock); + uv_mutex_init(&gc_perm_lock); + + gc_num.interval = default_collect_interval; + last_long_collect_interval = default_collect_interval; + gc_num.allocd = 0; + gc_num.max_pause = 0; + gc_num.max_memory = 0; + +#ifdef _P64 + total_mem = uv_get_total_memory(); + uint64_t constrained_mem = uv_get_constrained_memory(); + if (constrained_mem > 0 && constrained_mem < total_mem) + total_mem = constrained_mem; +#endif + + // We allocate with abandon until we get close to the free memory on the machine. 
+ uint64_t free_mem = uv_get_available_memory(); + uint64_t high_water_mark = free_mem / 10 * 7; // 70% high water mark + + if (high_water_mark < max_total_memory) + max_total_memory = high_water_mark; + + // MMTk-specific + long long min_heap_size; + long long max_heap_size; + char* min_size_def = getenv("MMTK_MIN_HSIZE"); + char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); + + char* max_size_def = getenv("MMTK_MAX_HSIZE"); + char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); + + // default min heap currently set as Julia's default_collect_interval + if (min_size_def != NULL) { + char *p; + double min_size = strtod(min_size_def, &p); + min_heap_size = (long) 1024 * 1024 * min_size; + } else if (min_size_gb != NULL) { + char *p; + double min_size = strtod(min_size_gb, &p); + min_heap_size = (long) 1024 * 1024 * 1024 * min_size; + } else { + min_heap_size = default_collect_interval; + } + + // default max heap currently set as 70% the free memory in the system + if (max_size_def != NULL) { + char *p; + double max_size = strtod(max_size_def, &p); + max_heap_size = (long) 1024 * 1024 * max_size; + } else if (max_size_gb != NULL) { + char *p; + double max_size = strtod(max_size_gb, &p); + max_heap_size = (long) 1024 * 1024 * 1024 * max_size; + } else { + max_heap_size = uv_get_free_memory() * 70 / 100; + } + + // if only max size is specified initialize MMTk with a fixed size heap + if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { + gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + } else { + gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + } +} + +// allocation wrappers that track allocation and let collection run + +JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + maybe_collect(ptls); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + return mmtk_counted_malloc(sz); + } + return malloc(sz); +} + +JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + maybe_collect(ptls); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + return mmtk_counted_calloc(nm, sz); + } + return calloc(nm, sz); +} + +JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.freecall, + jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); + mmtk_free_with_size(p, sz); + return; + } + free(p); +} + +JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + maybe_collect(ptls); + if (sz < old) + jl_atomic_store_relaxed(&ptls->gc_num.freed, 
+ jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); + else + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + return mmtk_realloc_with_old_size(p, sz, old); + } + // TODO: correct? + return realloc(p, sz); +} + +jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) +{ + size_t len = jl_string_len(s); + jl_value_t *snew = jl_alloc_string(sz); + memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len); + return snew; +} + +JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) +{ + return 0; +} + +JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) +{ + return 0; +} + +// TODO: if this is needed, it can be added in MMTk +JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) +{ + return NULL; +} + + +// gc-debug functions +// --- + +jl_gc_pagemeta_t *jl_gc_page_metadata(void *data) +{ + return NULL; +} + +JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) +{ + return NULL; +} + +void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT +{ +} + +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT +{ + // May not be accurate but should be helpful enough + uint64_t pool_count = gc_num.poolalloc; + uint64_t big_count = gc_num.bigalloc; + jl_safe_printf("Allocations: %" PRIu64 " " + "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n", + pool_count + big_count, pool_count, big_count, gc_num.pause); +} + +void jl_print_gc_stats(JL_STREAM *s) +{ +} + +void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT +{ +} + +void objprofile_printall(void) +{ +} + +void objprofile_reset(void) +{ +} + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/threading.c b/src/threading.c index 52b3fc2d8c06d..bc31eb1e46bb6 100644 --- a/src/threading.c +++ b/src/threading.c @@ -345,12 +345,12 @@ jl_ptls_t jl_init_threadtls(int16_t tid) #endif ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self(); ptls->rngseed = jl_rand(); - if (tid == 0) + if (tid == 0) { ptls->disable_gc = 1; -#ifdef MMTKHEAP - if (tid == 0) +#ifdef MMTK_GC disable_collection(); #endif + } #ifdef _OS_WINDOWS_ if (tid == 0) { if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), From 9dbc8fc65e1e273cefbbe87b20e35e4c43a7ebaf Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 3 May 2023 16:56:37 +1200 Subject: [PATCH 003/116] Make perm alloc calls specific to GC implementation (#9) --- src/gc-common.c | 87 -------------------------------------- src/gc.c | 99 ++++++++++++++++++++++++++++++++++++++++++++ src/julia_internal.h | 8 ++++ src/mmtk-gc.c | 22 ++++++++++ src/staticdata.c | 3 ++ src/symbol.c | 4 ++ 6 files changed, 136 insertions(+), 87 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index f5636c97fe32a..8abee999ec48a 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -521,94 +521,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); } -// Perm gen allocator -// 2M pool -#define GC_PERM_POOL_SIZE (2 * 1024 * 1024) -// 20k limit for pool allocation. 
At most 1% fragmentation -#define GC_PERM_POOL_LIMIT (20 * 1024) uv_mutex_t gc_perm_lock; -static uintptr_t gc_perm_pool = 0; -static uintptr_t gc_perm_end = 0; - -static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT -{ - // `align` must be power of two - assert(offset == 0 || offset < align); - const size_t malloc_align = sizeof(void*) == 8 ? 16 : 4; - if (align > 1 && (offset != 0 || align > malloc_align)) - sz += align - 1; - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *base = zero ? calloc(1, sz) : malloc(sz); - if (base == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - jl_may_leak(base); - assert(align > 0); - unsigned diff = (offset - (uintptr_t)base) % align; - return (void*)((char*)base + diff); -} - -STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT -{ - uintptr_t pool = LLT_ALIGN(gc_perm_pool + offset, (uintptr_t)align) - offset; - uintptr_t end = pool + sz; - if (end > gc_perm_end) - return NULL; - gc_perm_pool = end; - return (void*)jl_assume(pool); -} - -// **NOT** a safepoint -void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) -{ - // The caller should have acquired `gc_perm_lock` - assert(align < GC_PERM_POOL_LIMIT); -#ifndef MEMDEBUG - if (__unlikely(sz > GC_PERM_POOL_LIMIT)) -#endif - return gc_perm_alloc_large(sz, zero, align, offset); - void *ptr = gc_try_perm_alloc_pool(sz, align, offset); - if (__likely(ptr)) - return ptr; - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); - void *pool = VirtualAlloc(NULL, GC_PERM_POOL_SIZE, MEM_COMMIT, PAGE_READWRITE); - SetLastError(last_error); - errno = last_errno; - if (__unlikely(pool == NULL)) - return NULL; -#else - void *pool = mmap(0, GC_PERM_POOL_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - errno = last_errno; - if (__unlikely(pool == MAP_FAILED)) - return NULL; -#endif - gc_perm_pool = (uintptr_t)pool; - gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE; - return gc_try_perm_alloc_pool(sz, align, offset); -} - -// **NOT** a safepoint -void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) -{ - assert(align < GC_PERM_POOL_LIMIT); -#ifndef MEMDEBUG - if (__unlikely(sz > GC_PERM_POOL_LIMIT)) -#endif - return gc_perm_alloc_large(sz, zero, align, offset); - uv_mutex_lock(&gc_perm_lock); - void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset); - uv_mutex_unlock(&gc_perm_lock); - return p; -} JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) { diff --git a/src/gc.c b/src/gc.c index e656fa331be38..69ec08b6cf9d4 100644 --- a/src/gc.c +++ b/src/gc.c @@ -114,6 +114,105 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); } +// Perm gen allocator +// 2M pool +#define GC_PERM_POOL_SIZE (2 * 1024 * 1024) +// 20k limit for pool allocation. At most 1% fragmentation +#define GC_PERM_POOL_LIMIT (20 * 1024) + +static uintptr_t gc_perm_pool = 0; +static uintptr_t gc_perm_end = 0; + +static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT +{ + // `align` must be power of two + assert(offset == 0 || offset < align); + const size_t malloc_align = sizeof(void*) == 8 ? 
16 : 4; + if (align > 1 && (offset != 0 || align > malloc_align)) + sz += align - 1; + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *base = zero ? calloc(1, sz) : malloc(sz); + if (base == NULL) + jl_throw(jl_memory_exception); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + jl_may_leak(base); + assert(align > 0); + unsigned diff = (offset - (uintptr_t)base) % align; + return (void*)((char*)base + diff); +} + +STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT +{ + uintptr_t pool = LLT_ALIGN(gc_perm_pool + offset, (uintptr_t)align) - offset; + uintptr_t end = pool + sz; + if (end > gc_perm_end) + return NULL; + gc_perm_pool = end; + return (void*)jl_assume(pool); +} + +// **NOT** a safepoint +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +{ + // The caller should have acquired `gc_perm_lock` + assert(align < GC_PERM_POOL_LIMIT); +#ifndef MEMDEBUG + if (__unlikely(sz > GC_PERM_POOL_LIMIT)) +#endif + return gc_perm_alloc_large(sz, zero, align, offset); + void *ptr = gc_try_perm_alloc_pool(sz, align, offset); + if (__likely(ptr)) + return ptr; + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); + void *pool = VirtualAlloc(NULL, GC_PERM_POOL_SIZE, MEM_COMMIT, PAGE_READWRITE); + SetLastError(last_error); + errno = last_errno; + if (__unlikely(pool == NULL)) + return NULL; +#else + void *pool = mmap(0, GC_PERM_POOL_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + errno = last_errno; + if (__unlikely(pool == MAP_FAILED)) + return NULL; +#endif + gc_perm_pool = (uintptr_t)pool; + gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE; + return gc_try_perm_alloc_pool(sz, align, offset); +} + +// **NOT** a safepoint +void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) +{ + assert(align < GC_PERM_POOL_LIMIT); +#ifndef MEMDEBUG + if (__unlikely(sz > GC_PERM_POOL_LIMIT)) +#endif + return gc_perm_alloc_large(sz, zero, align, offset); + uv_mutex_lock(&gc_perm_lock); + void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset); + uv_mutex_unlock(&gc_perm_lock); + return p; +} + +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + // Do nothing +} + +void jl_gc_notify_image_alloc(char* img_data, size_t len) +{ + // Do nothing +} + // Protect all access to `finalizer_list_marked` and `to_finalize`. 
// For accessing `ptls->finalizers`, the lock is needed if a thread // is going to realloc the buffer (of its own list) or accessing the diff --git a/src/julia_internal.h b/src/julia_internal.h index b921c63444e86..6db9a7325baff 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -334,6 +334,7 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); #ifdef MMTK_GC JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); +extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator); #endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; @@ -344,6 +345,8 @@ void *jl_gc_perm_alloc(size_t sz, int zero, void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v); void gc_sweep_sysimg(void); +void jl_gc_notify_image_load(const char* img_data, size_t len); +void jl_gc_notify_image_alloc(char* img_data, size_t len); // pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET) static const int jl_gc_sizeclasses[] = { @@ -534,8 +537,13 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT sizeof(void*) * 2 : 16)); jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align, sizeof(void*) % align); + // Possibly we do not need this for MMTk. We could declare a post_alloc func and define it differently in two GCs. uintptr_t tag = (uintptr_t)ty; o->header = tag | GC_OLD_MARKED; +#ifdef MMTK_GC + jl_ptls_t ptls = jl_current_task->ptls; + post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1); +#endif return jl_valueof(o); } jl_value_t *jl_permbox8(jl_datatype_t *t, int8_t x); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 00cd54c9df920..943570167e1ff 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -480,6 +480,28 @@ void objprofile_reset(void) { } +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +{ + jl_ptls_t ptls = jl_current_task->ptls; + void* addr = alloc(ptls->mmtk_mutator_ptr, sz, align, offset, 1); + return addr; +} + +void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) +{ + return jl_gc_perm_alloc_nolock(sz, zero, align, offset); +} + +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + // TODO: We should notify MMTk about the image (VM space) +} + +void jl_gc_notify_image_alloc(char* img_data, size_t len) +{ + // TODO: We should call MMTk to bulk set object metadata for the image region +} + #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index 804193ff90229..16b4791bb4200 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -489,6 +489,7 @@ static void jl_load_sysimg_so(void) jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); size_t *plen; jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -3235,6 +3236,7 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im JL_SIGATOMIC_BEGIN(); size_t len = dataendpos - datastartpos; char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); ios_seek(f, datastartpos); if (ios_readall(f, sysimg, len) != len || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { restored = jl_get_exceptionf(jl_errorexception_type, "Error reading system image file."); @@ 
-3331,6 +3333,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname)
     ios_seek_end(&f);
     size_t len = ios_pos(&f);
     char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
+    jl_gc_notify_image_alloc(sysimg, len);
     ios_seek(&f, 0);
     if (ios_readall(&f, sysimg, len) != len)
         jl_errorf("Error reading system image file.");
diff --git a/src/symbol.c b/src/symbol.c
index 14606c82b9778..0f8b41787ad13 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -41,6 +41,10 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
     tag->header = ((uintptr_t)jl_symbol_type) | GC_OLD_MARKED;
+#ifdef MMTK_GC
+    jl_ptls_t ptls = jl_current_task->ptls;
+    post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1);
+#endif
     jl_atomic_store_relaxed(&sym->left, NULL);
     jl_atomic_store_relaxed(&sym->right, NULL);
     sym->hash = hash_symbol(str, len);

From a760a7ee28150261669cb0b31a8284214b3635c7 Mon Sep 17 00:00:00 2001
From: Yi Lin
Date: Thu, 4 May 2023 17:34:21 +1200
Subject: [PATCH 004/116] Implement MMTk write barrier (#11)

* Implement MMTk write barrier

* Check which barrier to use in codegen
---
 src/jl_exported_funcs.inc      |  2 ++
 src/julia.h                    | 16 ++++++++
 src/julia_internal.h           |  5 +++
 src/llvm-final-gc-lowering.cpp | 49 ++++++++++++++++++++++++-
 src/llvm-late-gc-lowering.cpp  | 44 ++++++++++++++++++++++
 src/llvm-pass-helpers.cpp      | 67 ++++++++++++++++++++++++++++++++++
 src/llvm-pass-helpers.h        | 10 +++++
 src/mmtk-gc.c                  | 11 ++++++
 8 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index c475184573faa..b51e55510e172 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -186,6 +186,8 @@
     XX(jl_gc_pool_alloc) \
     XX(jl_gc_queue_multiroot) \
     XX(jl_gc_queue_root) \
+    XX(jl_gc_wb1_noinline) \
+    XX(jl_gc_wb2_noinline) \
     XX(jl_gc_safepoint) \
     XX(jl_gc_schedule_foreign_sweepfunc) \
     XX(jl_gc_set_cb_notify_external_alloc) \
diff --git a/src/julia.h b/src/julia.h
index 8a8624360fc7a..710fd11cf9372 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -961,17 +961,23 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
 }
 #else // MMTK_GC

+// MMTk's write barrier method. This is the full write barrier, including both the fastpath and the slowpath.
+// TODO: We should inline the fastpath in the following functions, and only call the slowpath.
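+// Note that mmtk_gc_wb_full is only forward-declared here; its definition is
+// at the end of this header, since it needs jl_current_task->ptls to reach
+// the thread's MMTk mutator context.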
+STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { + mmtk_gc_wb_full(parent, ptr); } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { + mmtk_gc_wb_full(ptr, (void*)0); } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { + mmtk_gc_wb_full(parent, (void*)0); } #endif // MMTK_GC @@ -2268,6 +2274,16 @@ typedef struct { } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; +#ifdef MMTK_GC +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_post(ptls->mmtk_mutator_ptr, parent, ptr); +} +#endif + #ifdef __cplusplus } #endif diff --git a/src/julia_internal.h b/src/julia_internal.h index 6db9a7325baff..65e1966385039 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -335,6 +335,7 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator); +extern uint8_t mmtk_needs_write_barrier(void); #endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; @@ -616,12 +617,16 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT #else // MMTK_GC +// TODO: We should inline fastpath in the following functions, and only call slowpath. + STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { + mmtk_gc_wb_full(bnd, val); } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* { + mmtk_gc_wb_full(parent, (void*)0); } #endif // MMTK_GC diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 5b8eeb49f60ad..a41f69d74b1e5 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -48,6 +48,10 @@ struct FinalLowerGC: private JuliaPassContext { Function *queueRootFunc; Function *poolAllocFunc; Function *bigAllocFunc; +#ifdef MMTK_GC + Function *writeBarrier1Func; + Function *writeBarrier2Func; +#endif Instruction *pgcstack; // Lowers a `julia.new_gc_frame` intrinsic. @@ -70,6 +74,11 @@ struct FinalLowerGC: private JuliaPassContext { // Lowers a `julia.safepoint` intrinsic. 
Value *lowerSafepoint(CallInst *target, Function &F); + +#ifdef MMTK_GC + Value *lowerWriteBarrier1(CallInst *target, Function &F); + Value *lowerWriteBarrier2(CallInst *target, Function &F); +#endif }; Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) @@ -204,6 +213,22 @@ Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) return load; } +#ifdef MMTK_GC +Value *FinalLowerGC::lowerWriteBarrier1(CallInst *target, Function &F) +{ + assert(target->arg_size() == 1); + target->setCalledFunction(writeBarrier1Func); + return target; +} + +Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F) +{ + assert(target->arg_size() == 2); + target->setCalledFunction(writeBarrier2Func); + return target; +} +#endif + Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { ++GCAllocBytesCount; @@ -311,8 +336,13 @@ bool FinalLowerGC::doInitialization(Module &M) { queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc); bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); - +#ifdef MMTK_GC + writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1); + writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2); + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func}; +#else GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc}; +#endif unsigned j = 0; for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) { if (!functionList[i]) @@ -328,8 +358,13 @@ bool FinalLowerGC::doInitialization(Module &M) { bool FinalLowerGC::doFinalization(Module &M) { +#ifdef MMTK_GC + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func}; + queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = nullptr; +#else GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc}; queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr; +#endif auto used = M.getGlobalVariable("llvm.compiler.used"); if (!used) return false; @@ -399,6 +434,10 @@ bool FinalLowerGC::runOnFunction(Function &F) auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes); auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot); auto safepointFunc = getOrNull(jl_intrinsics::safepoint); +#ifdef MMTK_GC + auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1); + auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2); +#endif // Lower all calls to supported intrinsics. 
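    // Each julia.* placeholder intrinsic is rewritten below into a call to
    // the concrete runtime symbol declared in doInitialization (pool/big
    // alloc, queue-root, and, under MMTK_GC, the write barriers).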
for (BasicBlock &BB : F) { @@ -432,6 +471,14 @@ bool FinalLowerGC::runOnFunction(Function &F) else if (callee == queueGCRootFunc) { replaceInstruction(CI, lowerQueueGCRoot(CI, F), it); } +#ifdef MMTK_GC + else if (callee == writeBarrier1Func) { + replaceInstruction(CI, lowerWriteBarrier1(CI, F), it); + } + else if (callee == writeBarrier2Func) { + replaceInstruction(CI, lowerWriteBarrier2(CI, F), it); + } +#endif else if (callee == safepointFunc) { lowerSafepoint(CI, F); it = CI->eraseFromParent(); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 6837dc505a503..11f807bdca33f 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2512,6 +2512,50 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { } IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); +#ifndef MMTK_GC + auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3); + auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); + auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); + builder.SetInsertPoint(mayTrigTerm); + Value *anyChldNotMarked = NULL; + for (unsigned i = 1; i < CI->arg_size(); i++) { + Value *child = CI->getArgOperand(i); + Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1); + Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); + anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; + } + assert(anyChldNotMarked); // handled by all_of test above + MDBuilder MDB(parent->getContext()); + SmallVector Weights{1, 9}; + auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, + MDB.createBranchWeights(Weights)); + builder.SetInsertPoint(trigTerm); + if (CI->getCalledOperand() == write_barrier_func) { + builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); + } + else { + assert(false); + } +#else + if (CI->getCalledOperand() == write_barrier_func) { + // if (CI->arg_size() == 2) { + // // parent, target + // Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier2); + // builder.CreateCall(wb_func, { parent, CI->getArgOperand(1) }); // We need to be careful about arg1, which may not match the type for wb_func. 
We probably need a bitcast + // } else { + // // parent and many targets + // Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + // builder.CreateCall(wb_func, { parent }); + // } + auto barrier = mmtk_needs_write_barrier(); + if (barrier == 1) { + // We only care about parent + Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + builder.CreateCall(wb_func, { parent }); + } + } +#endif + auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3); auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index ea390f01010fd..ff65ec7de3aab 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -117,6 +117,10 @@ namespace jl_intrinsics { static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame"; static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root"; static const char *SAFEPOINT_NAME = "julia.safepoint"; +#ifdef MMTK_GC + static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline"; + static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline"; +#endif // Annotates a function with attributes suitable for GC allocation // functions. Specifically, the return value is marked noalias and nonnull. @@ -223,12 +227,45 @@ namespace jl_intrinsics { intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return intrinsic; }); + +#ifdef MMTK_GC + const IntrinsicDescription writeBarrier1( + WRITE_BARRIER_1_NAME, + [](const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_1_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); + const IntrinsicDescription writeBarrier2( + WRITE_BARRIER_2_NAME, + [](const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue, context.T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_2_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); +#endif } namespace jl_well_known { static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); +#ifdef MMTK_GC + static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline); + static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline); +#endif using jl_intrinsics::addGCAllocAttributes; @@ -276,4 +313,34 @@ namespace jl_well_known { func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return func; }); + +#ifdef MMTK_GC + const WellKnownFunctionDescription GCWriteBarrier1( + GC_WB_1_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_1_NAME); + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); + + const WellKnownFunctionDescription GCWriteBarrier2( + GC_WB_2_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue, context.T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_2_NAME); 
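+            // InaccessibleMemOrArgMemOnly: the barrier only touches its
+            // arguments and GC-private metadata, so LLVM may still reorder
+            // unrelated loads and stores around the call.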
+ func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); +#endif } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 2b2bd50cd0e4d..7f4d7646829f3 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -129,6 +129,11 @@ namespace jl_intrinsics { // `julia.safepoint`: an intrinsic that triggers a GC safepoint. extern const IntrinsicDescription safepoint; + +#ifdef MMTK_GC + extern const IntrinsicDescription writeBarrier1; + extern const IntrinsicDescription writeBarrier2; +#endif } // A namespace for well-known Julia runtime function descriptions. @@ -149,6 +154,11 @@ namespace jl_well_known { // `jl_gc_queue_root`: queues a GC root. extern const WellKnownFunctionDescription GCQueueRoot; + +#ifdef MMTK_GC + extern const WellKnownFunctionDescription GCWriteBarrier1; + extern const WellKnownFunctionDescription GCWriteBarrier2; +#endif } #endif diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 943570167e1ff..06a0a028303ab 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -480,6 +480,17 @@ void objprofile_reset(void) { } +// No inline write barrier -- only used for debugging +JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT +{ + jl_gc_wb_back(parent); +} + +JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_gc_wb(parent, ptr); +} + void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; From 95bc54ab673fcdf11604324638f597f32158a22f Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 5 May 2023 22:45:31 +1200 Subject: [PATCH 005/116] Allow GC to implement array ptr copy (#10) --- src/array.c | 69 +------------------------------------------- src/gc.c | 65 +++++++++++++++++++++++++++++++++++++++++ src/julia.h | 8 +++++ src/julia_internal.h | 2 ++ src/mmtk-gc.c | 6 ++++ 5 files changed, 82 insertions(+), 68 deletions(-) diff --git a/src/array.c b/src/array.c index c6cefbebceb20..86b1056ef4d07 100644 --- a/src/array.c +++ b/src/array.c @@ -59,15 +59,6 @@ JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT return ((char*)jl_array_data(a)) + ((jl_array_ndims(a) == 1 ? (a->maxsize - a->offset) : jl_array_len(a)) * a->elsize) + a->offset; } -STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT -{ - if (a->flags.how == 3) { - a = (jl_array_t*)jl_array_data_owner(a); - assert(jl_is_string(a) || a->flags.how != 3); - } - return (jl_value_t*)a; -} - #if defined(_P64) && defined(UINT128MAX) typedef __uint128_t wideint_t; #else @@ -1198,69 +1189,11 @@ JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) return new_ary; } -// Copy element by element until we hit a young object, at which point -// we can finish by using `memmove`. 
-static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner, - void **src_p, void **dest_p, - ssize_t n) JL_NOTSAFEPOINT -{ - _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; - _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; - for (ssize_t i = 0; i < n; i++) { - void *val = jl_atomic_load_relaxed(src_pa + i); - jl_atomic_store_release(dest_pa + i, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - return i; - } - } - return n; -} - -static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner, - void **src_p, void **dest_p, - ssize_t n) JL_NOTSAFEPOINT -{ - _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; - _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; - for (ssize_t i = 0; i < n; i++) { - void *val = jl_atomic_load_relaxed(src_pa + n - i - 1); - jl_atomic_store_release(dest_pa + n - i - 1, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - return i; - } - } - return n; -} - // Unsafe, assume inbounds and that dest and src have the same eltype JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT { - assert(dest->flags.ptrarray && src->flags.ptrarray); - jl_value_t *owner = jl_array_owner(dest); - // Destination is old and doesn't refer to any young object - if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { - jl_value_t *src_owner = jl_array_owner(src); - // Source is young or being promoted or might refer to young objects - // (i.e. source is not an old object that doesn't have wb triggered) - if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { - ssize_t done; - if (dest_p < src_p || dest_p > src_p + n) { - done = jl_array_ptr_copy_forward(owner, src_p, dest_p, n); - dest_p += done; - src_p += done; - } - else { - done = jl_array_ptr_copy_backward(owner, src_p, dest_p, n); - } - n -= done; - } - } - memmove_refs(dest_p, src_p, n); + jl_gc_array_ptr_copy(dest, dest_p, src, src_p, n); } JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item) diff --git a/src/gc.c b/src/gc.c index 69ec08b6cf9d4..0f6c13777e265 100644 --- a/src/gc.c +++ b/src/gc.c @@ -114,6 +114,71 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); } +// Copy element by element until we hit a young object, at which point +// we can finish by using `memmove`. 
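+// (Once one young element has been seen, jl_gc_queue_root(owner) re-queues
+// the whole destination array, so the remaining slots can be block-copied
+// without per-element checks.)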
+static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner, + void **src_p, void **dest_p, + ssize_t n) JL_NOTSAFEPOINT +{ + _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; + _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; + for (ssize_t i = 0; i < n; i++) { + void *val = jl_atomic_load_relaxed(src_pa + i); + jl_atomic_store_release(dest_pa + i, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { + jl_gc_queue_root(owner); + return i; + } + } + return n; +} + +static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner, + void **src_p, void **dest_p, + ssize_t n) JL_NOTSAFEPOINT +{ + _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; + _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; + for (ssize_t i = 0; i < n; i++) { + void *val = jl_atomic_load_relaxed(src_pa + n - i - 1); + jl_atomic_store_release(dest_pa + n - i - 1, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { + jl_gc_queue_root(owner); + return i; + } + } + return n; +} + +// Unsafe, assume inbounds and that dest and src have the same eltype +JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, + jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT +{ + assert(dest->flags.ptrarray && src->flags.ptrarray); + jl_value_t *owner = jl_array_owner(dest); + // Destination is old and doesn't refer to any young object + if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { + jl_value_t *src_owner = jl_array_owner(src); + // Source is young or being promoted or might refer to young objects + // (i.e. source is not an old object that doesn't have wb triggered) + if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { + ssize_t done; + if (dest_p < src_p || dest_p > src_p + n) { + done = jl_array_ptr_copy_forward(owner, src_p, dest_p, n); + dest_p += done; + src_p += done; + } + else { + done = jl_array_ptr_copy_backward(owner, src_p, dest_p, n); + } + n -= done; + } + } + memmove_refs(dest_p, src_p, n); +} + // Perm gen allocator // 2M pool #define GC_PERM_POOL_SIZE (2 * 1024 * 1024) diff --git a/src/julia.h b/src/julia.h index 710fd11cf9372..2396b7a38a00d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1345,6 +1345,14 @@ STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT return jl_is_array_type(t); } +STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + if (a->flags.how == 3) { + a = (jl_array_t*)jl_array_data_owner(a); + assert(jl_is_string(a) || a->flags.how != 3); + } + return (jl_value_t*)a; +} STATIC_INLINE int jl_is_opaque_closure_type(void *t) JL_NOTSAFEPOINT { diff --git a/src/julia_internal.h b/src/julia_internal.h index 65e1966385039..6d456b470a116 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -630,6 +630,8 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT } #endif // MMTK_GC +JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT; + void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT; void jl_print_gc_stats(JL_STREAM *s); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 06a0a028303ab..9dc21c2ad48db 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -480,6 +480,12 @@ void objprofile_reset(void) { } +JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) 
JL_NOTSAFEPOINT +{ + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_memory_region_copy(ptls->mmtk_mutator_ptr, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); +} + // No inline write barrier -- only used for debugging JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT { From eb407eb68ea976df72c8e1cd6ec8607ff8d98fa5 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 8 May 2023 17:51:14 +1200 Subject: [PATCH 006/116] Use MMTk VM space (#12) --- src/mmtk-gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 9dc21c2ad48db..b354d287baa14 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -511,7 +511,7 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) void jl_gc_notify_image_load(const char* img_data, size_t len) { - // TODO: We should notify MMTk about the image (VM space) + mmtk_set_vm_space((void*)img_data, len); } void jl_gc_notify_image_alloc(char* img_data, size_t len) From 620cb793090956892210c4233dd8e35c4d19d873 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 9 May 2023 20:37:17 +1200 Subject: [PATCH 007/116] Remove duplicate code that is possibly introduced during merging (#13) --- src/llvm-late-gc-lowering.cpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 11f807bdca33f..8a0210c626935 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2556,29 +2556,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { } #endif - auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3); - auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); - auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); - builder.SetInsertPoint(mayTrigTerm); - Value *anyChldNotMarked = NULL; - for (unsigned i = 1; i < CI->arg_size(); i++) { - Value *child = CI->getArgOperand(i); - Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1); - Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); - anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; - } - assert(anyChldNotMarked); // handled by all_of test above - MDBuilder MDB(parent->getContext()); - SmallVector Weights{1, 9}; - auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, - MDB.createBranchWeights(Weights)); - builder.SetInsertPoint(trigTerm); - if (CI->getCalledOperand() == write_barrier_func) { - builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); - } - else { - assert(false); - } CI->eraseFromParent(); } if (maxframeargs == 0 && Frame) { From e7e43f11687e7db62dc18e90f3ab9b24099539fd Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 16 May 2023 16:07:44 +1200 Subject: [PATCH 008/116] Implement write barrier fastpath for sticky immix (#8) This PR implements the write barrier fastpath for sticky immix in both the runtime write barrier and the codegen write barrier. There are also a few other changes: 1. pass collection type to MMTk's `handle_user_collection_request`, 2. call MMTk in `jl_gc_notify_image_alloc`. 
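For background on the fastpath shape used below: MMTk keeps one "log bit" of
side metadata per 8-byte granule, packed eight to a byte, so a single metadata
byte covers 64 bytes of heap; `addr >> 6` selects the byte and `(addr >> 3) & 7`
selects the bit. A minimal, self-contained sketch of that check follows
(`needs_slowpath` and `side_log_base` are illustrative names only; the real
base address is the binding-exported `MMTK_SIDE_LOG_BIT_BASE_ADDRESS`):

    #include <stdint.h>

    /* Sketch of the sticky-immix object-barrier fastpath: consult the
     * per-object log bit and call the out-of-line slowpath only when the
     * bit is set, mirroring the mmtk_gc_wb_fast added by this patch. */
    static inline int needs_slowpath(const void *parent, const uint8_t *side_log_base)
    {
        uintptr_t addr = (uintptr_t)parent;
        const uint8_t *meta = side_log_base + (addr >> 6); /* 1 byte per 64 heap bytes */
        unsigned bit = (unsigned)(addr >> 3) & 7;          /* 1 bit per 8-byte granule */
        return (*meta >> bit) & 1;                         /* 1 => take the slowpath */
    }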
--- src/jl_exported_funcs.inc | 2 ++ src/julia.h | 37 ++++++++++++++++++---- src/julia_internal.h | 7 ++-- src/llvm-final-gc-lowering.cpp | 35 ++++++++++++++++++-- src/llvm-late-gc-lowering.cpp | 57 +++++++++++++++++++++++++-------- src/llvm-pass-helpers.cpp | 58 ++++++++++++++++++++++++++++++++++ src/llvm-pass-helpers.h | 4 +++ src/mmtk-gc.c | 24 +++++++++++--- 8 files changed, 192 insertions(+), 32 deletions(-) diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index b51e55510e172..1f182f37f938f 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -188,6 +188,8 @@ XX(jl_gc_queue_root) \ XX(jl_gc_wb1_noinline) \ XX(jl_gc_wb2_noinline) \ + XX(jl_gc_wb1_slow) \ + XX(jl_gc_wb2_slow) \ XX(jl_gc_safepoint) \ XX(jl_gc_schedule_foreign_sweepfunc) \ XX(jl_gc_set_cb_notify_external_alloc) \ diff --git a/src/julia.h b/src/julia.h index 2396b7a38a00d..75ebab99dbbf7 100644 --- a/src/julia.h +++ b/src/julia.h @@ -961,23 +961,21 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ } #else // MMTK_GC -// MMTk's write barrier method. This is the full write barier including fastpath and slowpath. -// TODO: We should inline fastpath in the following functions, and only call slowpath. -STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT; +STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { - mmtk_gc_wb_full(parent, ptr); + mmtk_gc_wb(parent, ptr); } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { - mmtk_gc_wb_full(ptr, (void*)0); + mmtk_gc_wb(ptr, (void*)0); } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { - mmtk_gc_wb_full(parent, (void*)0); + mmtk_gc_wb(parent, (void*)0); } #endif // MMTK_GC @@ -2284,12 +2282,39 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind; #ifdef MMTK_GC extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); +extern const uint8_t MMTK_NEEDS_WRITE_BARRIER; +extern const uint8_t OBJECT_BARRIER; +extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; + +// Directly call into MMTk for write barrier (debugging only) STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; mmtk_object_reference_write_post(ptls->mmtk_mutator_ptr, parent, ptr); } + +// Inlined fastpath +STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == OBJECT_BARRIER) { + intptr_t addr = (intptr_t) (void*) parent; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + uint8_t byte_val = *meta_addr; + if (((byte_val >> shift) & 1) == 1) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, ptr); + } + } +} + +STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + mmtk_gc_wb_fast(parent, ptr); +} #endif #ifdef __cplusplus diff --git a/src/julia_internal.h b/src/julia_internal.h index 6d456b470a116..fb939e81b4a69 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -335,7 +335,6 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t 
ptls, size_t allocsz); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator); -extern uint8_t mmtk_needs_write_barrier(void); #endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; @@ -617,16 +616,14 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT #else // MMTK_GC -// TODO: We should inline fastpath in the following functions, and only call slowpath. - STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { - mmtk_gc_wb_full(bnd, val); + mmtk_gc_wb(bnd, val); } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* { - mmtk_gc_wb_full(parent, (void*)0); + mmtk_gc_wb(parent, (void*)0); } #endif // MMTK_GC diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index a41f69d74b1e5..d60a8e181177b 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -51,6 +51,8 @@ struct FinalLowerGC: private JuliaPassContext { #ifdef MMTK_GC Function *writeBarrier1Func; Function *writeBarrier2Func; + Function *writeBarrier1SlowFunc; + Function *writeBarrier2SlowFunc; #endif Instruction *pgcstack; @@ -78,6 +80,8 @@ struct FinalLowerGC: private JuliaPassContext { #ifdef MMTK_GC Value *lowerWriteBarrier1(CallInst *target, Function &F); Value *lowerWriteBarrier2(CallInst *target, Function &F); + Value *lowerWriteBarrier1Slow(CallInst *target, Function &F); + Value *lowerWriteBarrier2Slow(CallInst *target, Function &F); #endif }; @@ -227,6 +231,21 @@ Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F) target->setCalledFunction(writeBarrier2Func); return target; } + +Value *FinalLowerGC::lowerWriteBarrier1Slow(CallInst *target, Function &F) +{ + assert(target->arg_size() == 1); + target->setCalledFunction(writeBarrier1SlowFunc); + return target; +} + +Value *FinalLowerGC::lowerWriteBarrier2Slow(CallInst *target, Function &F) +{ + assert(target->arg_size() == 2); + target->setCalledFunction(writeBarrier2SlowFunc); + return target; +} + #endif Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) @@ -339,7 +358,9 @@ bool FinalLowerGC::doInitialization(Module &M) { #ifdef MMTK_GC writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1); writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2); - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func}; + writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow); + writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow); + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; #else GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc}; #endif @@ -359,8 +380,8 @@ bool FinalLowerGC::doInitialization(Module &M) { bool FinalLowerGC::doFinalization(Module &M) { #ifdef MMTK_GC - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func}; - queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = nullptr; + GlobalValue *functionList[] = {queueRootFunc, 
poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; + queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr; #else GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc}; queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr; @@ -437,6 +458,8 @@ bool FinalLowerGC::runOnFunction(Function &F) #ifdef MMTK_GC auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1); auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2); + auto writeBarrier1SlowFunc = getOrNull(jl_intrinsics::writeBarrier1Slow); + auto writeBarrier2SlowFunc = getOrNull(jl_intrinsics::writeBarrier2Slow); #endif // Lower all calls to supported intrinsics. @@ -478,6 +501,12 @@ bool FinalLowerGC::runOnFunction(Function &F) else if (callee == writeBarrier2Func) { replaceInstruction(CI, lowerWriteBarrier2(CI, F), it); } + else if (callee == writeBarrier1SlowFunc) { + replaceInstruction(CI, lowerWriteBarrier1Slow(CI, F), it); + } + else if (callee == writeBarrier2SlowFunc) { + replaceInstruction(CI, lowerWriteBarrier2Slow(CI, F), it); + } #endif else if (callee == safepointFunc) { lowerSafepoint(CI, F); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 8a0210c626935..eec21c0c64010 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2537,22 +2537,51 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { assert(false); } #else + // FIXME: Currently we call write barrier with the src object (parent). + // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all. + // But for other MMTk plans, we need to be careful. + const bool INLINE_WRITE_BARRIER = true; if (CI->getCalledOperand() == write_barrier_func) { - // if (CI->arg_size() == 2) { - // // parent, target - // Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier2); - // builder.CreateCall(wb_func, { parent, CI->getArgOperand(1) }); // We need to be careful about arg1, which may not match the type for wb_func. 
We probably need a bitcast - // } else { - // // parent and many targets - // Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); - // builder.CreateCall(wb_func, { parent }); - // } - auto barrier = mmtk_needs_write_barrier(); - if (barrier == 1) { - // We only care about parent - Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); - builder.CreateCall(wb_func, { parent }); + if (MMTK_NEEDS_WRITE_BARRIER == OBJECT_BARRIER) { + if (INLINE_WRITE_BARRIER) { + auto i8_ty = Type::getInt8Ty(F.getContext()); + auto intptr_ty = T_size; + + // intptr_t addr = (intptr_t) (void*) src; + // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6)); + intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS); + auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address); + auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0)); + + auto parent_val = builder.CreatePtrToInt(parent, intptr_ty); + auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6)); + auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr); + + // intptr_t shift = (addr >> 3) & 0b111; + auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7)); + auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty); + + // uint8_t byte_val = *meta_addr; + auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align()); + + // if (((byte_val >> shift) & 1) == 1) { + auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8); + auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1)); + auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1)); + + // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target); + MDBuilder MDB(F.getContext()); + SmallVector<uint32_t, 2> Weights{1, 9}; + auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights)); + builder.SetInsertPoint(mayTriggerSlowpath); + builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent }); + } else { + Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + builder.CreateCall(wb_func, { parent }); + } } + } else { + assert(false); } #endif diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index ff65ec7de3aab..1e1ae4bc7eada 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -120,6 +120,8 @@ namespace jl_intrinsics { #ifdef MMTK_GC static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline"; static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline"; + static const char *WRITE_BARRIER_1_SLOW_NAME = "julia.write_barrier_1_slow"; + static const char *WRITE_BARRIER_2_SLOW_NAME = "julia.write_barrier_2_slow"; #endif // Annotates a function with attributes suitable for GC allocation @@ -255,6 +257,32 @@ namespace jl_intrinsics { intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return intrinsic; }); + const IntrinsicDescription writeBarrier1Slow( + WRITE_BARRIER_1_SLOW_NAME, + [](const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_1_SLOW_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); + const IntrinsicDescription writeBarrier2Slow( + 
WRITE_BARRIER_2_SLOW_NAME, + [](const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue, context.T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_2_SLOW_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); #endif } @@ -265,6 +293,8 @@ namespace jl_well_known { #ifdef MMTK_GC static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline); static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline); + static const char *GC_WB_1_SLOW_NAME = XSTR(jl_gc_wb1_slow); + static const char *GC_WB_2_SLOW_NAME = XSTR(jl_gc_wb2_slow); #endif using jl_intrinsics::addGCAllocAttributes; @@ -342,5 +372,33 @@ namespace jl_well_known { func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return func; }); + + const WellKnownFunctionDescription GCWriteBarrier1Slow( + GC_WB_1_SLOW_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_1_SLOW_NAME); + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); + + const WellKnownFunctionDescription GCWriteBarrier2Slow( + GC_WB_2_SLOW_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue, context.T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_2_SLOW_NAME); + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); #endif } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 7f4d7646829f3..d6e4be7e05338 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -133,6 +133,8 @@ namespace jl_intrinsics { #ifdef MMTK_GC extern const IntrinsicDescription writeBarrier1; extern const IntrinsicDescription writeBarrier2; + extern const IntrinsicDescription writeBarrier1Slow; + extern const IntrinsicDescription writeBarrier2Slow; #endif } @@ -158,6 +160,8 @@ namespace jl_well_known { #ifdef MMTK_GC extern const WellKnownFunctionDescription GCWriteBarrier1; extern const WellKnownFunctionDescription GCWriteBarrier2; + extern const WellKnownFunctionDescription GCWriteBarrier1Slow; + extern const WellKnownFunctionDescription GCWriteBarrier2Slow; #endif } diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index b354d287baa14..a9feeb6ef4921 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -192,13 +192,13 @@ void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { - /* TODO: not needed? */ + unreachable(); } // TODO: exported, but not MMTk-specific? JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { - /* TODO: confirm not needed? 
*/ + unreachable(); } @@ -207,11 +207,13 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) { + unreachable(); return 0; } JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, jl_value_t **objs, size_t nobjs) { + unreachable(); } @@ -229,7 +231,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } - handle_user_collection_request(ptls); + handle_user_collection_request(ptls, collection); } // Per-thread initialization @@ -497,6 +499,20 @@ JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOT jl_gc_wb(parent, ptr); } +JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, (const void*) 0); +} + +JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, ptr); +} + void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; @@ -516,7 +532,7 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) void jl_gc_notify_image_alloc(char* img_data, size_t len) { - // TODO: We should call MMTk to bulk set object metadata for the image region + mmtk_immortal_region_post_alloc((void*)img_data, len); } #ifdef __cplusplus From ed8580ad3ae1d1ee46b84ec7bbe69ac9b37befca Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 17 May 2023 01:03:35 +0000 Subject: [PATCH 009/116] WIP --- src/gc-common.c | 6 +++--- src/gc.c | 25 +++++++++++++++++++++++++ src/gc.h | 2 +- src/mmtk-gc.c | 10 ++++++++-- src/partr.c | 25 ------------------------- 5 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 0c6138231e24f..8fd368f9e0875 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -46,7 +46,7 @@ memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; // finalizers // --- -static uint64_t finalizer_rngState[JL_RNG_SIZE]; +uint64_t finalizer_rngState[JL_RNG_SIZE]; void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT; @@ -259,7 +259,7 @@ static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT { combine_thread_gc_counts(&gc_num); - live_bytes += (gc_num.deferred_alloc + gc_num.allocd); + inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); gc_num.allocd = 0; gc_num.deferred_alloc = 0; reset_thread_gc_counts(); @@ -501,7 +501,7 @@ void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, // TODO: not needed? gc_cache.*? 
if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; - live_bytes += allocsz - oldsz; + inc_live_bytes(allocsz - oldsz); } else if (allocsz < oldsz) jl_atomic_store_relaxed(&ptls->gc_num.freed, diff --git a/src/gc.c b/src/gc.c index 4a87980ae3924..d6d3955bdb68f 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3677,6 +3677,31 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) return NULL; } +// gc thread function +void jl_gc_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + uv_mutex_lock(&gc_threads_lock); + while (jl_atomic_load(&gc_n_threads_marking) == 0) { + uv_cond_wait(&gc_threads_cond, &gc_threads_lock); + } + uv_mutex_unlock(&gc_threads_lock); + gc_mark_loop_parallel(ptls, 0); + } +} + // added for MMTk integration void enable_collection(void) { diff --git a/src/gc.h b/src/gc.h index a340a1ec0b545..3def80327ceda 100644 --- a/src/gc.h +++ b/src/gc.h @@ -42,7 +42,7 @@ extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); extern jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz); extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize); -extern void jl_rng_split(uint64_t to[4], uint64_t from[4]); +extern void jl_rng_split(uint64_t to[JL_RNG_SIZE], uint64_t from[JL_RNG_SIZE]); extern void gc_premark(jl_ptls_t ptls2); extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner, int8_t can_collect); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index b354d287baa14..08d0bed7b4304 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -229,7 +229,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } - handle_user_collection_request(ptls); + handle_user_collection_request(ptls, collection); } // Per-thread initialization @@ -275,7 +275,7 @@ void jl_gc_init(void) if (jl_options.heap_size_hint) jl_gc_set_max_memory(jl_options.heap_size_hint); - JL_MUTEX_INIT(&heapsnapshot_lock); + JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock"); uv_mutex_init(&gc_perm_lock); gc_num.interval = default_collect_interval; @@ -480,6 +480,12 @@ void objprofile_reset(void) { } +// gc thread function +void jl_gc_threadfun(void *arg) +{ + unreachable(); +} + JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; diff --git a/src/partr.c b/src/partr.c index 403f911b1284f..2c729add629e2 100644 --- a/src/partr.c +++ b/src/partr.c @@ -108,31 +108,6 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *t); -// gc thread function -void jl_gc_threadfun(void *arg) -{ - jl_threadarg_t *targ = (jl_threadarg_t*)arg; - - // initialize this thread (set tid and create heap) - jl_ptls_t ptls = jl_init_threadtls(targ->tid); - - // wait for all threads - jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); - uv_barrier_wait(targ->barrier); - - // free the thread argument here - free(targ); - - while (1) { - uv_mutex_lock(&gc_threads_lock); - while 
(jl_atomic_load(&gc_n_threads_marking) == 0) { - uv_cond_wait(&gc_threads_cond, &gc_threads_lock); - } - uv_mutex_unlock(&gc_threads_lock); - gc_mark_loop_parallel(ptls, 0); - } -} - // thread function: used by all mutator threads except the main thread void jl_threadfun(void *arg) { From ec37ebe24b43973746e4730572c00365dd4edf5e Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 18 May 2023 04:45:30 +0000 Subject: [PATCH 010/116] Minor fix --- src/gc-common.c | 2 ++ src/gc.c | 2 -- src/llvm-late-gc-lowering.cpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 8fd368f9e0875..cfb83c08a7a6b 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -6,6 +6,8 @@ jl_gc_num_t gc_num = {0}; size_t last_long_collect_interval; int gc_n_threads; jl_ptls_t* gc_all_tls_states; +// `tid` of first GC thread +int gc_first_tid; int64_t live_bytes = 0; diff --git a/src/gc.c b/src/gc.c index d6d3955bdb68f..932bb1d97c6db 100644 --- a/src/gc.c +++ b/src/gc.c @@ -17,8 +17,6 @@ extern "C" { _Atomic(int) gc_n_threads_marking; // `tid` of mutator thread that triggered GC _Atomic(int) gc_master_tid; -// `tid` of first GC thread -int gc_first_tid; // Mutex/cond used to synchronize sleep/wakeup of GC threads uv_mutex_t gc_threads_lock; uv_cond_t gc_threads_cond; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index d812146027eba..4877565c61495 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2513,6 +2513,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { } IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); +#ifndef MMTK_GC auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), 3); auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); From 34930e502fac00cac2698ecf14564ff764b03527 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 18 May 2023 05:58:48 +0000 Subject: [PATCH 011/116] Use JL_DLLIMPORT for MMTk functions. 
Update uses of MMTk functions with prefix mmtk_ --- src/init.c | 2 +- src/julia.h | 4 +-- src/julia_internal.h | 13 +++---- src/llvm-late-gc-lowering.cpp | 2 +- src/llvm-pass-helpers.cpp | 64 ++++++++++++++++++++++------------- src/mmtk-gc.c | 38 +++++++++++++-------- src/symbol.c | 2 +- 7 files changed, 76 insertions(+), 49 deletions(-) diff --git a/src/init.c b/src/init.c index a076b9d0fbed5..9c18a60eb8b06 100644 --- a/src/init.c +++ b/src/init.c @@ -825,7 +825,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_ptls_t ptls = jl_init_threadtls(0); #ifdef MMTK_GC - initialize_collection((void *)ptls); + mmtk_initialize_collection((void *)ptls); #endif #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 diff --git a/src/julia.h b/src/julia.h index 5f692d1f4de2d..7950eca3e0f1d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2383,7 +2383,7 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind; extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); extern const uint8_t MMTK_NEEDS_WRITE_BARRIER; -extern const uint8_t OBJECT_BARRIER; +extern const uint8_t MMTK_OBJECT_BARRIER; extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; // Directly call into MMTk for write barrier (debugging only) @@ -2397,7 +2397,7 @@ STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSA // Inlined fastpath STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT { - if (MMTK_NEEDS_WRITE_BARRIER == OBJECT_BARRIER) { + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { intptr_t addr = (intptr_t) (void*) parent; uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); intptr_t shift = (addr >> 3) & 0b111; diff --git a/src/julia_internal.h b/src/julia_internal.h index d89de5753c380..5e5b0ebb76e41 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -333,15 +333,16 @@ extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; -void enable_collection(void); -void disable_collection(void); +extern void enable_collection(void); +extern void disable_collection(void); jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); #ifdef MMTK_GC -JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); -JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); -extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator); +JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); +JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); +JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator); +JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls); #endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; @@ -549,7 +550,7 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT o->header = tag | GC_OLD_MARKED; #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1); + mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1); #endif return 
jl_valueof(o); } diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 4877565c61495..2bf340be13b62 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2543,7 +2543,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { // But for other MMTk plans, we need to be careful. const bool INLINE_WRITE_BARRIER = true; if (CI->getCalledOperand() == write_barrier_func) { - if (MMTK_NEEDS_WRITE_BARRIER == OBJECT_BARRIER) { + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { if (INLINE_WRITE_BARRIER) { auto i8_ty = Type::getInt8Ty(F.getContext()); auto intptr_ty = T_size; diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 1aa7346516f62..df3ffa5e27486 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -243,11 +243,13 @@ namespace jl_intrinsics { #ifdef MMTK_GC const IntrinsicDescription writeBarrier1( WRITE_BARRIER_1_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto intrinsic = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue }, false), Function::ExternalLinkage, WRITE_BARRIER_1_NAME); @@ -256,11 +258,13 @@ namespace jl_intrinsics { }); const IntrinsicDescription writeBarrier2( WRITE_BARRIER_2_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto intrinsic = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue, context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue, T_prjlvalue }, false), Function::ExternalLinkage, WRITE_BARRIER_2_NAME); @@ -269,11 +273,13 @@ namespace jl_intrinsics { }); const IntrinsicDescription writeBarrier1Slow( WRITE_BARRIER_1_SLOW_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto intrinsic = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue }, false), Function::ExternalLinkage, WRITE_BARRIER_1_SLOW_NAME); @@ -282,11 +288,13 @@ namespace jl_intrinsics { }); const IntrinsicDescription writeBarrier2Slow( WRITE_BARRIER_2_SLOW_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto intrinsic = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue, context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue, T_prjlvalue }, false), Function::ExternalLinkage, WRITE_BARRIER_2_SLOW_NAME); @@ -379,11 +387,13 @@ namespace jl_well_known { #ifdef MMTK_GC const WellKnownFunctionDescription GCWriteBarrier1( GC_WB_1_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto func = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue }, false), Function::ExternalLinkage, GC_WB_1_NAME); @@ -393,11 +403,13 @@ namespace jl_well_known { const WellKnownFunctionDescription GCWriteBarrier2( GC_WB_2_NAME, - [](const JuliaPassContext 
&context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto func = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue, context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue, T_prjlvalue }, false), Function::ExternalLinkage, GC_WB_2_NAME); @@ -407,11 +419,13 @@ namespace jl_well_known { const WellKnownFunctionDescription GCWriteBarrier1Slow( GC_WB_1_SLOW_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto func = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue }, false), Function::ExternalLinkage, GC_WB_1_SLOW_NAME); @@ -421,11 +435,13 @@ namespace jl_well_known { const WellKnownFunctionDescription GCWriteBarrier2Slow( GC_WB_2_SLOW_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto func = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue, context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue, T_prjlvalue }, false), Function::ExternalLinkage, GC_WB_2_SLOW_NAME); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index b3646cb16dacf..6d232919a55f8 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -72,7 +72,7 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT { - register_finalizer(v, f, 1); + mmtk_register_finalizer(v, f, 1); } // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) @@ -87,13 +87,13 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); } else { - register_finalizer(v, f, 0); + mmtk_register_finalizer(v, f, 0); } } JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) { - run_finalizers_for_obj(o); + mmtk_run_finalizers_for_obj(o); } void jl_gc_run_all_finalizers(jl_task_t *ct) @@ -103,7 +103,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { - register_finalizer(v, f, 0); + mmtk_register_finalizer(v, f, 0); } @@ -192,13 +192,13 @@ void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { - unreachable(); + mmtk_unreachable(); } // TODO: exported, but not MMTk-specific? 
JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { - unreachable(); + mmtk_unreachable(); } @@ -207,13 +207,13 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) { - unreachable(); + mmtk_unreachable(); return 0; } JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, jl_value_t **objs, size_t nobjs) { - unreachable(); + mmtk_unreachable(); } @@ -231,7 +231,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } - handle_user_collection_request(ptls, collection); + mmtk_handle_user_collection_request(ptls, collection); } // Per-thread initialization @@ -266,7 +266,7 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); - MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid); + MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator); } @@ -337,9 +337,9 @@ void jl_gc_init(void) // if only max size is specified initialize MMTk with a fixed size heap if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { - gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + mmtk_gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); } else { - gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + mmtk_gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); } } @@ -485,7 +485,17 @@ void objprofile_reset(void) // gc thread function void jl_gc_threadfun(void *arg) { - unreachable(); + mmtk_unreachable(); +} + +// added for MMTk integration +void enable_collection(void) +{ + mmtk_enable_collection(); +} +void disable_collection(void) +{ + mmtk_disable_collection(); } JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT @@ -522,7 +532,7 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; - void* addr = alloc(ptls->mmtk_mutator_ptr, sz, align, offset, 1); + void* addr = mmtk_alloc(ptls->mmtk_mutator_ptr, sz, align, offset, 1); return addr; } diff --git a/src/symbol.c b/src/symbol.c index 00de9872e8255..dcfa0b6086846 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -41,7 +41,7 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1); + mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1); #endif jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); From 1af2dd00b700032ed5757a8e29b855732e989c81 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 18 May 2023 22:17:48 +0000 Subject: [PATCH 012/116] Pass n_gcthreads to mmtk_gc_init. Avoid spawning GC threads in Julia. 
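In short, the Rust binding now owns the GC worker threads: jl_gc_init forwards `jl_options.ngcthreads` to `mmtk_gc_init` once at startup, and Julia spawns no GC threads of its own. A compilable sketch of the handoff, with assumed parameter names and integer types read off the call sites in this diff (the binding's real prototype may differ):

    #include <stddef.h>
    #include <stdint.h>

    /* assumed shape of the binding's init entry point */
    extern void mmtk_gc_init(uintptr_t min_heap_size, uintptr_t max_heap_size,
                             uintptr_t n_gcthreads, void *upcalls, size_t header_size);

    /* called once from jl_gc_init(); MMTk spawns n_gcthreads workers internally */
    static void init_mmtk_sketch(uintptr_t min_heap, uintptr_t max_heap,
                                 uintptr_t ngcthreads, void *upcalls, size_t tag_size)
    {
        mmtk_gc_init(min_heap, max_heap, ngcthreads, upcalls, tag_size);
    }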
--- src/mmtk-gc.c | 4 ++-- src/threading.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 6d232919a55f8..10635cc11a07a 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -337,9 +337,9 @@ void jl_gc_init(void) // if only max size is specified initialize MMTk with a fixed size heap if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { - mmtk_gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); } else { - mmtk_gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); } } diff --git a/src/threading.c b/src/threading.c index 52d69805d0b79..4f24ce1aad704 100644 --- a/src/threading.c +++ b/src/threading.c @@ -663,6 +663,11 @@ void jl_init_threading(void) ngcthreads = (nthreads / 2) - 1; } } +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads. So we just set ngcthreads to 0 here + // to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); From da45bf76f3431ae0662e44d581a91bee7cae987d Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 19 May 2023 05:49:34 +0000 Subject: [PATCH 013/116] Set ngcthreads=0 in jl_start_threads --- src/mmtk-gc.c | 3 +++ src/threading.c | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 10635cc11a07a..8b4d1f2c22397 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -335,6 +335,9 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } + // jl_n_gcthreads must match jl_options.ngcthreads here so that either can be used below; assert it. + assert(jl_n_gcthreads == jl_options.ngcthreads); + // if only max size is specified initialize MMTk with a fixed size heap if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); diff --git a/src/threading.c b/src/threading.c index 4f24ce1aad704..51bdd6e8107da 100644 --- a/src/threading.c +++ b/src/threading.c @@ -663,6 +663,11 @@ void jl_init_threading(void) ngcthreads = (nthreads / 2) - 1; } } -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads. So we just set ngcthreads to 0 here - // to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); @@ -686,6 +681,11 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawns its own GC threads. + // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. 
+ ngcthreads = 0; +#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; From 8194356082ec76655dc4fb14a909e9b721730b79 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 25 May 2023 23:48:23 +0000 Subject: [PATCH 014/116] Fix stock Julia build --- src/gc-debug.c | 203 ++++++++++++++----------------------------------- src/gc.c | 138 ++++++++++++++++++++++++++++----- src/gc.h | 49 +++++------- src/mmtk-gc.c | 2 + 4 files changed, 201 insertions(+), 191 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index fc3da5b2ba282..df2e3487506fa 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -647,91 +647,6 @@ void jl_gc_debug_print_status(void) } #endif -#ifdef OBJPROFILE -static htable_t obj_counts[3]; -static htable_t obj_sizes[3]; -void objprofile_count(void *ty, int old, int sz) -{ - if (gc_verifying) return; - if ((intptr_t)ty <= 0x10) { - ty = (void*)jl_buff_tag; - } - else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag && - jl_typeof(ty) == (jl_value_t*)jl_datatype_type && - ((jl_datatype_t*)ty)->instance) { - ty = jl_singleton_tag; - } - void **bp = ptrhash_bp(&obj_counts[old], ty); - if (*bp == HT_NOTFOUND) - *bp = (void*)2; - else - (*((intptr_t*)bp))++; - bp = ptrhash_bp(&obj_sizes[old], ty); - if (*bp == HT_NOTFOUND) - *bp = (void*)(intptr_t)(1 + sz); - else - *((intptr_t*)bp) += sz; -} - -void objprofile_reset(void) -{ - for (int g = 0; g < 3; g++) { - htable_reset(&obj_counts[g], 0); - htable_reset(&obj_sizes[g], 0); - } -} - -static void objprofile_print(htable_t nums, htable_t sizes) -{ - for(int i=0; i < nums.size; i+=2) { - if (nums.table[i+1] != HT_NOTFOUND) { - void *ty = nums.table[i]; - int num = (intptr_t)nums.table[i + 1] - 1; - size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1; - static const int ptr_hex_width = 2 * sizeof(void*); - if (sz > 2e9) { - jl_safe_printf(" %6d : %*.1f GB of (%*p) ", - num, 6, ((double)sz) / 1024 / 1024 / 1024, - ptr_hex_width, ty); - } - else if (sz > 2e6) { - jl_safe_printf(" %6d : %*.1f MB of (%*p) ", - num, 6, ((double)sz) / 1024 / 1024, - ptr_hex_width, ty); - } - else if (sz > 2e3) { - jl_safe_printf(" %6d : %*.1f kB of (%*p) ", - num, 6, ((double)sz) / 1024, - ptr_hex_width, ty); - } - else { - jl_safe_printf(" %6d : %*d B of (%*p) ", - num, 6, (int)sz, ptr_hex_width, ty); - } - if (ty == (void*)jl_buff_tag) - jl_safe_printf("#<buff>"); - else if (ty == jl_malloc_tag) - jl_safe_printf("#<malloc>"); - else if (ty == jl_singleton_tag) - jl_safe_printf("#<singleton>"); - else - jl_static_show(JL_STDERR, (jl_value_t*)ty); - jl_safe_printf("\n"); - } - } -} - -void objprofile_printall(void) -{ - jl_safe_printf("Transient mark :\n"); - objprofile_print(obj_counts[0], obj_sizes[0]); - jl_safe_printf("Perm mark :\n"); - objprofile_print(obj_counts[1], obj_sizes[1]); - jl_safe_printf("Remset :\n"); - objprofile_print(obj_counts[2], obj_sizes[2]); -} -#endif - #if defined(GC_TIME) || defined(GC_FINAL_STATS) STATIC_INLINE double jl_ns2ms(int64_t t) { @@ -1257,68 +1172,68 @@ void gc_count_pool(void) // `offset` will be added to `mq->current` for convenience in the debugger. NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset) { - jl_jmp_buf *old_buf = jl_get_safe_restore(); - jl_jmp_buf buf; - jl_set_safe_restore(&buf); - if (jl_setjmp(buf, 0) != 0) { - jl_safe_printf("\n!!! 
ERROR when unwinding gc mark loop -- ABORTING !!!\n"); - jl_set_safe_restore(old_buf); - return; - } - jl_value_t **start = mq->start; - jl_value_t **end = mq->current + offset; - for (; start < end; start++) { - jl_value_t *obj = *start; - jl_taggedvalue_t *o = jl_astaggedvalue(obj); - jl_safe_printf("Queued object: %p :: (tag: %zu) (bits: %zu)\n", obj, - (uintptr_t)o->header, ((uintptr_t)o->header & 3)); - jl_((void*)(jl_datatype_t *)(o->header & ~(uintptr_t)0xf)); - } - jl_set_safe_restore(old_buf); -} - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - else if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)obj; - start = (char*)a->data; - len = jl_array_len(a); - elsize = a->elsize; - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} + // jl_jmp_buf *old_buf = jl_get_safe_restore(); + // jl_jmp_buf buf; + // jl_set_safe_restore(&buf); + // if (jl_setjmp(buf, 0) != 0) { + // jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n"); + // jl_set_safe_restore(old_buf); + // return; + // } + // jl_value_t **start = mq->start; + // jl_value_t **end = mq->current + offset; + // for (; start < end; start++) { + // jl_value_t *obj = *start; + // jl_taggedvalue_t *o = jl_astaggedvalue(obj); + // jl_safe_printf("Queued object: %p :: (tag: %zu) (bits: %zu)\n", obj, + // (uintptr_t)o->header, ((uintptr_t)o->header & 3)); + // jl_((void*)(jl_datatype_t *)(o->header & ~(uintptr_t)0xf)); + // } + // jl_set_safe_restore(old_buf); +} + +// int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +// { +// int nf = (int)jl_datatype_nfields(vt); +// for (int i = 1; i < nf; i++) { +// if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) +// return i - 1; +// } +// return nf - 1; +// } + +// int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +// { +// char *slot = (char*)_slot; +// jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); +// char *start = NULL; +// size_t len = 0; +// size_t elsize = sizeof(void*); +// if (vt == jl_module_type) { +// jl_module_t *m = (jl_module_t*)obj; +// start = (char*)m->usings.items; +// len = m->usings.len; +// } +// else if (vt == jl_simplevector_type) { +// start = (char*)jl_svec_data(obj); +// len = jl_svec_len(obj); +// } +// else if (vt->name == jl_array_typename) { +// jl_array_t *a = (jl_array_t*)obj; +// start = (char*)a->data; +// len = jl_array_len(a); +// elsize = a->elsize; +// } +// if (slot < start || slot >= start + elsize * len) +// return -1; +// return (slot - start) / elsize; +// } static int gc_logging_enabled = 0; -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} +// JL_DLLEXPORT void jl_enable_gc_logging(int enable) { +// gc_logging_enabled = enable; +// } void _report_gc_finished(uint64_t pause, uint64_t freed, int full, 
int recollect) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { diff --git a/src/gc.c b/src/gc.c index 932bb1d97c6db..ce80597a937f1 100644 --- a/src/gc.c +++ b/src/gc.c @@ -376,10 +376,6 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) } } -<<<<<<< HEAD - -======= ->>>>>>> upstream/master // malloc wrappers, aligned allocation #if defined(_OS_WINDOWS_) @@ -2648,6 +2644,8 @@ JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls) gc_drain_own_chunkqueue(ptls, &ptls->mark_queue); } +extern int gc_first_tid; + void gc_mark_and_steal(jl_ptls_t ptls) { jl_gc_markqueue_t *mq = &ptls->mark_queue; @@ -2799,24 +2797,109 @@ void gc_mark_clean_reclaim_sets(void) } } -static void gc_premark(jl_ptls_t ptls2) +// void gc_premark(jl_ptls_t ptls2) +// { +// arraylist_t *remset = ptls2->heap.remset; +// ptls2->heap.remset = ptls2->heap.last_remset; +// ptls2->heap.last_remset = remset; +// ptls2->heap.remset->len = 0; +// ptls2->heap.remset_nptr = 0; +// // avoid counting remembered objects +// // in `perm_scanned_bytes` +// size_t len = remset->len; +// void **items = remset->items; +// for (size_t i = 0; i < len; i++) { +// jl_value_t *item = (jl_value_t *)items[i]; +// objprofile_count(jl_typeof(item), 2, 0); +// jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; +// } +// } + +#ifdef OBJPROFILE +static htable_t obj_counts[3]; +static htable_t obj_sizes[3]; +void objprofile_count(void *ty, int old, int sz) +{ + if (gc_verifying) return; + if ((intptr_t)ty <= 0x10) { + ty = (void*)jl_buff_tag; + } + else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag && + jl_typeof(ty) == (jl_value_t*)jl_datatype_type && + ((jl_datatype_t*)ty)->instance) { + ty = jl_singleton_tag; + } + void **bp = ptrhash_bp(&obj_counts[old], ty); + if (*bp == HT_NOTFOUND) + *bp = (void*)2; + else + (*((intptr_t*)bp))++; + bp = ptrhash_bp(&obj_sizes[old], ty); + if (*bp == HT_NOTFOUND) + *bp = (void*)(intptr_t)(1 + sz); + else + *((intptr_t*)bp) += sz; +} + +void objprofile_reset(void) { - arraylist_t *remset = ptls2->heap.remset; - ptls2->heap.remset = ptls2->heap.last_remset; - ptls2->heap.last_remset = remset; - ptls2->heap.remset->len = 0; - ptls2->heap.remset_nptr = 0; - // avoid counting remembered objects - // in `perm_scanned_bytes` - size_t len = remset->len; - void **items = remset->items; - for (size_t i = 0; i < len; i++) { - jl_value_t *item = (jl_value_t *)items[i]; - objprofile_count(jl_typeof(item), 2, 0); - jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; + for (int g = 0; g < 3; g++) { + htable_reset(&obj_counts[g], 0); + htable_reset(&obj_sizes[g], 0); + } +} + +static void objprofile_print(htable_t nums, htable_t sizes) +{ + for(int i=0; i < nums.size; i+=2) { + if (nums.table[i+1] != HT_NOTFOUND) { + void *ty = nums.table[i]; + int num = (intptr_t)nums.table[i + 1] - 1; + size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1; + static const int ptr_hex_width = 2 * sizeof(void*); + if (sz > 2e9) { + jl_safe_printf(" %6d : %*.1f GB of (%*p) ", + num, 6, ((double)sz) / 1024 / 1024 / 1024, + ptr_hex_width, ty); + } + else if (sz > 2e6) { + jl_safe_printf(" %6d : %*.1f MB of (%*p) ", + num, 6, ((double)sz) / 1024 / 1024, + ptr_hex_width, ty); + } + else if (sz > 2e3) { + jl_safe_printf(" %6d : %*.1f kB of (%*p) ", + num, 6, ((double)sz) / 1024, + ptr_hex_width, ty); + } + else { + jl_safe_printf(" %6d : %*d B of (%*p) ", + num, 6, (int)sz, ptr_hex_width, ty); + } + if (ty == (void*)jl_buff_tag) + jl_safe_printf("#"); + else if (ty == jl_malloc_tag) + jl_safe_printf("#"); + else if (ty 
== jl_singleton_tag) + jl_safe_printf("#"); + else + jl_static_show(JL_STDERR, (jl_value_t*)ty); + jl_safe_printf("\n"); + } } } +void objprofile_printall(void) +{ + jl_safe_printf("Transient mark :\n"); + objprofile_print(obj_counts[0], obj_sizes[0]); + jl_safe_printf("Perm mark :\n"); + objprofile_print(obj_counts[1], obj_sizes[1]); + jl_safe_printf("Remset :\n"); + objprofile_print(obj_counts[2], obj_sizes[2]); +} +#endif + static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) { jl_task_t *task; @@ -2955,6 +3038,9 @@ static void sweep_finalizer_list(arraylist_t *list) size_t jl_maxrss(void); +extern void objprofile_printall(void); +extern void objprofile_reset(void); + // Only one thread should be running in this function static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) { @@ -3708,6 +3794,22 @@ void disable_collection(void) { } +JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT +{ +} + +JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ +} + +JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT +{ +} + +JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT +{ +} + #ifdef __cplusplus } #endif diff --git a/src/gc.h b/src/gc.h index 3def80327ceda..701c2c769e1b4 100644 --- a/src/gc.h +++ b/src/gc.h @@ -47,7 +47,24 @@ extern void gc_premark(jl_ptls_t ptls2); extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner, int8_t can_collect); extern size_t jl_array_nbytes(jl_array_t *a); -extern void objprofile_count(void *ty, int old, int sz); + +#ifdef OBJPROFILE +void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT; +void objprofile_printall(void); +void objprofile_reset(void); +#else +static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT +{ +} + +static inline void objprofile_printall(void) +{ +} + +static inline void objprofile_reset(void) +{ +} +#endif #define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) #define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) @@ -70,7 +87,7 @@ extern uint64_t finalizer_rngState[]; extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; -// keep in sync with the Julia type of the same name in base/timing.jl +// This struct must be kept in sync with the Julia type of the same name in base/timing.jl typedef struct { int64_t allocd; int64_t deferred_alloc; @@ -82,7 +99,6 @@ typedef struct { uint64_t freecall; uint64_t total_time; uint64_t total_allocd; - uint64_t since_sweep; size_t interval; int pause; int full_sweep; @@ -90,6 +106,7 @@ typedef struct { uint64_t max_memory; uint64_t time_to_safepoint; uint64_t max_time_to_safepoint; + uint64_t total_time_to_safepoint; uint64_t sweep_time; uint64_t mark_time; uint64_t total_sweep_time; @@ -217,32 +234,6 @@ typedef struct { jl_alloc_num_t print; } jl_gc_debug_env_t; -// This struct must be kept in sync with the Julia type of the same name in base/timing.jl -typedef struct { - int64_t allocd; - int64_t deferred_alloc; - int64_t freed; - uint64_t malloc; - uint64_t realloc; - uint64_t poolalloc; - uint64_t bigalloc; - uint64_t freecall; - uint64_t total_time; - uint64_t total_allocd; - size_t interval; - int pause; - int full_sweep; - uint64_t max_pause; - uint64_t max_memory; - uint64_t time_to_safepoint; - uint64_t max_time_to_safepoint; - uint64_t total_time_to_safepoint; - uint64_t 
sweep_time; - uint64_t mark_time; - uint64_t total_sweep_time; - uint64_t total_mark_time; -} jl_gc_num_t; - // Array chunks (work items representing suffixes of // large arrays of pointers left to be marked) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 8b4d1f2c22397..5e868ef11c1d2 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -473,6 +473,7 @@ void jl_print_gc_stats(JL_STREAM *s) { } +#ifdef OBJPROFILE void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT { } @@ -484,6 +485,7 @@ void objprofile_printall(void) void objprofile_reset(void) { } +#endif // gc thread function void jl_gc_threadfun(void *arg) From fb024c6f51849fde6ec10783a5eb595f34269e78 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 30 May 2023 16:00:51 +1200 Subject: [PATCH 015/116] Copy libmmtk_julia to usr/lib (#15) Copies libmmtk_julia.so from the source directory (`mmtk-julia/mmtk/target/debug/libmmtk_julia.so`) to `build/usr/lib`. --- Make.inc | 15 ++++++++++++++- src/Makefile | 4 +++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Make.inc b/Make.inc index 65b1468781632..6920fc64ecf70 100644 --- a/Make.inc +++ b/Make.inc @@ -750,7 +750,14 @@ endif MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk MMTK_API_INC = $(MMTK_DIR)/api MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia -MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD) -lmmtk_julia +ifeq ($(OS),Linux) +MMTK_LIB_NAME := libmmtk_julia.so +else +$(error "Unsupported OS for MMTk") +endif +MMTK_LIB_SRC := $(MMTK_DIR)/target/$(MMTK_BUILD)/$(MMTK_LIB_NAME) +MMTK_LIB_DST := $(BUILDROOT)/usr/lib/$(MMTK_LIB_NAME) +MMTK_LIB := -lmmtk_julia LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD)/ else MMTK_JULIA_INC := @@ -1692,6 +1699,9 @@ PRINT_PERL = printf ' %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL) PRINT_FLISP = printf ' %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) PRINT_JULIA = printf ' %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) PRINT_DTRACE = printf ' %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) +ifeq ($(WITH_MMTK), 1) +PRINT_MMTK = printf ' %b %b\n' $(LINKCOLOR)MMTK$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) +endif else QUIET_MAKE = @@ -1702,6 +1712,9 @@ PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1) PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1) PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1) PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1) +ifeq ($(WITH_MMTK), 1) +PRINT_MMTK = echo '$(subst ','\'',$(1))'; $(1) +endif endif diff --git a/src/Makefile b/src/Makefile index 66a3f3ac1c24b..ff5f4ce8b99d6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -175,7 +175,7 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) ifeq ($(WITH_MMTK), 1) MMTK_SRCS := mmtk_julia -MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) +MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) else MMTK_OBJS := @@ -254,6 +254,8 @@ $(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@) $(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@) +$(MMTK_LIB_DST): $(MMTK_LIB_SRC) + @$(call PRINT_MMTK, cp $< $@) endif # public header rules From 98a66ba3c0925ea21bfe051a191210eeae7df0f2 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 16 Jun 2023 10:10:34 +1200 Subject: [PATCH 016/116] Embed mutator in _jl_tls_states_t 
(#16) This PR embeds the MMTk mutator struct in `_jl_tls_states_t`, and also adds `jl_deinit_thread_heap` to allow a proper destruction of the mutator struct. --- src/gc.c | 5 +++++ src/julia.h | 4 ++-- src/julia_internal.h | 3 ++- src/julia_threads.h | 4 +--- src/llvm-final-gc-lowering.cpp | 12 +++++++++--- src/mmtk-gc.c | 19 ++++++++++++++----- src/symbol.c | 2 +- src/threading.c | 3 +++ 8 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/gc.c b/src/gc.c index ce80597a937f1..90eae32f0affc 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3501,6 +3501,11 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); } +void jl_deinit_thread_heap(jl_ptls_t ptls) +{ + // Do nothing +} + // System-wide initializations void jl_gc_init(void) { diff --git a/src/julia.h b/src/julia.h index 7950eca3e0f1d..253105ef94386 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2391,7 +2391,7 @@ STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSA { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_post(ptls->mmtk_mutator_ptr, parent, ptr); + mmtk_object_reference_write_post(&ptls->mmtk_mutator, parent, ptr); } // Inlined fastpath @@ -2405,7 +2405,7 @@ STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSA if (((byte_val >> shift) & 1) == 1) { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, ptr); + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); } } } diff --git a/src/julia_internal.h b/src/julia_internal.h index 5e5b0ebb76e41..76ed8f977dc7a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -550,7 +550,7 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT o->header = tag | GC_OLD_MARKED; #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1); + mmtk_post_alloc(&ptls->mmtk_mutator, jl_valueof(o), allocsz, 1); #endif return jl_valueof(o); } @@ -918,6 +918,7 @@ void jl_init_serializer(void); void jl_gc_init(void); void jl_init_uv(void); void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT; +void jl_deinit_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT; void jl_init_int32_int64_cache(void); JL_DLLEXPORT void jl_init_options(void); diff --git a/src/julia_threads.h b/src/julia_threads.h index 46ad724b71aa0..f79d17d35cb64 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -282,9 +282,7 @@ typedef struct _jl_tls_states_t { ) #ifdef MMTK_GC - MMTkMutatorContext* mmtk_mutator_ptr; - void* cursor; - void* limit; + MMTkMutatorContext mmtk_mutator; #endif // some hidden state (usually just because we don't have the type's size declaration) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 6ad46f1eb01d4..48eb584b81893 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -280,17 +280,23 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) #else // MMTK_GC auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); - auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor)); - auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, limit)); + + // Assuming we use the first immix 
allocator. + // FIXME: We should get the allocator index and type from MMTk. + auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); - + // offset = 8 auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); auto result = builder.CreateNSWAdd(cursor, delta, "result"); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 5e868ef11c1d2..db3affd603cb2 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -266,8 +266,17 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + // Create mutator MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); - ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator); + // Copy the mutator to the thread local storage + memcpy(&ptls->mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); + // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) + mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator); +} + +void jl_deinit_thread_heap(jl_ptls_t ptls) +{ + mmtk_destroy_mutator(&ptls->mmtk_mutator); } // System-wide initialization @@ -506,7 +515,7 @@ void disable_collection(void) JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - mmtk_memory_region_copy(ptls->mmtk_mutator_ptr, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); + mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); } // No inline write barrier -- only used for debugging @@ -524,20 +533,20 @@ JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, (const void*) 0); + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, (const void*) 0); } JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, ptr); + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); } void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; - void* addr = mmtk_alloc(ptls->mmtk_mutator_ptr, 
sz, align, offset, 1); + void* addr = mmtk_alloc(&ptls->mmtk_mutator, sz, align, offset, 1); return addr; } diff --git a/src/symbol.c b/src/symbol.c index dcfa0b6086846..f1cd18cfb84cc 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -41,7 +41,7 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1); + mmtk_post_alloc(&ptls->mmtk_mutator, jl_valueof(tag), nb, 1); #endif jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); diff --git a/src/threading.c b/src/threading.c index 51bdd6e8107da..d58528fa183be 100644 --- a/src/threading.c +++ b/src/threading.c @@ -478,6 +478,9 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER #else pthread_mutex_unlock(&in_signal_lock); #endif + + jl_deinit_thread_heap(ptls); + // then park in safe-region (void)jl_gc_safe_enter(ptls); } From 0d8bbd943af2642f04dc6d8c2a74543c4ec8e84f Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 29 Jun 2023 19:36:39 +1200 Subject: [PATCH 017/116] Align up alloc size (#18) --- src/julia.h | 8 ++++++++ src/mmtk-gc.c | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/julia.h b/src/julia.h index 253105ef94386..44650a7d6ed0a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2414,6 +2414,14 @@ STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOI { mmtk_gc_wb_fast(parent, ptr); } + +#define MMTK_MIN_ALIGNMENT 4 +// MMTk assumes allocation size is aligned to min alignment. +STATIC_INLINE size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT +{ + return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1); +} + #endif #ifdef __cplusplus diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index db3affd603cb2..84df79f432b6a 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -546,7 +546,8 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; - void* addr = mmtk_alloc(&ptls->mmtk_mutator, sz, align, offset, 1); + size_t allocsz = mmtk_align_alloc_sz(sz); + void* addr = mmtk_alloc(&ptls->mmtk_mutator, allocsz, align, offset, 1); return addr; } From bf1c43e84b4177513b082fb1727f6c360b5c33d4 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 30 Jun 2023 13:21:56 +1200 Subject: [PATCH 018/116] Allow skip inlined fastpath (#19) --- src/llvm-final-gc-lowering.cpp | 146 ++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 68 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 48eb584b81893..3f644a365a86c 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -281,74 +281,84 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); - // Assuming we use the first immix allocator. - // FIXME: We should get the allocator index and type from MMTk. 
- auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); - - auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); - auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); - - auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); - auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); - auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); - - // offset = 8 - auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); - auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); - auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); - // alignment 16 (15 = 16 - 1) - auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); - auto result = builder.CreateNSWAdd(cursor, delta, "result"); - - auto new_cursor = builder.CreateNSWAdd(result, pool_osize); - - auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); - auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); - auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); - - auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); - - auto current_block = target->getParent(); - builder.SetInsertPoint(target->getNextNode()); - auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow"); - auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont"); - - auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); - auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont); - - auto next_br = current_block->getTerminator(); - next_br->eraseFromParent(); - builder.SetInsertPoint(current_block); - builder.CreateCondBr(gt_limit, slowpath, fastpath); - - // slowpath - builder.SetInsertPoint(slowpath); - auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); - new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); - builder.CreateBr(top_cont); - - // // fastpath - builder.SetInsertPoint(fastpath); - builder.CreateStore(new_cursor, cursor_ptr); - - // ptls->gc_num.allocd += osize; - auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num)); - auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); - auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); - auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); - auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); - builder.CreateStore(pool_allocd_total, pool_alloc_tls); - - auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), 
sizeof(jl_taggedvalue_t))); - auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType()); - builder.CreateBr(top_cont); - - phiNode->addIncoming(new_call, slowpath); - phiNode->addIncoming(v_as_ptr, fastpath); - phiNode->takeName(target); - - return phiNode; + // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. + // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. + const bool INLINE_FASTPATH_ALLOCATION = true; + + if (INLINE_FASTPATH_ALLOCATION) { + // Assuming we use the first immix allocator. + // FIXME: We should get the allocator index and type from MMTk. + auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); + + auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); + auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); + auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); + + // offset = 8 + auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); + auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); + auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) + auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); + auto result = builder.CreateNSWAdd(cursor, delta, "result"); + + auto new_cursor = builder.CreateNSWAdd(result, pool_osize); + + auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); + auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); + auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); + + auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); + + auto current_block = target->getParent(); + builder.SetInsertPoint(target->getNextNode()); + auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow"); + auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont"); + + auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); + auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont); + + auto next_br = current_block->getTerminator(); + next_br->eraseFromParent(); + builder.SetInsertPoint(current_block); + builder.CreateCondBr(gt_limit, slowpath, fastpath); + + // slowpath + builder.SetInsertPoint(slowpath); + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); + new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); + builder.CreateBr(top_cont); + + // // fastpath + builder.SetInsertPoint(fastpath); + builder.CreateStore(new_cursor, cursor_ptr); + + // ptls->gc_num.allocd += 
osize;
+        auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
+        auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
+        auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
+        auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
+        auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
+        builder.CreateStore(pool_allocd_total, pool_alloc_tls);
+
+        auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
+        auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
+        builder.CreateBr(top_cont);
+
+        phiNode->addIncoming(new_call, slowpath);
+        phiNode->addIncoming(v_as_ptr, fastpath);
+        phiNode->takeName(target);
+
+        return phiNode;
+    } else {
+        auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+        newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
+        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
+    }
 #endif // MMTK_GC
     }
 } else {

From e5fc5ddebd884ee15124a1be8e4d599518433c1b Mon Sep 17 00:00:00 2001
From: Yi Lin
Date: Mon, 3 Jul 2023 11:50:56 +1200
Subject: [PATCH 019/116] Avoid calling mmtk_gc_poll frequently (#20)

---
 src/julia_threads.h |  1 +
 src/mmtk-gc.c       | 28 ++++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/julia_threads.h b/src/julia_threads.h
index f79d17d35cb64..3e9db5b676577 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -283,6 +283,7 @@ typedef struct _jl_tls_states_t {

 #ifdef MMTK_GC
     MMTkMutatorContext mmtk_mutator;
+    size_t malloc_sz_since_last_poll;
 #endif

 // some hidden state (usually just because we don't have the type's size declaration)
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index 84df79f432b6a..6f7e5f124e4b0 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -33,7 +33,24 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre

 inline void maybe_collect(jl_ptls_t ptls)
 {
-    mmtk_gc_poll(ptls);
+    // Just do a safepoint for the general maybe_collect
+    jl_gc_safepoint_(ptls);
+}
+
+// This is only used for malloc: we need to know whether we should trigger a GC. However,
+// checking with MMTk (mmtk_gc_poll) on every allocation is expensive, so we only poll
+// once every few allocations.
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
+{
+    // We do not need to maintain malloc_sz_since_last_poll precisely. We just need to
+    // avoid calling mmtk_gc_poll too frequently while keeping our heap usage accounting
+    // as accurate as we reasonably can.
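+    // (The 4096-byte threshold below is a heuristic: once it is exceeded we
+    // reset the counter and pay for a real mmtk_gc_poll; until then we only
+    // hit a plain safepoint.)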
+ if (ptls->malloc_sz_since_last_poll > 4096) { + jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0); + mmtk_gc_poll(ptls); + } else { + jl_atomic_fetch_add_relaxed(&ptls->malloc_sz_since_last_poll, sz); + jl_gc_safepoint_(ptls); + } } @@ -266,6 +283,9 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + // Clear the malloc sz count + jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0); + // Create mutator MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); // Copy the mutator to the thread local storage @@ -363,7 +383,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) jl_task_t *ct = jl_current_task; if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - maybe_collect(ptls); + malloc_maybe_collect(ptls, sz); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, @@ -379,7 +399,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_task_t *ct = jl_current_task; if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - maybe_collect(ptls); + malloc_maybe_collect(ptls, sz); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, @@ -411,7 +431,7 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_task_t *ct = jl_current_task; if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - maybe_collect(ptls); + malloc_maybe_collect(ptls, sz); if (sz < old) jl_atomic_store_relaxed(&ptls->gc_num.freed, jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); From 05c42ddc757a0aa2b308f4edb2aa01f9c905ca14 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 3 Jul 2023 05:09:40 +0000 Subject: [PATCH 020/116] Notify GC when loading pkg image --- src/staticdata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/staticdata.c b/src/staticdata.c index 6b21b2f80437d..452e4380deb02 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3540,6 +3540,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From 67c5c32e61c7e28ef44eabc57e3a6fa2154a45cc Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 25 Jul 2023 10:32:47 +1200 Subject: [PATCH 021/116] Remove MMTk counted malloc (#24) This PR changes the implementation of malloc methods for MMTk. We no longer use malloc and counted malloc methods from MMTk for those. Instead, we maintain a global counter for the size, and report the size to MMTk. 
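A minimal sketch of the counting scheme (the counter and the `jl_atomic_*`
helpers are the ones used in the diff below, from `julia_atomics.h`; the
wrappers shown here are illustrative, not the exact runtime code):

    #include <stdlib.h> /* malloc/free */

    /* Illustrative sketch: a process-wide tally of malloc'd bytes that is
     * reported to MMTk's heap accounting, instead of routing the calls
     * through MMTk's counted-malloc functions. */
    static _Atomic(int64_t) JULIA_MALLOC_BYTES;

    void *counted_malloc_sketch(size_t sz)
    {
        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, (int64_t)sz);
        return malloc(sz);
    }

    void counted_free_sketch(void *p, size_t sz)
    {
        free(p);
        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -(int64_t)sz);
    }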
--- src/gc.h | 4 ++- src/mmtk-gc.c | 86 +++++++++++++++++++++++++++------------------------ 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/gc.h b/src/gc.h index 701c2c769e1b4..6c689c4d5478e 100644 --- a/src/gc.h +++ b/src/gc.h @@ -33,10 +33,11 @@ extern void maybe_collect(jl_ptls_t ptls); extern void run_finalizer(jl_task_t *ct, void *o, void *ff); extern void *jl_malloc_aligned(size_t sz, size_t align); +extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align); +extern void jl_free_aligned(void *p); extern void *jl_gc_counted_calloc(size_t nm, size_t sz); extern void jl_gc_counted_free_with_size(void *p, size_t sz); extern void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align); extern void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f); extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); @@ -47,6 +48,7 @@ extern void gc_premark(jl_ptls_t ptls2); extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner, int8_t can_collect); extern size_t jl_array_nbytes(jl_array_t *a); +extern void run_finalizers(jl_task_t *ct); #ifdef OBJPROFILE void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT; diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 6f7e5f124e4b0..fa9c4acd0aa9f 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -53,31 +53,56 @@ static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) } } - // malloc wrappers, aligned allocation -// --- +// We currently just duplicate what Julia GC does. We will in the future replace the malloc calls with MMTK's malloc. +#if defined(_OS_WINDOWS_) +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ + return _aligned_malloc(sz ? sz : 1, align); +} +inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, + size_t align) +{ + (void)oldsz; + return _aligned_realloc(p, sz ? sz : 1, align); +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + _aligned_free(p); +} +#else inline void *jl_malloc_aligned(size_t sz, size_t align) { - return mmtk_malloc_aligned(sz ? sz : 1, align); // XXX sz +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return malloc(sz); +#endif + void *ptr; + if (posix_memalign(&ptr, align, sz)) + return NULL; + return ptr; } inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align) { - void *res = jl_malloc_aligned(sz, align); - if (res != NULL) { - memcpy(res, d, oldsz > sz ? sz : oldsz); - mmtk_free_aligned(d); +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return realloc(d, sz); +#endif + void *b = jl_malloc_aligned(sz, align); + if (b != NULL) { + memcpy(b, d, oldsz > sz ? 
sz : oldsz); + free(d); } - return res; + return b; } inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT { - mmtk_free_aligned(p); + free(p); } +#endif - -// finalizers // --- JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) @@ -195,15 +220,14 @@ void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT if (a->flags.how == 2) { char *d = (char*)a->data - a->offset*a->elsize; if (a->flags.isaligned) - mmtk_free_aligned(d); + jl_free_aligned(d); else - mmtk_free(d); + free(d); gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } } - // roots // --- @@ -384,11 +408,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; malloc_maybe_collect(ptls, sz); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - return mmtk_counted_malloc(sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz); } return malloc(sz); } @@ -399,12 +419,8 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_task_t *ct = jl_current_task; if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - malloc_maybe_collect(ptls, sz); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - return mmtk_counted_calloc(nm, sz); + malloc_maybe_collect(ptls, nm * sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); } return calloc(nm, sz); } @@ -413,16 +429,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; + free(p); if (pgcstack && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.freecall, - jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); - mmtk_free_with_size(p, sz); - return; + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz); } - free(p); } JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) @@ -433,16 +443,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; malloc_maybe_collect(ptls, sz); if (sz < old) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz); else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - return mmtk_realloc_with_old_size(p, sz, old); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old); } - // TODO: correct? return realloc(p, sz); } From f690aa3a5621bfa1d6a07f911818f203d3f8d650 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 25 Jul 2023 10:39:15 +1200 Subject: [PATCH 022/116] Use Julia's finalizer implementation (#22) This PR moves code about registering and running finalizers to `gc-common`. 
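For background, a sketch of the tag convention the moved lists rely on
(`gc_ptr_tag` and `gc_ptr_clear_tag` are the existing helpers; this is a
simplified rendering of `run_finalizer`, not the moved code itself):

    // Finalizer lists store (object, finalizer) pairs; low bits of the
    // object pointer encode how the finalizer must be invoked.
    void run_finalizer_sketch(void *o, void *ff)
    {
        int ptr_finalizer = gc_ptr_tag(o, 1); // bit 0: ff is a raw C function
        o = gc_ptr_clear_tag(o, 3);           // strip both tag bits
        if (ptr_finalizer) {
            ((void (*)(void*))ff)(o);         // call the C finalizer directly
        }
        else {
            // otherwise ff is a Julia function object; the real code invokes
            // it via jl_apply under an error handler
        }
    }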
--- src/gc-common.c | 264 ++++++++++++++++++++++++++++++++++++++++++++++++ src/gc.c | 258 +--------------------------------------------- src/gc.h | 1 + src/mmtk-gc.c | 54 ++-------- 4 files changed, 274 insertions(+), 303 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index cfb83c08a7a6b..3eacc2b2fd92d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -49,6 +49,14 @@ memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; // finalizers // --- uint64_t finalizer_rngState[JL_RNG_SIZE]; +jl_mutex_t finalizers_lock; +// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. +// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer. +// If an object pointer has the second lowest bit set, the current pointer is a c object pointer. +// It must be aligned at least 4, and it finalized immediately (at "quiescence"). +// `to_finalize` should not have tagged pointers. +arraylist_t finalizer_list_marked; +arraylist_t to_finalize; void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT; @@ -57,6 +65,25 @@ JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) jl_rng_split(finalizer_rngState, jl_current_task->rngState); } +// The first two entries are assumed to be empty and the rest are assumed to +// be pointers to `jl_value_t` objects +STATIC_INLINE void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT +{ + void **items = list->items; + items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); + items[1] = ct->gcstack; + ct->gcstack = (jl_gcframe_t*)items; +} + +STATIC_INLINE void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT +{ + arraylist_push(&to_finalize, o); + arraylist_push(&to_finalize, f); + // doesn't need release, since we'll keep checking (on the reader) until we see the work and + // release our lock, and that will have a release barrier by then + jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); +} + void run_finalizer(jl_task_t *ct, void *o, void *ff) { int ptr_finalizer = gc_ptr_tag(o, 1); @@ -79,6 +106,243 @@ void run_finalizer(jl_task_t *ct, void *o, void *ff) } } +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + arraylist_t *a = &ptls->finalizers; + // This acquire load and the release store at the end are used to + // synchronize with `finalize_object` on another thread. Apart from the GC, + // which is blocked by entering a unsafe region, there might be only + // one other thread accessing our list in `finalize_object` + // (only one thread since it needs to acquire the finalizer lock). + // Similar to `finalize_object`, all content mutation has to be done + // between the acquire and the release of the length. + size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len); + if (__unlikely(oldlen + 2 > a->max)) { + JL_LOCK_NOGC(&finalizers_lock); + // `a->len` might have been modified. + // Another possibility is to always grow the array to `oldlen + 2` but + // it's simpler this way and uses slightly less memory =) + oldlen = a->len; + arraylist_grow(a, 2); + a->len = oldlen; + JL_UNLOCK_NOGC(&finalizers_lock); + } + void **items = a->items; + items[oldlen] = v; + items[oldlen + 1] = f; + jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2); +} + +// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock +// to be hold for the current thread and will release the lock when the +// function returns. 
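+// Entries run in reverse insertion order, and the first two slots of `list`
+// are repurposed as GC frame metadata via jl_gc_push_arraylist above.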
+void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE +{ + // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring + // of `finalizer`) in a finalizer: + uint8_t sticky = ct->sticky; + // empty out the first two entries for the GC frame + arraylist_push(list, list->items[0]); + arraylist_push(list, list->items[1]); + jl_gc_push_arraylist(ct, list); + void **items = list->items; + size_t len = list->len; + JL_UNLOCK_NOGC(&finalizers_lock); + // run finalizers in reverse order they were added, so lower-level finalizers run last + for (size_t i = len-4; i >= 2; i -= 2) + run_finalizer(ct, items[i], items[i + 1]); + // first entries were moved last to make room for GC frame metadata + run_finalizer(ct, items[len-2], items[len-1]); + // matches the jl_gc_push_arraylist above + JL_GC_POP(); + ct->sticky = sticky; +} + +void run_finalizers(jl_task_t *ct) +{ + // Racy fast path: + // The race here should be OK since the race can only happen if + // another thread is writing to it with the lock held. In such case, + // we don't need to run pending finalizers since the writer thread + // will flush it. + if (to_finalize.len == 0) + return; + JL_LOCK_NOGC(&finalizers_lock); + if (to_finalize.len == 0) { + JL_UNLOCK_NOGC(&finalizers_lock); + return; + } + arraylist_t copied_list; + memcpy(&copied_list, &to_finalize, sizeof(copied_list)); + if (to_finalize.items == to_finalize._space) { + copied_list.items = copied_list._space; + } + jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0); + arraylist_new(&to_finalize, 0); + + uint64_t save_rngState[JL_RNG_SIZE]; + memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState)); + jl_rng_split(ct->rngState, finalizer_rngState); + + // This releases the finalizers lock. + int8_t was_in_finalizer = ct->ptls->in_finalizer; + ct->ptls->in_finalizer = 1; + jl_gc_run_finalizers_in_list(ct, &copied_list); + ct->ptls->in_finalizer = was_in_finalizer; + arraylist_free(&copied_list); + + memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState)); +} + +// if `need_sync` is true, the `list` is the `finalizers` list of another +// thread and we need additional synchronizations +void finalize_object(arraylist_t *list, jl_value_t *o, + arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT +{ + // The acquire load makes sure that the first `len` objects are valid. + // If `need_sync` is true, all mutations of the content should be limited + // to the first `oldlen` elements and no mutation is allowed after the + // new length is published with the `cmpxchg` at the end of the function. + // This way, the mutation should not conflict with the owning thread, + // which only writes to locations later than `len` + // and will not resize the buffer without acquiring the lock. + size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len; + size_t oldlen = len; + void **items = list->items; + size_t j = 0; + for (size_t i = 0; i < len; i += 2) { + void *v = items[i]; + int move = 0; + if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) { + void *f = items[i + 1]; + move = 1; + arraylist_push(copied_list, v); + arraylist_push(copied_list, f); + } + if (move || __unlikely(!v)) { + // remove item + } + else { + if (j < i) { + items[j] = items[i]; + items[j+1] = items[i+1]; + } + j += 2; + } + } + len = j; + if (oldlen == len) + return; + if (need_sync) { + // The memset needs to be unconditional since the thread might have + // already read the length. 
+ // The `memset` (like any other content mutation) has to be done + // **before** the `cmpxchg` which publishes the length. + memset(&items[len], 0, (oldlen - len) * sizeof(void*)); + jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len); + } + else { + list->len = len; + } +} + +JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT +{ + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); +} + +// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT +{ + assert(!gc_ptr_tag(v, 3)); + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); +} + +JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT +{ + if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) { + jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); + } + else { + jl_gc_add_finalizer_(ptls, v, f); + } +} + +JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) +{ + if (ct == NULL) + ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { + run_finalizers(ct); + } +} + +JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) +{ + JL_LOCK_NOGC(&finalizers_lock); + // Copy the finalizers into a temporary list so that code in the finalizer + // won't change the list as we loop through them. + // This list is also used as the GC frame when we are running the finalizers + arraylist_t copied_list; + arraylist_new(&copied_list, 0); + // No need to check the to_finalize list since the user is apparently + // still holding a reference to the object + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); + } + finalize_object(&finalizer_list_marked, o, &copied_list, 0); + gc_n_threads = 0; + gc_all_tls_states = NULL; + if (copied_list.len > 0) { + // This releases the finalizers lock. 
+ jl_gc_run_finalizers_in_list(ct, &copied_list); + } + else { + JL_UNLOCK_NOGC(&finalizers_lock); + } + arraylist_free(&copied_list); +} + +void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT +{ + void **items = flist->items; + size_t len = flist->len; + for(size_t i = 0; i < len; i+=2) { + void *v = items[i]; + void *f = items[i + 1]; + if (__unlikely(!v)) + continue; + schedule_finalization(v, f); + } + flist->len = 0; +} + +void jl_gc_run_all_finalizers(jl_task_t *ct) +{ + if (!ct) return; + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + schedule_all_finalizers(&finalizer_list_marked); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + schedule_all_finalizers(&ptls2->finalizers); + } + gc_n_threads = 0; + gc_all_tls_states = NULL; + run_finalizers(ct); +} + JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) { if (ptls == NULL) diff --git a/src/gc.c b/src/gc.c index 90eae32f0affc..08741df919dfa 100644 --- a/src/gc.c +++ b/src/gc.c @@ -290,7 +290,6 @@ void jl_gc_notify_image_alloc(char* img_data, size_t len) // For accessing `ptls->finalizers`, the lock is needed if a thread // is going to realloc the buffer (of its own list) or accessing the // list of another thread -static jl_mutex_t finalizers_lock; static uv_mutex_t gc_cache_lock; // Flag that tells us whether we need to support conservative marking @@ -335,14 +334,6 @@ pagetable_t memory_map; bigval_t *big_objects_marked = NULL; // -- Finalization -- -// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. -// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer. -// If an object pointer has the second lowest bit set, the current pointer is a c object pointer. -// It must be aligned at least 4, and it finalized immediately (at "quiescence"). -// `to_finalize` should not have tagged pointers. -arraylist_t finalizer_list_marked; -arraylist_t to_finalize; - NOINLINE uintptr_t gc_get_stack_ptr(void) { @@ -425,7 +416,7 @@ inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT } #endif -static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT +STATIC_INLINE void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT { arraylist_push(&to_finalize, o); arraylist_push(&to_finalize, f); @@ -434,253 +425,6 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); } -// if `need_sync` is true, the `list` is the `finalizers` list of another -// thread and we need additional synchronizations -static void finalize_object(arraylist_t *list, jl_value_t *o, - arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT -{ - // The acquire load makes sure that the first `len` objects are valid. - // If `need_sync` is true, all mutations of the content should be limited - // to the first `oldlen` elements and no mutation is allowed after the - // new length is published with the `cmpxchg` at the end of the function. - // This way, the mutation should not conflict with the owning thread, - // which only writes to locations later than `len` - // and will not resize the buffer without acquiring the lock. - size_t len = need_sync ? 
jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len; - size_t oldlen = len; - void **items = list->items; - size_t j = 0; - for (size_t i = 0; i < len; i += 2) { - void *v = items[i]; - int move = 0; - if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) { - void *f = items[i + 1]; - move = 1; - arraylist_push(copied_list, v); - arraylist_push(copied_list, f); - } - if (move || __unlikely(!v)) { - // remove item - } - else { - if (j < i) { - items[j] = items[i]; - items[j+1] = items[i+1]; - } - j += 2; - } - } - len = j; - if (oldlen == len) - return; - if (need_sync) { - // The memset needs to be unconditional since the thread might have - // already read the length. - // The `memset` (like any other content mutation) has to be done - // **before** the `cmpxchg` which publishes the length. - memset(&items[len], 0, (oldlen - len) * sizeof(void*)); - jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len); - } - else { - list->len = len; - } -} - -// The first two entries are assumed to be empty and the rest are assumed to -// be pointers to `jl_value_t` objects -static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT -{ - void **items = list->items; - items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); - items[1] = ct->gcstack; - ct->gcstack = (jl_gcframe_t*)items; -} - -// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock -// to be hold for the current thread and will release the lock when the -// function returns. -static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE -{ - // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring - // of `finalizer`) in a finalizer: - uint8_t sticky = ct->sticky; - // empty out the first two entries for the GC frame - arraylist_push(list, list->items[0]); - arraylist_push(list, list->items[1]); - jl_gc_push_arraylist(ct, list); - void **items = list->items; - size_t len = list->len; - JL_UNLOCK_NOGC(&finalizers_lock); - // run finalizers in reverse order they were added, so lower-level finalizers run last - for (size_t i = len-4; i >= 2; i -= 2) - run_finalizer(ct, items[i], items[i + 1]); - // first entries were moved last to make room for GC frame metadata - run_finalizer(ct, items[len-2], items[len-1]); - // matches the jl_gc_push_arraylist above - JL_GC_POP(); - ct->sticky = sticky; -} - -static void run_finalizers(jl_task_t *ct) -{ - // Racy fast path: - // The race here should be OK since the race can only happen if - // another thread is writing to it with the lock held. In such case, - // we don't need to run pending finalizers since the writer thread - // will flush it. - if (to_finalize.len == 0) - return; - JL_LOCK_NOGC(&finalizers_lock); - if (to_finalize.len == 0) { - JL_UNLOCK_NOGC(&finalizers_lock); - return; - } - arraylist_t copied_list; - memcpy(&copied_list, &to_finalize, sizeof(copied_list)); - if (to_finalize.items == to_finalize._space) { - copied_list.items = copied_list._space; - } - jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0); - arraylist_new(&to_finalize, 0); - - uint64_t save_rngState[JL_RNG_SIZE]; - memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState)); - jl_rng_split(ct->rngState, finalizer_rngState); - - // This releases the finalizers lock. 
- int8_t was_in_finalizer = ct->ptls->in_finalizer; - ct->ptls->in_finalizer = 1; - jl_gc_run_finalizers_in_list(ct, &copied_list); - ct->ptls->in_finalizer = was_in_finalizer; - arraylist_free(&copied_list); - - memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState)); -} - -JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) -{ - if (ct == NULL) - ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { - run_finalizers(ct); - } -} - -static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT -{ - void **items = flist->items; - size_t len = flist->len; - for(size_t i = 0; i < len; i+=2) { - void *v = items[i]; - void *f = items[i + 1]; - if (__unlikely(!v)) - continue; - schedule_finalization(v, f); - } - flist->len = 0; -} - -void jl_gc_run_all_finalizers(jl_task_t *ct) -{ - if (!ct) return; - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - schedule_all_finalizers(&finalizer_list_marked); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - schedule_all_finalizers(&ptls2->finalizers); - } - gc_n_threads = 0; - gc_all_tls_states = NULL; - run_finalizers(ct); -} - -void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT -{ - assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); - arraylist_t *a = &ptls->finalizers; - // This acquire load and the release store at the end are used to - // synchronize with `finalize_object` on another thread. Apart from the GC, - // which is blocked by entering a unsafe region, there might be only - // one other thread accessing our list in `finalize_object` - // (only one thread since it needs to acquire the finalizer lock). - // Similar to `finalize_object`, all content mutation has to be done - // between the acquire and the release of the length. - size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len); - if (__unlikely(oldlen + 2 > a->max)) { - JL_LOCK_NOGC(&finalizers_lock); - // `a->len` might have been modified. 
- // Another possibility is to always grow the array to `oldlen + 2` but - // it's simpler this way and uses slightly less memory =) - oldlen = a->len; - arraylist_grow(a, 2); - a->len = oldlen; - JL_UNLOCK_NOGC(&finalizers_lock); - } - void **items = a->items; - items[oldlen] = v; - items[oldlen + 1] = f; - jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2); -} - -JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT -{ - jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); -} - -// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) -JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT -{ - assert(!gc_ptr_tag(v, 3)); - jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); -} - -JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT -{ - if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) { - jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); - } - else { - jl_gc_add_finalizer_(ptls, v, f); - } -} - -JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) -{ - JL_LOCK_NOGC(&finalizers_lock); - // Copy the finalizers into a temporary list so that code in the finalizer - // won't change the list as we loop through them. - // This list is also used as the GC frame when we are running the finalizers - arraylist_t copied_list; - arraylist_new(&copied_list, 0); - // No need to check the to_finalize list since the user is apparently - // still holding a reference to the object - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); - } - finalize_object(&finalizer_list_marked, o, &copied_list, 0); - gc_n_threads = 0; - gc_all_tls_states = NULL; - if (copied_list.len > 0) { - // This releases the finalizers lock. 
- jl_gc_run_finalizers_in_list(ct, &copied_list); - } - else { - JL_UNLOCK_NOGC(&finalizers_lock); - } - arraylist_free(&copied_list); -} - // explicitly scheduled objects for the sweepfunc callback static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) { diff --git a/src/gc.h b/src/gc.h index 6c689c4d5478e..9fa780c24c30f 100644 --- a/src/gc.h +++ b/src/gc.h @@ -382,6 +382,7 @@ extern pagetable_t memory_map; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; +extern jl_mutex_t finalizers_lock; extern int64_t lazy_freed_pages; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index fa9c4acd0aa9f..86c6fd17eb571 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -103,52 +103,6 @@ inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT } #endif -// --- - -JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) -{ - if (ct == NULL) - ct = jl_current_task; - mmtk_jl_run_pending_finalizers(ct->ptls); -} - -JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT -{ - mmtk_register_finalizer(v, f, 1); -} - -// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) -JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT -{ - /* TODO: unsupported? */ -} - -JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT -{ - if (__unlikely(jl_typeis(f, jl_voidpointer_type))) { - jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); - } - else { - mmtk_register_finalizer(v, f, 0); - } -} - -JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) -{ - mmtk_run_finalizers_for_obj(o); -} - -void jl_gc_run_all_finalizers(jl_task_t *ct) -{ - mmtk_jl_gc_run_all_finalizers(); -} - -void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT -{ - mmtk_register_finalizer(v, f, 0); -} - - // weak references // --- JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) @@ -323,6 +277,10 @@ void jl_deinit_thread_heap(jl_ptls_t ptls) mmtk_destroy_mutator(&ptls->mmtk_mutator); } +extern jl_mutex_t finalizers_lock; +extern arraylist_t to_finalize; +extern arraylist_t finalizer_list_marked; + // System-wide initialization // TODO: remove locks? remove anything else? 
void jl_gc_init(void)
 {
     jl_gc_set_max_memory(jl_options.heap_size_hint);
     JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
+    JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
     uv_mutex_init(&gc_perm_lock);

+    arraylist_new(&to_finalize, 0);
+    arraylist_new(&finalizer_list_marked, 0);
+
     gc_num.interval = default_collect_interval;
     last_long_collect_interval = default_collect_interval;
     gc_num.allocd = 0;

From f41239c9d91dd7bc84e3735ceae0c3fbdeaac2a1 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Tue, 25 Jul 2023 23:29:11 +0000
Subject: [PATCH 023/116] Refactor the code for scanning, getting object size
 in Rust, and removing the scan_obj_c option

---
 src/mmtk-gc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index 86c6fd17eb571..3e2dd17fc7447 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -355,9 +355,9 @@ void jl_gc_init(void)

     // if only max size is specified initialize MMTk with a fixed size heap
     if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) {
-        mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
+        mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
     } else {
-        mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
+        mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
     }
 }

From ed30d1c925865dd5c4ca2482701e20d6bc4700ec Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 26 Jul 2023 11:34:46 +0000
Subject: [PATCH 024/116] Adding check for COPY_STACKS flag and
 julia_copy_stacks feature

---
 src/mmtk-gc.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index 3e2dd17fc7447..10336d3f7d1db 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -353,6 +353,17 @@ void jl_gc_init(void)
     // If the two values are the same, we can use either. Otherwise, we need to be careful.
     assert(jl_n_gcthreads == jl_options.ngcthreads);

+    // Check that the julia_copy_stack Rust feature was enabled when COPY_STACKS is defined
+    int copy_stacks;
+
+#ifdef COPY_STACKS
+    copy_stacks = 1;
+#else
+    copy_stacks = 0;
+#endif
+
+    mmtk_julia_copy_stack_check(copy_stacks);
+
     // if only max size is specified initialize MMTk with a fixed size heap

From 73411572e118d7cfd0110da46663b34cb82eb520 Mon Sep 17 00:00:00 2001
From: Yi Lin
Date: Thu, 27 Jul 2023 12:14:07 +1200
Subject: [PATCH 025/116] Inline runtime alloc (#23)

This PR:
* makes MMTk's plan a compiler option when building Julia
* implements MMTk's allocation fastpath for runtime allocations.
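As a worked example of the fastpath's alignment step (the formula is the
one in `bump_alloc_fast` in the diff below; the concrete numbers are
illustrative):

    delta  = (-offset - cursor) & (align - 1);
    result = cursor + delta;
    // e.g. cursor = 0x1008, align = 16, offset = 0:
    // delta = (-0x1008) & 15 = 8, so result = 0x1010, the next
    // 16-byte boundary, and the cursor then advances by `size`.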
--- Make.inc | 8 ++++++ src/julia.h | 59 ++++++++++++++++++++++++++++++++++++++++++-- src/julia_internal.h | 2 +- src/mmtk-gc.c | 2 +- src/symbol.c | 2 +- 5 files changed, 68 insertions(+), 5 deletions(-) diff --git a/Make.inc b/Make.inc index 6920fc64ecf70..bef6d1747b7d7 100644 --- a/Make.inc +++ b/Make.inc @@ -747,6 +747,14 @@ else MMTK_BUILD = release endif endif +ifeq (${MMTK_PLAN},Immix) +JCXXFLAGS += -DMMTK_PLAN_IMMIX +JCFLAGS += -DMMTK_PLAN_IMMIX +endif +ifeq (${MMTK_PLAN},StickyImmix) +JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX +JCFLAGS += -DMMTK_PLAN_STICKYIMMIX +endif MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk MMTK_API_INC = $(MMTK_DIR)/api MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia diff --git a/src/julia.h b/src/julia.h index 44650a7d6ed0a..77a95bf625b80 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2382,10 +2382,24 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind; #ifdef MMTK_GC extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); -extern const uint8_t MMTK_NEEDS_WRITE_BARRIER; -extern const uint8_t MMTK_OBJECT_BARRIER; +extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); + extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; +// These need to be constants. + +#define MMTK_OBJECT_BARRIER (1) +// Stickyimmix needs write barrier. Immix does not need write barrier. +#ifdef MMTK_PLAN_IMMIX +#define MMTK_NEEDS_WRITE_BARRIER (0) +#endif +#ifdef MMTK_PLAN_STICKYIMMIX +#define MMTK_NEEDS_WRITE_BARRIER (1) +#endif + +#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) +#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) + // Directly call into MMTk for write barrier (debugging only) STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT { @@ -2422,6 +2436,47 @@ STATIC_INLINE size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1); } +STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) { + intptr_t delta = (-offset - *cursor) & (align - 1); + uintptr_t result = *cursor + (uintptr_t)delta; + + if (__unlikely(result + size > limit)) { + return (void*) mmtk_alloc(mutator, size, align, offset, allocator); + } else{ + *cursor = result + size; + return (void*)result; + } +} + +STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); +} + +STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + // We do not need post alloc for immix objects in immix/stickyimmix +} + +STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); +} + +STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) obj; + uint8_t* meta_addr = (uint8_t*) 
(MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + while(1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } + } +} + #endif #ifdef __cplusplus diff --git a/src/julia_internal.h b/src/julia_internal.h index 76ed8f977dc7a..cbd0bf7750251 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -550,7 +550,7 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT o->header = tag | GC_OLD_MARKED; #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - mmtk_post_alloc(&ptls->mmtk_mutator, jl_valueof(o), allocsz, 1); + mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(o), allocsz); #endif return jl_valueof(o); } diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 86c6fd17eb571..f45aa14692ad4 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -533,7 +533,7 @@ void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offs { jl_ptls_t ptls = jl_current_task->ptls; size_t allocsz = mmtk_align_alloc_sz(sz); - void* addr = mmtk_alloc(&ptls->mmtk_mutator, allocsz, align, offset, 1); + void* addr = mmtk_immortal_alloc_fast(&ptls->mmtk_mutator, allocsz, align, offset); return addr; } diff --git a/src/symbol.c b/src/symbol.c index f1cd18cfb84cc..b745adbfba80c 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -41,7 +41,7 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - mmtk_post_alloc(&ptls->mmtk_mutator, jl_valueof(tag), nb, 1); + mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(tag), nb); #endif jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); From ae2fa58013b841600cc416b65056813054dce3d5 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 3 Aug 2023 09:44:13 +0000 Subject: [PATCH 026/116] Supporting moving immix (wip) --- src/builtins.c | 12 +++++++++++- src/datatype.c | 2 ++ src/interpreter.c | 2 ++ src/ircode.c | 5 ++++- src/julia.h | 13 +++++++++++++ src/julia_internal.h | 4 +++- src/runtime_ccall.cpp | 2 ++ src/staticdata.c | 9 +++++++++ src/staticdata_utils.c | 9 +++++++++ src/toplevel.c | 2 ++ 10 files changed, 57 insertions(+), 3 deletions(-) diff --git a/src/builtins.c b/src/builtins.c index a6c904c851c95..f3fa4248b3fa4 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -344,6 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN i++; pe = pe->prev; } + if(mmtk_object_is_managed_by_mmtk(v)) { + mmtk_pin_object(v); + } return inthash((uintptr_t)v); } if (tv == jl_uniontype_type) { @@ -392,6 +395,9 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT return ~h; size_t f, nf = jl_datatype_nfields(dt); if (nf == 0 || (!dt->layout->haspadding && dt->layout->npointers == 0)) { + if(mmtk_object_is_managed_by_mmtk(v)) { + mmtk_pin_object(v); + } // operate element-wise if there are unused bits inside, // otherwise just take the whole data block at once // a few select pointers (notably symbol) also have special hash values @@ -452,8 +458,12 @@ static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) J jl_module_t *m = (jl_module_t*)v; return m->hash; } - if (dt->name->mutabl) + if (dt->name->mutabl) { + if(mmtk_object_is_managed_by_mmtk(v)) { + mmtk_pin_object(v); + } return inthash((uintptr_t)v); + } return 
immut_id_(dt, v, dt->hash); } diff --git a/src/datatype.c b/src/datatype.c index 95c3b11c9abdc..20c3af1555675 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -65,6 +65,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu jl_typename_t *tn = (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t), jl_typename_type); + mmtk_pin_object(tn); tn->name = name; tn->module = module; tn->wrapper = NULL; @@ -96,6 +97,7 @@ jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_task_t *ct = jl_current_task; jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); + mmtk_pin_object(t); jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; t->hasfreetypevars = 0; diff --git a/src/interpreter.c b/src/interpreter.c index c08496f72ce04..cdc2a5a96beef 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -748,6 +748,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src); JL_GC_PUSHFRAME(s, s->locals, nroots); jl_array_t *stmts = src->code; + JL_GC_PUSH1(&stmts); assert(jl_typetagis(stmts, jl_array_any_type)); s->src = src; s->module = m; @@ -760,6 +761,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t jl_value_t *r = eval_body(stmts, s, 0, 1); ct->world_age = last_age; JL_GC_POP(); + JL_GC_POP(); return r; } diff --git a/src/ircode.c b/src/ircode.c index 4121d6691aa5b..69a5ed9a05a80 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -1162,12 +1162,15 @@ void jl_init_serializer(void) assert(LAST_TAG+1+i < 256); for (i = 2; i < 256; i++) { - if (deser_tag[i]) + if (deser_tag[i]) { + PTRHASH_PIN(deser_tag[i]) ptrhash_put(&ser_tag, deser_tag[i], (void*)i); + } } i = 2; while (common_symbols[i-2] != NULL) { + PTRHASH_PIN(common_symbols[i-2]) ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; i += 1; diff --git a/src/julia.h b/src/julia.h index 77a95bf625b80..9d3e177544af4 100644 --- a/src/julia.h +++ b/src/julia.h @@ -3,6 +3,19 @@ #ifndef JULIA_H #define JULIA_H +#ifdef __cplusplus +extern "C" { +#endif + +extern int mmtk_object_is_managed_by_mmtk(void* addr); +extern unsigned char mmtk_pin_object(void* obj); +#define PTRHASH_PIN(key) \ + mmtk_pin_object(key); \ + +#ifdef __cplusplus +} +#endif + #if defined(JL_LIBRARY_EXPORTS_INTERNAL) || defined(JL_LIBRARY_EXPORTS_CODEGEN) #define JL_LIBRARY_EXPORTS #endif diff --git a/src/julia_internal.h b/src/julia_internal.h index cbd0bf7750251..c3588b6518816 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -535,7 +535,9 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void); typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) { - return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag); + jl_gc_tracked_buffer_t *buf = jl_gc_alloc(ptls, sz, (void*)jl_buff_tag); + mmtk_pin_object(buf); + return buf; } STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 23793254c205d..cac9270c72556 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -352,6 +352,8 @@ jl_value_t *jl_get_cfunction_trampoline( tramp = trampoline_alloc(); ((void**)result)[0] = tramp; tramp = init_trampoline(tramp, nval); + PTRHASH_PIN((void*)fobj) + PTRHASH_PIN(result) ptrhash_put(cache, (void*)fobj, result); 
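A note on the side log bit arithmetic in the julia.h hunk above (patch 025): MMTk keeps one "unlogged" bit per 8 bytes of address space, so each side-metadata byte covers 64 bytes of heap; that is where the `addr >> 6` (byte index) and `(addr >> 3) & 0b111` (bit index) come from. A standalone sketch with concrete numbers (illustrative only; `log_bit_base` stands in for MMTK_SIDE_LOG_BIT_BASE_ADDRESS):

    #include <stdint.h>

    // One log bit per 8 bytes of heap: metadata byte = addr / 64,
    // bit within that byte = (addr / 8) % 8.
    static inline int log_bit_is_set(const uint8_t *log_bit_base, uintptr_t addr)
    {
        const uint8_t *meta_addr = log_bit_base + (addr >> 6);
        unsigned shift = (addr >> 3) & 0x7;
        return (*meta_addr >> shift) & 1;
    }
    // Example: addr = 0x10040 -> metadata byte 0x401, bit 0. The CAS loop in
    // mmtk_immortal_post_alloc_fast above sets exactly this bit for a freshly
    // allocated immortal object; the write barrier consults it to decide
    // whether the slow path has to record the write.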
uv_mutex_unlock(&trampoline_lock); return result; diff --git a/src/staticdata.c b/src/staticdata.c index 49b97480b5165..fba106ad632d3 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -655,6 +655,8 @@ static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT { + PTRHASH_PIN((void*)addr) + PTRHASH_PIN((void*)newval) ptrhash_put(&field_replace, (void*)addr, newval); } @@ -2137,6 +2139,8 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache); cache = cache_rehash_set(cache, l); // redirect all references to the old cache to relocate to the new cache object + PTRHASH_PIN((void*)cache) + PTRHASH_PIN((void*)idx) ptrhash_put(&serialization_order, cache, idx); serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache; return cache; @@ -2387,6 +2391,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); uintptr_t i; for (i = 0; id_to_fptrs[i] != NULL; i++) { + PTRHASH_PIN((void*)(uintptr_t)id_to_fptrs[i]) ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); } htable_new(&serialization_order, 25000); @@ -2473,6 +2478,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, htable_new(&external_objects, NUM_TAGS); for (size_t i = 0; tags[i] != NULL; i++) { jl_value_t *tag = *tags[i]; + PTRHASH_PIN(tag) ptrhash_put(&external_objects, tag, tag); } // Queue the worklist itself as the first item we serialize @@ -3044,6 +3050,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl assert(tag == 0); arraylist_push(&delay_list, pfld); arraylist_push(&delay_list, obj); + PTRHASH_PIN(obj) ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid *pfld = (uintptr_t)NULL; continue; @@ -3077,6 +3084,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl } static_assert(offsetof(jl_datatype_t, name) == 0, ""); newdt->name = dt->name; + PTRHASH_PIN(newdt) + PTRHASH_PIN(dt) ptrhash_put(&new_dt_objs, (void*)newdt, dt); } else { diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index bf1a830b608de..bc6502c134672 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -272,6 +272,7 @@ static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializati assert(jl_is_code_instance(ci)); jl_method_t *m = ci->def->def.method; assert(jl_is_method(m)); + PTRHASH_PIN(m) ptrhash_put(&mset, (void*)m, (void*)m); } int nwithkey; @@ -434,6 +435,7 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_arra for (size_t i = 0; i < jl_array_len(external_cis); i++) { jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(external_cis, i); jl_method_instance_t *mi = ci->def; + PTRHASH_PIN(mi) ptrhash_put(&external_mis, (void*)mi, (void*)mi); } } @@ -469,6 +471,8 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_arra for (size_t i = 0; i < l / 2; i++) { jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); void *target = (void*)((char*)HT_NOTFOUND + i + 1); + PTRHASH_PIN(caller) + PTRHASH_PIN(target) ptrhash_put(&edges_ids, (void*)caller, target); } // process target list to turn it into a memoized validity table @@ -545,6 +549,8 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t 
*ext_targets, jl_arra jl_array_ptr_1d_push(ext_targets, callee); jl_array_ptr_1d_push(ext_targets, matches); target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3); + PTRHASH_PIN(callee) + PTRHASH_PIN(target) ptrhash_put(&edges_map2, (void*)callee, target); } idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; @@ -1090,6 +1096,8 @@ static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_a jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i); assert(ci->min_world == minworld); if (ci->max_world == 1) { // sentinel value: has edges to external callables + PTRHASH_PIN((void*)ci->def) + PTRHASH_PIN((void*)ci) ptrhash_put(&visited, (void*)ci->def, (void*)ci); } else { @@ -1155,6 +1163,7 @@ static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges) size_t l = edges ? jl_array_len(edges) / 2 : 0; for (size_t i = 0; i < l; i++) { jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + PTRHASH_PIN((void*)caller) ptrhash_put(callers_with_edges, (void*)caller, (void*)caller); } } diff --git a/src/toplevel.c b/src/toplevel.c index 200d0ad220231..8a72ce8e6c2e6 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -140,6 +140,8 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_value_t *form = (jl_value_t*)newm; JL_GC_PUSH1(&form); JL_LOCK(&jl_modules_mutex); + PTRHASH_PIN(newm) + PTRHASH_PIN((void*)((uintptr_t)HT_NOTFOUND + 1)) ptrhash_put(&jl_current_modules, (void*)newm, (void*)((uintptr_t)HT_NOTFOUND + 1)); JL_UNLOCK(&jl_modules_mutex); From 2f21eecf4c9a9156022fb4ebd18e774a3a293c57 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 7 Aug 2023 01:06:17 +0000 Subject: [PATCH 027/116] Pushing current task into the stack to set as red root; pin string when realloc-ing if pinned; removing static from functions needed to sweep live_tasks array --- src/gc-stacks.c | 4 ++-- src/mmtk-gc.c | 3 +++ src/task.c | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gc-stacks.c b/src/gc-stacks.c index b35c1722c82ff..9e8b71ac442fc 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -66,7 +66,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) +void free_stack(void *stkbuf, size_t bufsz) { munmap(stkbuf, bufsz); jl_atomic_fetch_add(&num_stack_mappings, -1); @@ -104,7 +104,7 @@ static unsigned select_pool(size_t nb) JL_NOTSAFEPOINT } -static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) +void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) { #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index baa5d6642721d..d8bae88fc56a1 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -428,6 +428,9 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) size_t len = jl_string_len(s); jl_value_t *snew = jl_alloc_string(sz); memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? 
sz : len); + if(mmtk_is_pinned(s)) { + mmtk_pin_object(snew); + } return snew; } diff --git a/src/task.c b/src/task.c index 477ae481071a0..267f7448fa52a 100644 --- a/src/task.c +++ b/src/task.c @@ -1207,6 +1207,7 @@ CFI_NORETURN jl_task_t *ct = jl_get_current_task(); #else jl_task_t *ct = jl_current_task; + JL_GC_PUSH1(&ct); #endif jl_ptls_t ptls = ct->ptls; jl_value_t *res; @@ -1247,6 +1248,7 @@ skip_pop_exception:; ct->result = res; jl_gc_wb(ct, ct->result); jl_finish_task(ct); + JL_GC_POP(); jl_gc_debug_critical_error(); abort(); } From 39530f9e4c9ca2d66bf152d4d125e44a43c9caef Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 9 Aug 2023 01:15:56 +0000 Subject: [PATCH 028/116] Fixing issue that prevented building Julia or running tests with Julia's debug build --- Makefile | 2 +- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e4dbef73bb52..cfa5af6052db9 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ check-whitespace: ifneq ($(NO_GIT), 1) @# Append the directory containing the julia we just built to the end of `PATH`, @# to give us the best chance of being able to run this check. - @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) + @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIA_EXECUTABLE) $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) else $(warn "Skipping whitespace check because git is unavailable") endif diff --git a/src/Makefile b/src/Makefile index ff5f4ce8b99d6..63654f35026e9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -176,7 +176,7 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) ifeq ($(WITH_MMTK), 1) MMTK_SRCS := mmtk_julia MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) -MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) $(MMTK_LIB_DST) else MMTK_OBJS := MMTK_DOBJS := From 5fe96d76427b48cc07df3165b339bc0db9522dd7 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 9 Aug 2023 01:15:56 +0000 Subject: [PATCH 029/116] Fixing issue that prevented building Julia or running tests with Julia's debug build --- Makefile | 2 +- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e4dbef73bb52..cfa5af6052db9 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ check-whitespace: ifneq ($(NO_GIT), 1) @# Append the directory containing the julia we just built to the end of `PATH`, @# to give us the best chance of being able to run this check. 
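Stepping back to the task.c hunk of patch 027 above ("Pushing current task into the stack to set as red root"): under a moving plan, the C-level `ct` pointer must stay valid across the allocating calls on the task-finish path, so it is pushed into a GC frame, which the binding then reports as a pinning ("red") root. A minimal sketch of the rooting discipline, with `do_work` as an invented stand-in for any allocating call:

    #include "julia.h"

    extern void do_work(jl_value_t *v); // hypothetical; anything that may allocate

    void example(void)
    {
        jl_value_t *v = jl_box_int64(42); // freshly allocated, not yet rooted
        JL_GC_PUSH1(&v);                  // link &v into the task's GC frame list;
                                          // per the commit message, such slots are
                                          // treated as non-moving ("red") roots
        do_work(v);                       // may allocate and hit a safepoint
        JL_GC_POP();                      // must pair with the PUSH on every path
    }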
- @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) + @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIA_EXECUTABLE) $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) else $(warn "Skipping whitespace check because git is unavailable") endif diff --git a/src/Makefile b/src/Makefile index ff5f4ce8b99d6..63654f35026e9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -176,7 +176,7 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) ifeq ($(WITH_MMTK), 1) MMTK_SRCS := mmtk_julia MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) -MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) $(MMTK_LIB_DST) else MMTK_OBJS := MMTK_DOBJS := From fdada6c65a0c53de447abb2f331dae081dcb77cf Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 21 Aug 2023 01:49:01 +0000 Subject: [PATCH 030/116] Fix some build issues --- src/mmtk-gc.c | 7 +++---- src/partr.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index e87e7f0e1449f..a390de3ddffd9 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,8 +350,6 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } - // If the two values are the same, we can use either. Otherwise, we need to be careful. - assert(jl_n_gcthreads == jl_options.ngcthreads); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; @@ -366,11 +364,12 @@ void jl_gc_init(void) // if only max size is specified initialize MMTk with a fixed size heap // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads. + // If the two values are the same, we can use either. Otherwise, we need to be careful. uintptr_t gcthreads = jl_options.nmarkthreads; if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { - mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); } else { - mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); } } diff --git a/src/partr.c b/src/partr.c index bfdc4ed727973..37cf9ca310d24 100644 --- a/src/partr.c +++ b/src/partr.c @@ -168,6 +168,20 @@ void jl_gc_sweep_threadfun(void *arg) } } +#else + +// gc thread mark function +void jl_gc_mark_threadfun(void *arg) +{ + mmtk_unreachable(); +} + +// gc thread sweep function +void jl_gc_sweep_threadfun(void *arg) +{ + mmtk_unreachable(); +} + #endif // thread function: used by all mutator threads except the main thread From 7985bb2f37817a5ec29a34da7f2a140a5c9192d9 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 22 Aug 2023 01:40:26 +0000 Subject: [PATCH 031/116] Fixing issue when setting up the number of stock GC threads - it should be 0 when using MMTk --- src/mmtk-gc.c | 5 +++-- src/threading.c | 11 ++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index d8bae88fc56a1..db2ce338529b5 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,8 +350,9 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } - // If the two values are the same, we can use either. 
Otherwise, we need to be careful. - assert(jl_n_gcthreads == jl_options.ngcthreads); + // when using mmtk, we don't spawn any stock GC thread + // and mmtk should use jl_options.ngcthreads to set the number of workers + assert(jl_n_gcthreads == 0); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; diff --git a/src/threading.c b/src/threading.c index d58528fa183be..ddb4850aa074c 100644 --- a/src/threading.c +++ b/src/threading.c @@ -667,6 +667,12 @@ void jl_init_threading(void) } } +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. + // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; @@ -684,11 +690,6 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. - // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; From 27fc1013a130f7da2ae7f47b69763c4455bb405c Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 22 Aug 2023 02:26:48 +0000 Subject: [PATCH 032/116] Apply lock before schedule finalizers --- src/gc-common.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gc-common.c b/src/gc-common.c index 38f737ada576f..80365ec5e4a97 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -339,12 +339,18 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); schedule_all_finalizers(&finalizer_list_marked); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) schedule_all_finalizers(&ptls2->finalizers); } + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); gc_n_threads = 0; gc_all_tls_states = NULL; run_finalizers(ct); From e591ad86d475323b2079fc71f99a74ba0750a0cc Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 22 Aug 2023 01:40:26 +0000 Subject: [PATCH 033/116] Fixing issue when setting up the number of stock GC threads - it should be 0 when using MMTk --- src/mmtk-gc.c | 3 +++ src/threading.c | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index a390de3ddffd9..8e87860c7b6ab 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,6 +350,9 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } + // when using mmtk, we don't spawn any stock GC thread + // and mmtk should use jl_options.ngcthreads to set the number of workers + assert(jl_n_gcthreads == 0); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; diff --git a/src/threading.c b/src/threading.c index 78ecdcc98ae21..d1157a02dada0 100644 --- a/src/threading.c +++ b/src/threading.c @@ -694,6 +694,12 @@ void 
jl_init_threading(void) } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. + // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; @@ -711,11 +717,6 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. - // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; From 00bab46345dd2fc8dd73d9a94ff7aa57ddd90e62 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 9 Aug 2023 01:15:56 +0000 Subject: [PATCH 034/116] Fixing issue that prevented building Julia or running tests with Julia's debug build --- Makefile | 2 +- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 895dbe8100b82..d5cce165dc596 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ check-whitespace: ifneq ($(NO_GIT), 1) @# Append the directory containing the julia we just built to the end of `PATH`, @# to give us the best chance of being able to run this check. - @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) + @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIA_EXECUTABLE) $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) else $(warn "Skipping whitespace check because git is unavailable") endif diff --git a/src/Makefile b/src/Makefile index 41629e5a686bf..5ea0a3d5cf76b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -179,7 +179,7 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) ifeq ($(WITH_MMTK), 1) MMTK_SRCS := mmtk_julia MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) -MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) $(MMTK_LIB_DST) else MMTK_OBJS := MMTK_DOBJS := From 6d8df8f8c2fa721d4c1b3eef6b641b66969a6625 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 28 Aug 2023 00:12:58 +0000 Subject: [PATCH 035/116] Setting number of stock Julia mutators to 0 and fixing assertion --- src/mmtk-gc.c | 4 ++-- src/threading.c | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index baa5d6642721d..4e7a551dd8381 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,8 +350,8 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } - // If the two values are the same, we can use either. Otherwise, we need to be careful. - assert(jl_n_gcthreads == jl_options.ngcthreads); + // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads + assert(jl_n_gcthreads == 0); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; diff --git a/src/threading.c b/src/threading.c index d58528fa183be..ddb4850aa074c 100644 --- a/src/threading.c +++ b/src/threading.c @@ -667,6 +667,12 @@ void jl_init_threading(void) } } +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. 
+ // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; @@ -684,11 +690,6 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. - // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; From 8eab37df935555155a919fcbf0a55b9b4a0fa9f0 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 28 Aug 2023 02:04:39 +0000 Subject: [PATCH 036/116] Add write barrier for excstack update --- src/julia_internal.h | 2 +- src/rtutils.c | 7 ++++--- src/task.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index ed8e40bca4b01..737553ec98845 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1264,7 +1264,7 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA return itr-2 - jl_excstack_bt_size(stack, itr); } // Exception stack manipulation -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size); diff --git a/src/rtutils.c b/src/rtutils.c index 01ea11014a6db..7a31d37e4175c 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -320,7 +320,7 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE dest->top = src->top; } -static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, +static void jl_reserve_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, size_t reserved_size) { jl_excstack_t *s = *stack; @@ -334,13 +334,14 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, if (s) jl_copy_excstack(new_s, s); *stack = new_s; + jl_gc_wb(task, new_s); } -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size) { - jl_reserve_excstack(stack, (*stack ? (*stack)->top : 0) + bt_size + 2); + jl_reserve_excstack(task, stack, (*stack ? (*stack)->top : 0) + bt_size + 2); jl_excstack_t *s = *stack; jl_bt_element_t *rawstack = jl_excstack_raw(s); memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size); diff --git a/src/task.c b/src/task.c index 1dab8688cb079..73d9033f0cb50 100644 --- a/src/task.c +++ b/src/task.c @@ -721,7 +721,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) /* The temporary ptls->bt_data is rooted by special purpose code in the\ GC. This exists only for the purpose of preserving bt_data until we \ set ptls->bt_size=0 below. 
*/ \ - jl_push_excstack(&ct->excstack, exception, \ + jl_push_excstack(ct, &ct->excstack, exception, \ ptls->bt_data, ptls->bt_size); \ ptls->bt_size = 0; \ } \ @@ -1224,7 +1224,7 @@ CFI_NORETURN jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { record_backtrace(ptls, 0); - jl_push_excstack(&ct->excstack, ct->result, + jl_push_excstack(ct, &ct->excstack, ct->result, ptls->bt_data, ptls->bt_size); res = ct->result; } From d0cbd133727fb0135826ba09128f259aaf34d403 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 29 Aug 2023 02:30:35 +0000 Subject: [PATCH 037/116] Revert "Apply lock before schedule finalizers" This reverts commit 27fc1013a130f7da2ae7f47b69763c4455bb405c. --- src/gc-common.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 80365ec5e4a97..38f737ada576f 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -339,18 +339,12 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - // this is called from `jl_atexit_hook`; threads could still be running - // so we have to guard the finalizers' lists - JL_LOCK_NOGC(&finalizers_lock); schedule_all_finalizers(&finalizer_list_marked); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) schedule_all_finalizers(&ptls2->finalizers); } - // this is called from `jl_atexit_hook`; threads could still be running - // so we have to guard the finalizers' lists - JL_LOCK_NOGC(&finalizers_lock); gc_n_threads = 0; gc_all_tls_states = NULL; run_finalizers(ct); From 8d0d8b5db22469d2d3f4a0a65af635f67d7701e3 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 29 Aug 2023 05:51:04 +0000 Subject: [PATCH 038/116] Revert "Fixing issue when setting up the number of stock GC threads - it should be 0 when using MMTk" This reverts commit e591ad86d475323b2079fc71f99a74ba0750a0cc. --- src/mmtk-gc.c | 3 --- src/threading.c | 11 +++++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 8e87860c7b6ab..a390de3ddffd9 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,9 +350,6 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } - // when using mmtk, we don't spawn any stock GC thread - // and mmtk should use jl_options.ngcthreads to set the number of workers - assert(jl_n_gcthreads == 0); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; diff --git a/src/threading.c b/src/threading.c index d1157a02dada0..78ecdcc98ae21 100644 --- a/src/threading.c +++ b/src/threading.c @@ -694,12 +694,6 @@ void jl_init_threading(void) } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. - // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif - jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; @@ -717,6 +711,11 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. 
+ // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; From 66a49ccf864bded60b232140f57c69059a503f07 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 7 Sep 2023 01:31:12 +0000 Subject: [PATCH 039/116] Refactoring the code to reuse most of jl_gc_collect in block_for_gc --- src/gc-common.c | 25 +++++++++++++++++++++++++ src/gc.c | 25 ------------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 38f737ada576f..0f6307c1db98f 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -601,6 +601,31 @@ JL_DLLEXPORT int jl_gc_enable(int on) return prev; } +void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) +{ + JL_TIMING(GC, GC_Stop); +#ifdef USE_TRACY + TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx; + TracyCZoneColor(ctx, 0x696969); +#endif + assert(gc_n_threads); + if (gc_n_threads > 1) + jl_wake_libuv(); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) { + // This acquire load pairs with the release stores + // in the signal handler of safepoint so we are sure that + // all the stores on those threads are visible. + // We're currently also using atomic store release in mutator threads + // (in jl_gc_state_set), but we may want to use signals to flush the + // memory operations on those threads lazily instead. + while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) + jl_cpu_pause(); // yield? + } + } +} + JL_DLLEXPORT int jl_gc_is_enabled(void) { jl_ptls_t ptls = jl_current_task->ptls; diff --git a/src/gc.c b/src/gc.c index 4846549af93e4..924cdce356a4b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -344,31 +344,6 @@ NOINLINE uintptr_t gc_get_stack_ptr(void) #define should_timeout() 0 -void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) -{ - JL_TIMING(GC, GC_Stop); -#ifdef USE_TRACY - TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx; - TracyCZoneColor(ctx, 0x696969); -#endif - assert(gc_n_threads); - if (gc_n_threads > 1) - jl_wake_libuv(); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) { - // This acquire load pairs with the release stores - // in the signal handler of safepoint so we are sure that - // all the stores on those threads are visible. - // We're currently also using atomic store release in mutator threads - // (in jl_gc_state_set), but we may want to use signals to flush the - // memory operations on those threads lazily instead. - while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) - jl_cpu_pause(); // yield? 
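A note on the double load in the loop above, which this patch moves verbatim from gc.c into gc-common.c: the relaxed load keeps the hot spin cheap, and only an apparently-set gc_state is confirmed with an acquire load, which is what synchronizes with the release store in jl_gc_state_set. A self-contained C11 model of the idiom (illustrative only):

    #include <stdatomic.h>

    // Spin until *state becomes nonzero. The relaxed read is the cheap fast
    // check; the acquire read establishes ordering with the setter's release
    // store, so everything written before that store is visible on exit.
    static void wait_until_set(_Atomic(int) *state)
    {
        while (!atomic_load_explicit(state, memory_order_relaxed) ||
               !atomic_load_explicit(state, memory_order_acquire))
            ; // the runtime calls jl_cpu_pause() here
    }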
- } - } -} - // malloc wrappers, aligned allocation #if defined(_OS_WINDOWS_) From f9da153bddb2b494feb3cd1840021ea574aa7f36 Mon Sep 17 00:00:00 2001 From: Luis Eduardo de Souza Amorim Date: Fri, 15 Sep 2023 06:49:02 +0000 Subject: [PATCH 040/116] Checking if object is managed by mmtk before calling pin function; Pinning owner to avoid introspecting it during scanning --- src/array.c | 9 +++++++++ src/julia.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/src/array.c b/src/array.c index 8a064583bbc9e..7c49889ee4662 100644 --- a/src/array.c +++ b/src/array.c @@ -239,6 +239,9 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_array_t *owner = (jl_array_t*)jl_array_owner(data); jl_array_data_owner(a) = (jl_value_t*)owner; + if(mmtk_object_is_managed_by_mmtk(owner)) { + mmtk_pin_object(owner); + } a->flags.how = 3; a->data = data->data; a->flags.isshared = 1; @@ -287,6 +290,9 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) a->flags.ptrarray = 0; a->flags.hasptr = 0; jl_array_data_owner(a) = str; + if(mmtk_object_is_managed_by_mmtk(str)) { + mmtk_pin_object(str); + } a->flags.how = 3; a->flags.isshared = 1; size_t l = jl_string_len(str); @@ -683,6 +689,9 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) else { s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (elsz == 1)); } + if(mmtk_object_is_managed_by_mmtk(s)) { + mmtk_pin_object(s); + } jl_array_data_owner(a) = s; jl_gc_wb(a, s); a->data = jl_string_data(s); diff --git a/src/julia.h b/src/julia.h index 87e965bdae621..337e5131eeee7 100644 --- a/src/julia.h +++ b/src/julia.h @@ -10,7 +10,9 @@ extern "C" { extern int mmtk_object_is_managed_by_mmtk(void* addr); extern unsigned char mmtk_pin_object(void* obj); #define PTRHASH_PIN(key) \ + if (mmtk_object_is_managed_by_mmtk(key)) { \ mmtk_pin_object(key); \ + } \ #ifdef __cplusplus } From cfb6d90e1fcba32a088f073ebf71ac6a98f2cf1d Mon Sep 17 00:00:00 2001 From: Luis Eduardo de Souza Amorim Date: Fri, 15 Sep 2023 06:54:26 +0000 Subject: [PATCH 041/116] Fixing duplicate code from merging mistake --- src/threading.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/threading.c b/src/threading.c index 8d13788fe657c..d1157a02dada0 100644 --- a/src/threading.c +++ b/src/threading.c @@ -694,12 +694,6 @@ void jl_init_threading(void) } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. - // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif - #ifdef MMTK_GC // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. 
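The guard that patch 040 above adds before each pin call is worth spelling out: pinning only makes sense for objects that actually live in MMTk-managed memory; other allocations (such as permanent objects) never move, so the check both skips useless work and keeps foreign pointers out of MMTk's pin API. A minimal sketch of the idiom, using the extern declarations introduced in that patch:

    extern int mmtk_object_is_managed_by_mmtk(void* addr);
    extern unsigned char mmtk_pin_object(void* obj);

    // Pin obj only when it lives in an MMTk space; anything else is
    // already non-moving.
    static void pin_if_mmtk_object(void *obj)
    {
        if (mmtk_object_is_managed_by_mmtk(obj))
            mmtk_pin_object(obj);
    }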
From 6921f6c8934bf99beace5047fe73b1ff9772e9bd Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 5 Oct 2023 03:08:43 +0000 Subject: [PATCH 042/116] Stop using Julia's size classes --- src/array.c | 4 ++-- src/gc-common.c | 10 ---------- src/gc.c | 10 ++++++++++ src/julia_internal.h | 2 +- src/mmtk-gc.c | 11 +++++++++++ 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/array.c b/src/array.c index 8a064583bbc9e..73b6b04669978 100644 --- a/src/array.c +++ b/src/array.c @@ -497,8 +497,8 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); #else int pool_id = jl_gc_szclass_align8(allocsz); - int osize = jl_gc_sizeclasses[pool_id]; - s = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, jl_string_type); + // int osize = jl_gc_sizeclasses[pool_id]; + s = jl_mmtk_gc_alloc_default(ptls, pool_id, allocsz, jl_string_type); #endif } else { diff --git a/src/gc-common.c b/src/gc-common.c index 0f6307c1db98f..6f6c4c5330d74 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -450,16 +450,6 @@ jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize return jl_gc_pool_alloc_inner(ptls, pool_offset, osize); } -int jl_gc_classify_pools(size_t sz, int *osize) -{ - if (sz > GC_MAX_SZCLASS) - return -1; - size_t allocsz = sz + sizeof(jl_taggedvalue_t); - int klass = jl_gc_szclass(allocsz); - *osize = jl_gc_sizeclasses[klass]; - return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); -} - // TODO: jl_gc_track_malloced_array needed? Eliminate heap.mallocarrays, // heap.mafreelist, mallocarray_t? void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT diff --git a/src/gc.c b/src/gc.c index 924cdce356a4b..5febd88d5f9b6 100644 --- a/src/gc.c +++ b/src/gc.c @@ -902,6 +902,16 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT return fl; } +int jl_gc_classify_pools(size_t sz, int *osize) +{ + if (sz > GC_MAX_SZCLASS) + return -1; + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + int klass = jl_gc_szclass(allocsz); + *osize = jl_gc_sizeclasses[klass]; + return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); +} + // Size includes the tag and the tag is not cleared!! inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) diff --git a/src/julia_internal.h b/src/julia_internal.h index 737553ec98845..588ebf7504017 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -495,7 +495,7 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) if (sz <= GC_MAX_SZCLASS) { int pool_id = jl_gc_szclass(allocsz); int osize = jl_gc_sizeclasses[pool_id]; - v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty); + v = jl_mmtk_gc_alloc_default(ptls, pool_id, allocsz, ty); } else { if (allocsz < sz) // overflow in adding offs, size was "negative" diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 1175c6a161750..b40e734d0a63f 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -53,6 +53,17 @@ static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) } } +// allocation +int jl_gc_classify_pools(size_t sz, int *osize) +{ + if (sz > GC_MAX_SZCLASS) + return -1; + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + int klass = jl_gc_szclass(allocsz); + *osize = LLT_ALIGN(allocsz, 16); + return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); +} + // malloc wrappers, aligned allocation // We currently just duplicate what Julia GC does. 
We will in the future replace the malloc calls with MMTK's malloc. From 29f59932c45cdd379189c85545fac6980126f60f Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 5 Oct 2023 03:46:32 +0000 Subject: [PATCH 043/116] Removing code related to size classes --- src/array.c | 3 +-- src/julia_internal.h | 6 ++---- src/mmtk-gc.c | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/array.c b/src/array.c index 73b6b04669978..a0166a6479798 100644 --- a/src/array.c +++ b/src/array.c @@ -497,8 +497,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); #else int pool_id = jl_gc_szclass_align8(allocsz); - // int osize = jl_gc_sizeclasses[pool_id]; - s = jl_mmtk_gc_alloc_default(ptls, pool_id, allocsz, jl_string_type); + s = jl_mmtk_gc_alloc_default(ptls, allocsz, jl_string_type); #endif } else { diff --git a/src/julia_internal.h b/src/julia_internal.h index 588ebf7504017..ee32dbe922caa 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -339,7 +339,7 @@ jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); #ifdef MMTK_GC -JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); +JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, void* ty); JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator); JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls); @@ -493,9 +493,7 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - int osize = jl_gc_sizeclasses[pool_id]; - v = jl_mmtk_gc_alloc_default(ptls, pool_id, allocsz, ty); + v = jl_mmtk_gc_alloc_default(ptls, allocsz, ty); } else { if (allocsz < sz) // overflow in adding offs, size was "negative" diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index b40e734d0a63f..9c532379c599f 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -168,7 +168,7 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o // TODO: drop this okay? // maybe_collect(ptls); - jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, pool_offset, osize, NULL); + jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, osize, NULL); // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable // here when that's edited? 
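To make the new sizing rule of patches 042/043 concrete: instead of mapping a request onto one of Julia's size classes, jl_gc_classify_pools now just rounds the tagged allocation size up to a 16-byte cell. A worked example, assuming an 8-byte jl_taggedvalue_t header (the usual 64-bit layout) and the standard power-of-two rounding behind LLT_ALIGN:

    #include <stdio.h>
    #include <stddef.h>

    // Round x up to a multiple of align (align must be a power of two);
    // reconstructed here to keep the example standalone.
    #define LLT_ALIGN(x, align) (((x) + (align) - 1) & ~((align) - 1))

    int main(void)
    {
        size_t sz = 19;          // caller-requested payload bytes
        size_t allocsz = sz + 8; // + sizeof(jl_taggedvalue_t), assumed 8
        printf("%zu\n", (size_t)LLT_ALIGN(allocsz, 16)); // 27 rounds up to 32
        return 0;
    }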
/* From a0e35b52a2492fc4dc0d262ea2af4120e76d1398 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 6 Oct 2023 01:44:50 +0000 Subject: [PATCH 044/116] Minor refactoring of jl_gc_classify_pools --- src/mmtk-gc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 9c532379c599f..d00d763238051 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -56,12 +56,11 @@ static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) // allocation int jl_gc_classify_pools(size_t sz, int *osize) { - if (sz > GC_MAX_SZCLASS) - return -1; + if (sz > GC_MAX_SZCLASS) + return -1; // call big alloc function size_t allocsz = sz + sizeof(jl_taggedvalue_t); - int klass = jl_gc_szclass(allocsz); *osize = LLT_ALIGN(allocsz, 16); - return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); + return 0; // use MMTk's fastpath logic } // malloc wrappers, aligned allocation From 387814b244daa9cd1f7917e9355972bd91ba53a7 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 6 Oct 2023 03:23:24 +0000 Subject: [PATCH 045/116] Aligning strings to 8 bytes --- src/array.c | 3 +-- src/julia_internal.h | 4 ++-- src/mmtk-gc.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/array.c b/src/array.c index a0166a6479798..e17346947e73d 100644 --- a/src/array.c +++ b/src/array.c @@ -496,8 +496,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); #else - int pool_id = jl_gc_szclass_align8(allocsz); - s = jl_mmtk_gc_alloc_default(ptls, allocsz, jl_string_type); + s = jl_mmtk_gc_alloc_default(ptls, allocsz, 8, jl_string_type); #endif } else { diff --git a/src/julia_internal.h b/src/julia_internal.h index ee32dbe922caa..575e84e9d41a7 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -339,7 +339,7 @@ jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); #ifdef MMTK_GC -JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, void* ty); +JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void* ty); JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator); JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls); @@ -493,7 +493,7 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { - v = jl_mmtk_gc_alloc_default(ptls, allocsz, ty); + v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty); } else { if (allocsz < sz) // overflow in adding offs, size was "negative" diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index d00d763238051..060b9f22a0e33 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -167,7 +167,7 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o // TODO: drop this okay? // maybe_collect(ptls); - jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, osize, NULL); + jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL); // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable // here when that's edited? 
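One detail of the refactor in patch 044 above: the value the stock implementation returned, `(int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass])`, is the classic null-base offset idiom; taking a member's address relative to a null struct pointer yields that member's byte offset, which the GC lowering pass can add to the thread-local ptls pointer to find the pool. MMTk has no per-thread pool array to point into, hence the new `return 0; // use MMTk's fastpath logic`. A standalone model of the idiom (`tls_model_t` is invented for the example; the construct is formally undefined behavior in ISO C, but it is the same one the runtime relies on):

    #include <stdio.h>
    #include <stdint.h>

    typedef struct {
        char other_state[64]; // stand-in for the fields before the pools
        int norm_pools[49];
    } tls_model_t;

    int main(void)
    {
        int klass = 3;
        // Address of a member off a null base == the member's byte offset,
        // i.e. what offsetof(tls_model_t, norm_pools[3]) would compute.
        int off = (int)(intptr_t)(&((tls_model_t *)0)->norm_pools[klass]);
        printf("%d\n", off); // 64 + 3 * sizeof(int) = 76 on typical ABIs
        return 0;
    }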
/* From 1d97f5dfea84ed463630ce57965c7716cf7403e8 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 6 Oct 2023 03:55:55 +0000 Subject: [PATCH 046/116] Fixing whitespace --- src/mmtk-gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 060b9f22a0e33..efeadc903f71a 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -56,7 +56,7 @@ static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) // allocation int jl_gc_classify_pools(size_t sz, int *osize) { - if (sz > GC_MAX_SZCLASS) + if (sz > GC_MAX_SZCLASS) return -1; // call big alloc function size_t allocsz = sz + sizeof(jl_taggedvalue_t); *osize = LLT_ALIGN(allocsz, 16); From 4792b73132850aa153fe22023b8a45af29a3554f Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 1 Feb 2024 01:44:31 +0000 Subject: [PATCH 047/116] Removing functions to call mmtk's enable and disable collection --- src/mmtk-gc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 1175c6a161750..347ea4634bc9e 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -503,14 +503,6 @@ void jl_gc_threadfun(void *arg) } // added for MMTk integration -void enable_collection(void) -{ - mmtk_enable_collection(); -} -void disable_collection(void) -{ - mmtk_disable_collection(); -} JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT { From 4e057c98921ee71c4b86553df039923857a72c8f Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 1 Feb 2024 01:51:34 +0000 Subject: [PATCH 048/116] Forgot a few other places --- src/gc-common.c | 2 -- src/gc.c | 6 ------ src/julia_internal.h | 2 -- 3 files changed, 10 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 0f6307c1db98f..aae56366d0c2d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -588,11 +588,9 @@ JL_DLLEXPORT int jl_gc_enable(int on) if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { gc_num.allocd += gc_num.deferred_alloc; gc_num.deferred_alloc = 0; - enable_collection(); } } else if (prev && !on) { - disable_collection(); // enable -> disable jl_atomic_fetch_add(&jl_gc_disable_counter, 1); // check if the GC is running and wait for it to finish diff --git a/src/gc.c b/src/gc.c index 924cdce356a4b..c4023e922b676 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3438,12 +3438,6 @@ void jl_gc_threadfun(void *arg) } // added for MMTk integration -void enable_collection(void) -{ -} -void disable_collection(void) -{ -} JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT { diff --git a/src/julia_internal.h b/src/julia_internal.h index 737553ec98845..ec60dc76f5f16 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -333,8 +333,6 @@ extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; -extern void enable_collection(void); -extern void disable_collection(void); jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); From 1348e5009adf133f3631c82875c4096667ca760c Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 1 Feb 2024 01:57:46 +0000 Subject: [PATCH 049/116] Missing reference in thread initialization --- src/threading.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/threading.c b/src/threading.c index d1157a02dada0..b050b3eccd87a 100644 --- a/src/threading.c +++ b/src/threading.c @@ -351,9 +351,6 @@ jl_ptls_t 
jl_init_threadtls(int16_t tid) ptls->rngseed = jl_rand(); if (tid == 0) { ptls->disable_gc = 1; -#ifdef MMTK_GC - disable_collection(); -#endif } #ifdef _OS_WINDOWS_ if (tid == 0) { From 125d05e1f11dd9e5add538d6c17fcd39474af711 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 1 Feb 2024 02:07:25 +0000 Subject: [PATCH 050/116] Exporting jl_gc_disable_counter --- src/gc-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-common.c b/src/gc-common.c index aae56366d0c2d..5ff30bbba5ac5 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -576,7 +576,7 @@ void gc_premark(jl_ptls_t ptls2) // GC control // --- -_Atomic(uint32_t) jl_gc_disable_counter = 1; +JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter = 1; JL_DLLEXPORT int jl_gc_enable(int on) { From 99aa5dd6289ac5445f9f019c4d8c4284fe5da0ba Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 1 Feb 2024 02:52:20 +0000 Subject: [PATCH 051/116] Exporting jl_gc_disable_counter --- src/gc.h | 2 +- src/julia_internal.h | 2 +- src/mmtk-gc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gc.h b/src/gc.h index 0630a039f2b94..03b52fb24acbb 100644 --- a/src/gc.h +++ b/src/gc.h @@ -83,7 +83,7 @@ extern const size_t max_collect_interval; extern size_t last_long_collect_interval; extern size_t total_mem; extern memsize_t max_total_memory; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; extern jl_mutex_t heapsnapshot_lock; extern uint64_t finalizer_rngState[]; extern int gc_n_threads; diff --git a/src/julia_internal.h b/src/julia_internal.h index ec60dc76f5f16..f5c9e07ce7329 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -936,7 +936,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
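The mmtk-gc.c hunk that follows upgrades the read of jl_gc_disable_counter in jl_gc_collect from a relaxed to an acquire load, pairing the collector's entry point with updates made through jl_gc_enable. A minimal self-contained model of the counter protocol (illustrative only; the real updates go through jl_gc_enable and also reconcile deferred allocation counts):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static _Atomic(uint32_t) disable_counter; // models jl_gc_disable_counter

    static void gc_disable(void) { atomic_fetch_add(&disable_counter, 1); }
    static void gc_enable(void)  { atomic_fetch_sub(&disable_counter, 1); }

    // Collection request: while any thread holds the counter above zero,
    // record the allocation debt and return instead of collecting.
    static bool gc_may_collect(void)
    {
        return atomic_load_explicit(&disable_counter, memory_order_acquire) == 0;
    }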
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 347ea4634bc9e..db985b5149f8a 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -219,7 +219,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); From a0c895ee0ff25b751475c3015ed82cd84c058b0e Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 9 Feb 2024 01:41:50 +0000 Subject: [PATCH 052/116] Increasing the timeout just in case it's caused by the github runner specs --- test/threads.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/threads.jl b/test/threads.jl index 8189311739e31..376c77347e15f 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -288,7 +288,7 @@ close(proc.in) proc = run(cmd; wait = false) done = Threads.Atomic{Bool}(false) timeout = false - timer = Timer(100) do _ + timer = Timer(150) do _ timeout = true for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL] for _ in 1:1000 From 3b4ae537216b59ccaded4f7b946dc7faf2a27bb5 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 12 Feb 2024 02:52:56 +0000 Subject: [PATCH 053/116] Refactoring pinning functions and adding comments --- src/array.c | 24 +++++++++++++++--------- src/builtins.c | 19 ++++++++++--------- src/datatype.c | 4 ++++ src/julia.h | 4 ++-- src/julia_internal.h | 4 ++++ src/mmtk-gc.c | 1 + 6 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/array.c b/src/array.c index 7c49889ee4662..73ba4cc0e8214 100644 --- a/src/array.c +++ b/src/array.c @@ -239,9 +239,11 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_array_t *owner = (jl_array_t*)jl_array_owner(data); jl_array_data_owner(a) = (jl_value_t*)owner; - if(mmtk_object_is_managed_by_mmtk(owner)) { - mmtk_pin_object(owner); - } + // For array objects with an owner point (a->flags.how == 3), we would need to + // introspect the object to update the a->data field. To avoid doing that and + // making scan_object much more complex we simply enforce that both owner and + // buffers are always pinned + mmtk_pin_object(owner); a->flags.how = 3; a->data = data->data; a->flags.isshared = 1; @@ -290,9 +292,11 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) a->flags.ptrarray = 0; a->flags.hasptr = 0; jl_array_data_owner(a) = str; - if(mmtk_object_is_managed_by_mmtk(str)) { - mmtk_pin_object(str); - } + // For array objects with an owner point (a->flags.how == 3), we would need to + // introspect the object to update the a->data field. To avoid doing that and + // making scan_object much more complex we simply enforce that both owner and + // buffers are always pinned + mmtk_pin_object(str); a->flags.how = 3; a->flags.isshared = 1; size_t l = jl_string_len(str); @@ -689,9 +693,11 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) else { s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (elsz == 1)); } - if(mmtk_object_is_managed_by_mmtk(s)) { - mmtk_pin_object(s); - } + // For array objects with an owner point (a->flags.how == 3), we would need to + // introspect the object to update the a->data field. 
To avoid doing that and + // making scan_object much more complex we simply enforce that both owner and + // buffers are always pinned + mmtk_pin_object(s); jl_array_data_owner(a) = s; jl_gc_wb(a, s); a->data = jl_string_data(s); diff --git a/src/builtins.c b/src/builtins.c index 0094f4e5a2141..0a2cc9cd42729 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -344,9 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN i++; pe = pe->prev; } - if(mmtk_object_is_managed_by_mmtk(v)) { - mmtk_pin_object(v); - } + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + mmtk_pin_object(v); return inthash((uintptr_t)v); } if (tv == jl_uniontype_type) { @@ -395,9 +395,10 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT return ~h; size_t f, nf = jl_datatype_nfields(dt); if (nf == 0 || (!dt->layout->haspadding && dt->layout->npointers == 0)) { - if(mmtk_object_is_managed_by_mmtk(v)) { - mmtk_pin_object(v); - } + + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + mmtk_pin_object(v); // operate element-wise if there are unused bits inside, // otherwise just take the whole data block at once // a few select pointers (notably symbol) also have special hash values @@ -459,9 +460,9 @@ static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) J return m->hash; } if (dt->name->mutabl) { - if(mmtk_object_is_managed_by_mmtk(v)) { - mmtk_pin_object(v); - } + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + mmtk_pin_object(v); return inthash((uintptr_t)v); } return immut_id_(dt, v, dt->hash); diff --git a/src/datatype.c b/src/datatype.c index 20c3af1555675..9e6d480985c69 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -65,6 +65,8 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu jl_typename_t *tn = (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t), jl_typename_type); + // Typenames should be pinned since they are used as metadata, and are + // read during scan_object mmtk_pin_object(tn); tn->name = name; tn->module = module; @@ -97,6 +99,8 @@ jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_task_t *ct = jl_current_task; jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); + // Types should be pinned since they are used as metadata, and are + // read during scan_object mmtk_pin_object(t); jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; diff --git a/src/julia.h b/src/julia.h index 337e5131eeee7..3a33e59e3835a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -9,10 +9,10 @@ extern "C" { extern int mmtk_object_is_managed_by_mmtk(void* addr); extern unsigned char mmtk_pin_object(void* obj); +// FIXME: Pinning objects that get hashed in the ptrhash table +// until we implement address space hashing. 
#define PTRHASH_PIN(key) \ - if (mmtk_object_is_managed_by_mmtk(key)) { \ mmtk_pin_object(key); \ - } \ #ifdef __cplusplus } diff --git a/src/julia_internal.h b/src/julia_internal.h index 4f90b44c80887..90e6dd6ce1ec1 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -533,6 +533,10 @@ typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) { jl_gc_tracked_buffer_t *buf = jl_gc_alloc(ptls, sz, (void*)jl_buff_tag); + // For array objects with an owner point (a->flags.how == 3), we would need to + // introspect the object to update the a->data field. To avoid doing that and + // making scan_object much more complex we simply enforce that both owner and + // buffers are always pinned mmtk_pin_object(buf); return buf; } diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 9d8b0f049e11d..05989a6ac335d 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -432,6 +432,7 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) jl_value_t *snew = jl_alloc_string(sz); memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len); if(mmtk_is_pinned(s)) { + // if the source string was pinned, we also pin the new one mmtk_pin_object(snew); } return snew; From 6f5f68500c77d4f8daecead6caacf447600dc57a Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 28 Feb 2024 13:45:44 +1300 Subject: [PATCH 054/116] Call initialize_collection after _finish_julia_init (#40) --- src/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/init.c b/src/init.c index 52f4740ccc306..8a379a5922f5a 100644 --- a/src/init.c +++ b/src/init.c @@ -824,9 +824,6 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_ptls_t ptls = jl_init_threadtls(0); -#ifdef MMTK_GC - mmtk_initialize_collection((void *)ptls); -#endif #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" @@ -836,6 +833,9 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #pragma GCC diagnostic pop JL_GC_PROMISE_ROOTED(ct); _finish_julia_init(rel, ptls, ct); +#ifdef MMTK_GC + mmtk_initialize_collection((void *)ptls); +#endif } static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct) From d907a0630b4551cfbb321366da6e96242ada25d1 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 10 May 2024 15:51:14 +1200 Subject: [PATCH 055/116] Port PR #42 and #44 to master (#48) * Call initialize_collection before enabling GC (#44) * Fix build with stock GC: mmtk_pin_object is conditionaly compiled (#42) --- src/array.c | 6 +++--- src/builtins.c | 6 +++--- src/datatype.c | 4 ++-- src/init.c | 6 +++--- src/julia.h | 7 +++++-- src/julia_internal.h | 18 +++++++++++++++++- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/array.c b/src/array.c index bc73f582d63fa..2877604f7a900 100644 --- a/src/array.c +++ b/src/array.c @@ -243,7 +243,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, // introspect the object to update the a->data field. To avoid doing that and // making scan_object much more complex we simply enforce that both owner and // buffers are always pinned - mmtk_pin_object(owner); + PTR_PIN(owner); a->flags.how = 3; a->data = data->data; a->flags.isshared = 1; @@ -296,7 +296,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) // introspect the object to update the a->data field. 
To avoid doing that and // making scan_object much more complex we simply enforce that both owner and // buffers are always pinned - mmtk_pin_object(str); + PTR_PIN(str); a->flags.how = 3; a->flags.isshared = 1; size_t l = jl_string_len(str); @@ -695,7 +695,7 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) // introspect the object to update the a->data field. To avoid doing that and // making scan_object much more complex we simply enforce that both owner and // buffers are always pinned - mmtk_pin_object(s); + PTR_PIN(s); jl_array_data_owner(a) = s; jl_gc_wb(a, s); a->data = jl_string_data(s); diff --git a/src/builtins.c b/src/builtins.c index 0a2cc9cd42729..d961f36cbc707 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -346,7 +346,7 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN } // FIXME: Pinning objects that get hashed // until we implement address space hashing. - mmtk_pin_object(v); + PTR_PIN(v); return inthash((uintptr_t)v); } if (tv == jl_uniontype_type) { @@ -398,7 +398,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT // FIXME: Pinning objects that get hashed // until we implement address space hashing. - mmtk_pin_object(v); + PTR_PIN(v); // operate element-wise if there are unused bits inside, // otherwise just take the whole data block at once // a few select pointers (notably symbol) also have special hash values @@ -462,7 +462,7 @@ static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) J if (dt->name->mutabl) { // FIXME: Pinning objects that get hashed // until we implement address space hashing. - mmtk_pin_object(v); + PTR_PIN(v); return inthash((uintptr_t)v); } return immut_id_(dt, v, dt->hash); diff --git a/src/datatype.c b/src/datatype.c index 9e6d480985c69..ae8853f37c688 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -67,7 +67,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu jl_typename_type); // Typenames should be pinned since they are used as metadata, and are // read during scan_object - mmtk_pin_object(tn); + PTR_PIN(tn); tn->name = name; tn->module = module; tn->wrapper = NULL; @@ -101,7 +101,7 @@ jl_datatype_t *jl_new_uninitialized_datatype(void) jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); // Types should be pinned since they are used as metadata, and are // read during scan_object - mmtk_pin_object(t); + PTR_PIN(t); jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; t->hasfreetypevars = 0; diff --git a/src/init.c b/src/init.c index 8a379a5922f5a..faa446a34cf22 100644 --- a/src/init.c +++ b/src/init.c @@ -833,9 +833,6 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #pragma GCC diagnostic pop JL_GC_PROMISE_ROOTED(ct); _finish_julia_init(rel, ptls, ct); -#ifdef MMTK_GC - mmtk_initialize_collection((void *)ptls); -#endif } static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct) @@ -883,6 +880,9 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ } jl_start_threads(); +#ifdef MMTK_GC + mmtk_initialize_collection((void *)ptls); +#endif jl_gc_enable(1); if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) { diff --git a/src/julia.h b/src/julia.h index 3a33e59e3835a..ed6305715a87c 100644 --- a/src/julia.h +++ b/src/julia.h @@ -11,8 +11,11 @@ extern int mmtk_object_is_managed_by_mmtk(void* addr); extern unsigned char 
mmtk_pin_object(void* obj); // FIXME: Pinning objects that get hashed in the ptrhash table // until we implement address space hashing. -#define PTRHASH_PIN(key) \ - mmtk_pin_object(key); \ +#ifdef MMTK_GC +#define PTRHASH_PIN(key) mmtk_pin_object(key); +#else +#define PTRHASH_PIN(key) +#endif #ifdef __cplusplus } diff --git a/src/julia_internal.h b/src/julia_internal.h index a8593a23a4e40..25983ea6c0d27 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -3,6 +3,22 @@ #ifndef JL_INTERNAL_H #define JL_INTERNAL_H +#ifdef __cplusplus +extern "C" { +#endif + +extern int mmtk_object_is_managed_by_mmtk(void* addr); +extern unsigned char mmtk_pin_object(void* obj); +#ifdef MMTK_GC +#define PTR_PIN(key) mmtk_pin_object(key); +#else +#define PTR_PIN(key) +#endif + +#ifdef __cplusplus +} +#endif + #include "options.h" #include "julia_assert.h" #include "julia_locks.h" @@ -535,7 +551,7 @@ STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) // introspect the object to update the a->data field. To avoid doing that and // making scan_object much more complex we simply enforce that both owner and // buffers are always pinned - mmtk_pin_object(buf); + PTR_PIN(buf); return buf; } From f9f38df99af1bfa8e37a0d4e224ed17bf037c7b8 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 18 Jul 2024 04:42:09 +0000 Subject: [PATCH 056/116] Changes to support mmtk --- src/gc-common.c | 37 +++++++++++++++++++++++++++++ src/gc-page-profiler.c | 4 ++++ src/gc-stacks.c | 2 ++ src/gc.c | 43 +++------------------------------- src/gc.h | 30 ++++++++++++------------ src/julia.h | 5 ++-- src/llvm-final-gc-lowering.cpp | 25 ++++++++------------ src/llvm-late-gc-lowering.cpp | 8 +++---- src/llvm-pass-helpers.cpp | 32 +++++++++++++++++++++++++ src/mmtk-gc.c | 42 +++++++++++---------------------- src/scheduler.c | 14 +++++++++++ 11 files changed, 137 insertions(+), 105 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 7462cb06c3cf3..d1e87b5741384 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -3,6 +3,7 @@ #include "gc.h" jl_gc_num_t gc_num = {0}; +gc_heapstatus_t gc_heap_stats = {0}; size_t last_long_collect_interval; int gc_n_threads; jl_ptls_t* gc_all_tls_states; @@ -484,7 +485,43 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT jl_batch_accum_heap_size(ptls, sz); } +void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); +} + +void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT +{ + jl_batch_accum_free_size(jl_current_task->ptls, sz); +} +void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT +{ + assert(jl_is_genericmemory(v)); + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2); + char *d = (char*)m->ptr; + if (isaligned) + jl_free_aligned(d); + else + free(d); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, + jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m)); + gc_num.freed += jl_genericmemory_nbytes(m); + gc_num.freecall++; +} + +void jl_free_thread_gc_state(jl_ptls_t ptls) +{ + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; + ws_queue_t *cq = &mq->chunk_queue; + free_ws_array(jl_atomic_load_relaxed(&cq->array)); + jl_atomic_store_relaxed(&cq->array, NULL); + ws_queue_t *q = &mq->ptr_queue; + free_ws_array(jl_atomic_load_relaxed(&q->array)); + jl_atomic_store_relaxed(&q->array, NULL); + 
arraylist_free(&mq->reclaim_set); +} // GCNum, statistics manipulation // --- diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index 2e876e4b7b4d6..fe7a52b4d1f8a 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc-page-profiler.h" #ifdef __cplusplus @@ -177,3 +179,5 @@ JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) #ifdef __cplusplus } #endif + +#endif // !MMTK_GC \ No newline at end of file diff --git a/src/gc-stacks.c b/src/gc-stacks.c index e00e954c105e0..465dce7fda26b 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -325,6 +325,8 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT } } +extern int gc_first_tid; + JL_DLLEXPORT jl_array_t *jl_live_tasks(void) { size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); diff --git a/src/gc.c b/src/gc.c index edf90e8741498..520a8b8cb608c 100644 --- a/src/gc.c +++ b/src/gc.c @@ -193,8 +193,6 @@ static _Atomic(int) support_conservative_marking = 0; * have proper support of GC transition in codegen, we should execute the * finalizers in unmanaged (GC safe) mode. */ - -gc_heapstatus_t gc_heap_stats = {0}; int next_sweep_full = 0; // List of marked big objects. Not per-thread. Accessed only by master thread. @@ -600,10 +598,7 @@ STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTS } } -STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT -{ - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); -} + // big value list @@ -697,28 +692,7 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT } // tracking Memorys with malloc'd storage -void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT -{ - jl_batch_accum_free_size(jl_current_task->ptls, sz); -} - - -static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT -{ - assert(jl_is_genericmemory(v)); - jl_genericmemory_t *m = (jl_genericmemory_t*)v; - assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2); - char *d = (char*)m->ptr; - if (isaligned) - jl_free_aligned(d); - else - free(d); - jl_atomic_store_relaxed(&gc_heap_stats.heap_size, - jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m)); - gc_num.freed += jl_genericmemory_nbytes(m); - gc_num.freecall++; -} - +extern void jl_gc_free_memory(jl_value_t *v, int isaligned); static void sweep_malloced_memory(void) JL_NOTSAFEPOINT { gc_time_mallocd_memory_start(); @@ -3380,18 +3354,6 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); } -void jl_free_thread_gc_state(jl_ptls_t ptls) -{ - jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; - ws_queue_t *cq = &mq->chunk_queue; - free_ws_array(jl_atomic_load_relaxed(&cq->array)); - jl_atomic_store_relaxed(&cq->array, NULL); - ws_queue_t *q = &mq->ptr_queue; - free_ws_array(jl_atomic_load_relaxed(&q->array)); - jl_atomic_store_relaxed(&q->array, NULL); - arraylist_free(&mq->reclaim_set); -} - void jl_deinit_thread_heap(jl_ptls_t ptls) { // Do nothing @@ -3478,6 +3440,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) return data; } +extern void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); diff --git a/src/gc.h b/src/gc.h index 
eb724985b599d..ea2766a646127 100644 --- a/src/gc.h +++ b/src/gc.h @@ -71,6 +71,14 @@ extern uint64_t finalizer_rngState[]; extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +#ifdef GC_SMALL_PAGE +#define GC_PAGE_LG2 12 // log2(size of a page) +#else +#define GC_PAGE_LG2 14 // log2(size of a page) +#endif +#define GC_PAGE_SZ (1 << GC_PAGE_LG2) +#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) + // This struct must be kept in sync with the Julia type of the same name in base/timing.jl typedef struct { int64_t allocd; @@ -99,6 +107,13 @@ typedef struct { uint64_t last_incremental_sweep; } jl_gc_num_t; +typedef struct { + _Atomic(size_t) bytes_mapped; + _Atomic(size_t) bytes_resident; + _Atomic(size_t) heap_size; + _Atomic(size_t) heap_target; +} gc_heapstatus_t; + extern jl_gc_num_t gc_num; // data structure for tracking malloc'd arrays. @@ -192,14 +207,6 @@ typedef struct { extern "C" { #endif -#ifdef GC_SMALL_PAGE -#define GC_PAGE_LG2 12 // log2(size of a page) -#else -#define GC_PAGE_LG2 14 // log2(size of a page) -#endif -#define GC_PAGE_SZ (1 << GC_PAGE_LG2) -#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) - #define jl_malloc_tag ((void*)0xdeadaa01) #define jl_singleton_tag ((void*)0xdeadaa02) @@ -428,13 +435,6 @@ typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; } pagetable_t; -typedef struct { - _Atomic(size_t) bytes_mapped; - _Atomic(size_t) bytes_resident; - _Atomic(size_t) heap_size; - _Atomic(size_t) heap_target; -} gc_heapstatus_t; - #define GC_PAGE_UNMAPPED 0 #define GC_PAGE_ALLOCATED 1 #define GC_PAGE_LAZILY_FREED 2 diff --git a/src/julia.h b/src/julia.h index aab7512b6cc03..a16785ee0e9d3 100644 --- a/src/julia.h +++ b/src/julia.h @@ -646,12 +646,11 @@ typedef struct _jl_binding_t { _Atomic(struct _jl_binding_t*) owner; // for individual imported bindings (NULL until 'resolved') _Atomic(jl_value_t*) ty; // binding type uint8_t constp:1; - uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp` - uint8_t publicp:1; // exportp without publicp is not allowed. 
+ uint8_t exportp:1; uint8_t imported:1; uint8_t usingfailed:1; uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package - uint8_t padding:1; + uint8_t padding:2; } jl_binding_t; typedef struct { diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 83944f63bacee..cbc26da892403 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -307,7 +307,7 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) builder.CreateStore(new_cursor, cursor_ptr); // ptls->gc_num.allocd += osize; - auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num)); + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); @@ -322,11 +322,13 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) phiNode->addIncoming(v_as_ptr, fastpath); phiNode->takeName(target); - return phiNode; + target->replaceAllUsesWith(phiNode); + target->eraseFromParent(); + return; } else { auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); - derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); + newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type }); + derefBytes = sizeof(void*); } #endif // MMTK_GC } @@ -368,13 +370,6 @@ bool FinalLowerGC::runOnFunction(Function &F) allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); -#ifdef MMTK_GC - auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1); - auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2); - auto writeBarrier1SlowFunc = getOrNull(jl_intrinsics::writeBarrier1Slow); - auto writeBarrier2SlowFunc = getOrNull(jl_intrinsics::writeBarrier2Slow); -#endif - // Lower all calls to supported intrinsics. 
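The derefBytes value threaded through lowerGCAllocBytes above ends up as LLVM's dereferenceable return attribute on the lowered allocation call, which later passes may use to justify speculative loads of the object header. Roughly (a sketch of the pattern, not the pass verbatim):

    CallInst *newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
    if (derefBytes > 0)
        newI->addDereferenceableRetAttr(derefBytes);  // result is known dereferenceable(derefBytes)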
for (auto &BB : F) { for (auto &I : make_early_inc_range(BB)) { @@ -403,10 +398,10 @@ bool FinalLowerGC::runOnFunction(Function &F) #ifdef MMTK_GC - LOWER_INTRINSIC(writeBarrier1Func, lowerWriteBarrier1); - LOWER_INTRINSIC(writeBarrier2Func, lowerWriteBarrier2); - LOWER_INTRINSIC(writeBarrier1SlowFunc, lowerWriteBarrier1Slow); - LOWER_INTRINSIC(writeBarrier2SlowFunc, lowerWriteBarrier2Slow); + LOWER_INTRINSIC(writeBarrier1, lowerWriteBarrier1); + LOWER_INTRINSIC(writeBarrier2, lowerWriteBarrier2); + LOWER_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow); + LOWER_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow); #endif diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 1e2e0d3933783..f257afd2c6211 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2575,15 +2575,15 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (CFGModified) { *CFGModified = true; } + + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); +#ifndef MMTK_GC auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level"); int debug_info = 1; if (DebugInfoMeta != nullptr) { debug_info = cast(cast(DebugInfoMeta)->getValue())->getZExtValue(); } - - IRBuilder<> builder(CI); - builder.SetCurrentDebugLocation(CI->getDebugLoc()); -#ifndef MMTK_GC auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED); setName(parBits, "parent_bits", debug_info); auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED)); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 7f701cb2db639..6d6c3898e875c 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -275,7 +275,11 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, WRITE_BARRIER_1_NAME); +#if JL_LLVM_VERSION >= 160000 + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return intrinsic; }); const IntrinsicDescription writeBarrier2( @@ -290,7 +294,11 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, WRITE_BARRIER_2_NAME); +#if JL_LLVM_VERSION >= 160000 + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return intrinsic; }); const IntrinsicDescription writeBarrier1Slow( @@ -305,7 +313,11 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, WRITE_BARRIER_1_SLOW_NAME); +#if JL_LLVM_VERSION >= 160000 + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return intrinsic; }); const IntrinsicDescription writeBarrier2Slow( @@ -320,7 +332,11 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, WRITE_BARRIER_2_SLOW_NAME); +#if JL_LLVM_VERSION >= 160000 + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return intrinsic; }); #endif @@ -423,7 +439,11 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_WB_1_NAME); +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return func; }); @@ -439,7 +459,11 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_WB_2_NAME); +#if JL_LLVM_VERSION >= 
160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return func; }); @@ -455,7 +479,11 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_WB_1_SLOW_NAME); +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return func; }); @@ -471,7 +499,11 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_WB_2_SLOW_NAME); +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return func; }); #endif diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 0691a1db776e8..8a7d95871d7c6 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -182,19 +182,6 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o return v; } -void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - jl_free_aligned(d); - else - free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } -} - // roots // --- @@ -204,7 +191,7 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) } // TODO: exported, but not MMTk-specific? -JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const void *stored, jl_datatype_t *dt) JL_NOTSAFEPOINT { mmtk_unreachable(); } @@ -233,10 +220,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); - jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } mmtk_handle_user_collection_request(ptls, collection); @@ -247,32 +234,31 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) // TODO: remove `gc_cache`? 
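When the disable counter is set, jl_gc_collect above still keeps the allocation statistics coherent: the bytes this thread has allocated since its last counter reset are folded into the global deferred total, so the next enabled collection sees the accumulated pressure. Ignoring the atomics, the bookkeeping amounts to:

    int64_t localbytes = ptls->gc_tls.gc_num.allocd + gc_num.interval; // bytes allocated since the last reset
    ptls->gc_tls.gc_num.allocd = -(int64_t)gc_num.interval;            // restart the per-thread countdown
    gc_num.deferred_alloc += localbytes;                               // credited to the next real collection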
void jl_init_thread_heap(jl_ptls_t ptls) { - jl_thread_heap_t *heap = &ptls->heap; + jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { p[i].osize = jl_gc_sizeclasses[i]; p[i].freelist = NULL; p[i].newpages = NULL; } - arraylist_new(&heap->weak_refs, 0); - arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&heap->weak_refs, 0); + small_arraylist_new(&heap->live_tasks, 0); + for (int i = 0; i < JL_N_STACK_POOLS; i++) + small_arraylist_new(&heap->free_stacks[i], 0); heap->mallocarrays = NULL; heap->mafreelist = NULL; heap->big_objects = NULL; - heap->remset = &heap->_remset[0]; - heap->last_remset = &heap->_remset[1]; - arraylist_new(heap->remset, 0); - arraylist_new(heap->last_remset, 0); + arraylist_new(&heap->remset, 0); arraylist_new(&ptls->finalizers, 0); - arraylist_new(&ptls->sweep_objs, 0); + arraylist_new(&ptls->gc_tls.sweep_objs, 0); - jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; + jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache; gc_cache->perm_scanned_bytes = 0; gc_cache->scanned_bytes = 0; gc_cache->nbig_obj = 0; - memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); // Clear the malloc sz count jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0); diff --git a/src/scheduler.c b/src/scheduler.c index 2c7dbd63ef4a4..5c885dd2f3b76 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -112,6 +112,8 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *ct); +#ifndef MMTK_GC + static inline int may_mark(void) JL_NOTSAFEPOINT { return (jl_atomic_load(&gc_n_threads_marking) > 0); @@ -185,6 +187,18 @@ void jl_concurrent_gc_threadfun(void *arg) } } +#else +void jl_parallel_gc_threadfun(void *arg) +{ + mmtk_unreachable(); +} + +void jl_concurrent_gc_threadfun(void *arg) +{ + mmtk_unreachable(); +} +#endif + // thread function: used by all mutator threads except the main thread void jl_threadfun(void *arg) { From b64c1e4721036606bfa8555d77f91d6e4a0a1a88 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 22 Jul 2024 21:53:50 +0000 Subject: [PATCH 057/116] Final changes to support the binding --- src/gc-common.c | 14 ++++++++++ src/gc.c | 14 ---------- src/gc.h | 1 - src/genericmemory.c | 1 + src/julia.h | 7 ++--- src/llvm-final-gc-lowering.cpp | 49 +++++++++++++++------------------- 6 files changed, 41 insertions(+), 45 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index d1e87b5741384..640f2ec1de29e 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -477,6 +477,20 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) +{ + int n_threads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + int64_t pool_live_bytes = 0; + for (int i = 0; i < n_threads; i++) { + jl_ptls_t ptls2 = all_tls_states[i]; + if (ptls2 != NULL) { + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + } + } + return pool_live_bytes; +} + void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; diff --git a/src/gc.c b/src/gc.c index 520a8b8cb608c..a189ac24b9f95 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2789,20 +2789,6 @@ 
static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) -{ - int n_threads = jl_atomic_load_acquire(&jl_n_threads); - jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - int64_t pool_live_bytes = 0; - for (int i = 0; i < n_threads; i++) { - jl_ptls_t ptls2 = all_tls_states[i]; - if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); - } - } - return pool_live_bytes; -} - uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor) { double est = factor * old_val + (1 - factor) * new_val; diff --git a/src/gc.h b/src/gc.h index ea2766a646127..c9320a6dbd837 100644 --- a/src/gc.h +++ b/src/gc.h @@ -47,7 +47,6 @@ extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o extern void jl_rng_split(uint64_t to[JL_RNG_SIZE], uint64_t from[JL_RNG_SIZE]); extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner, int8_t can_collect); -extern size_t jl_array_nbytes(jl_array_t *a); extern void run_finalizers(jl_task_t *ct, int finalizers_thread); #define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) diff --git a/src/genericmemory.c b/src/genericmemory.c index ea52fca66ba48..24db8f29f1a12 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -54,6 +54,7 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is tot = sizeof(jl_genericmemory_t) + sizeof(void*); } m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype); + if (pooled) { data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; } diff --git a/src/julia.h b/src/julia.h index a16785ee0e9d3..b4ff97daae150 100644 --- a/src/julia.h +++ b/src/julia.h @@ -646,11 +646,12 @@ typedef struct _jl_binding_t { _Atomic(struct _jl_binding_t*) owner; // for individual imported bindings (NULL until 'resolved') _Atomic(jl_value_t*) ty; // binding type uint8_t constp:1; - uint8_t exportp:1; + uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp` + uint8_t publicp:1; // exportp without publicp is not allowed. 
uint8_t imported:1; uint8_t usingfailed:1; uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package - uint8_t padding:2; + uint8_t padding:1; } jl_binding_t; typedef struct { @@ -809,7 +810,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index cbc26da892403..9339cbff1ec61 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -103,7 +103,6 @@ void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe); target->replaceAllUsesWith(gcframe); - target->eraseFromParent(); } void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) @@ -131,7 +130,6 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) gcframe, pgcstack, Align(sizeof(void*))); - target->eraseFromParent(); } void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) @@ -150,7 +148,6 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) pgcstack, Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - target->eraseFromParent(); } void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) @@ -170,7 +167,6 @@ void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index); gep->takeName(target); target->replaceAllUsesWith(gep); - target->eraseFromParent(); } void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) @@ -187,7 +183,6 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) IRBuilder<> builder(target); Value* signal_page = target->getOperand(0); builder.CreateLoad(T_size, signal_page, true); - target->eraseFromParent(); } #ifdef MMTK_GC @@ -252,7 +247,7 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. - const bool INLINE_FASTPATH_ALLOCATION = true; + const bool INLINE_FASTPATH_ALLOCATION = false; if (INLINE_FASTPATH_ALLOCATION) { // Assuming we use the first immix allocator. 
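In C-like pseudocode, the fastpath this block emits is a standard bump-pointer allocation out of the thread-local immix buffer. A sketch, where `tls_load`/`tls_store` and `slow_alloc` are stand-ins for the GEP/load/store sequences and the pool-alloc call, and the 8/15 constants mirror the header-size and alignment arithmetic in the IR:

    uintptr_t cursor = tls_load(ptls, cursor_offset);
    uintptr_t delta = ((uintptr_t)-8 - cursor) & 15;          // pad so the 8-byte header ends 16-aligned
    uintptr_t result = cursor + delta;
    uintptr_t new_cursor = result + osize;
    if ((intptr_t)new_cursor > (intptr_t)tls_load(ptls, limit_offset))
        return slow_alloc(ptls, osize, type);                 // buffer exhausted: let MMTk refill it
    tls_store(ptls, cursor_offset, new_cursor);
    return (jl_value_t*)(result + sizeof(jl_taggedvalue_t));  // object pointer starts after the type tag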
@@ -307,12 +302,12 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) builder.CreateStore(new_cursor, cursor_ptr); // ptls->gc_num.allocd += osize; - auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); - auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); - auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); - auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); - auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); - builder.CreateStore(pool_allocd_total, pool_alloc_tls); + // auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); + // auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + // auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); + // auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); + // auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + // builder.CreateStore(pool_allocd_total, pool_alloc_tls); auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType()); @@ -321,14 +316,14 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) phiNode->addIncoming(new_call, slowpath); phiNode->addIncoming(v_as_ptr, fastpath); phiNode->takeName(target); - + target->replaceAllUsesWith(phiNode); - target->eraseFromParent(); return; } else { auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type }); - derefBytes = sizeof(void*); + if (sz > 0) + derefBytes = sz; } #endif // MMTK_GC } @@ -346,7 +341,6 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) newI->addDereferenceableRetAttr(derefBytes); newI->takeName(target); target->replaceAllUsesWith(newI); - target->eraseFromParent(); } bool FinalLowerGC::runOnFunction(Function &F) @@ -372,21 +366,23 @@ bool FinalLowerGC::runOnFunction(Function &F) // Lower all calls to supported intrinsics. 
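The loop rewrite below drops make_early_inc_range in favor of a manual iterator because Instruction::eraseFromParent() returns an iterator to the following instruction; that lets the LOWER_INTRINSIC macro erase the matched call and resume scanning without skipping or revisiting anything. The underlying idiom, in isolation (matchIntrinsicCall and lower are hypothetical names):

    for (auto it = BB.begin(); it != BB.end();) {
        if (auto *CI = matchIntrinsicCall(&*it)) {  // call to a supported GC intrinsic?
            lower(CI);                              // rewrite all uses of CI in place
            it = CI->eraseFromParent();             // iterator of the next instruction
            continue;
        }
        ++it;
    }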
for (auto &BB : F) { - for (auto &I : make_early_inc_range(BB)) { - auto *CI = dyn_cast(&I); - if (!CI) + for (auto it = BB.begin(); it != BB.end();) { + auto *CI = dyn_cast(&*it); + if (!CI) { + ++it; continue; + } Value *callee = CI->getCalledOperand(); assert(callee); #define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \ - do { \ - auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \ - if (intrinsic == callee) { \ - LOWER_INTRINSIC_FUNC(CI, F); \ - } \ - } while (0) + auto INTRINSIC = getOrNull(jl_intrinsics::INTRINSIC); \ + if (INTRINSIC == callee) { \ + LOWER_INTRINSIC_FUNC(CI, F); \ + it = CI->eraseFromParent(); \ + continue; \ + } \ LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame); LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame); @@ -396,14 +392,13 @@ bool FinalLowerGC::runOnFunction(Function &F) LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot); LOWER_INTRINSIC(safepoint, lowerSafepoint); - #ifdef MMTK_GC LOWER_INTRINSIC(writeBarrier1, lowerWriteBarrier1); LOWER_INTRINSIC(writeBarrier2, lowerWriteBarrier2); LOWER_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow); LOWER_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow); #endif - + ++it; #undef LOWER_INTRINSIC } From 8bb0895bb38a4371387169941595c84d896a70fa Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 25 Jul 2024 00:52:28 +0000 Subject: [PATCH 058/116] Immix working; inlining fastpath allocation is not --- src/datatype.c | 8 ++++++++ src/gc-common.c | 22 ++++++++++------------ src/gc-debug.c | 10 ---------- src/gc-page-profiler.c | 2 +- src/gc.c | 12 ++++++++++++ src/jitlayers.h | 7 ++++++- src/llvm-final-gc-lowering.cpp | 21 ++++++++++++++------- src/llvm-late-gc-lowering.cpp | 2 +- src/mmtk-gc.c | 32 +++++++++++++++++++++++++++----- 9 files changed, 79 insertions(+), 37 deletions(-) diff --git a/src/datatype.c b/src/datatype.c index 422e9a4e897ed..cb10ef7719dd5 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -291,6 +291,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz, if ((void*)ret == HT_NOTFOUND) { if (!should_malloc) { char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0); +#ifdef MMTK_GC + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(perm_mem), flddesc_sz); +#endif assert(perm_mem); ret = (jl_datatype_layout_t *)perm_mem; memcpy(perm_mem, flddesc, flddesc_sz); @@ -968,6 +972,10 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name, jl_datatype_layout_t *layout = (jl_datatype_layout_t *) jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t), 0, 4, 0); +#ifdef MMTK_GC + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(layout), sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t)); +#endif layout->size = large ? 
GC_MAX_SZCLASS+1 : 0; layout->nfields = 0; layout->alignment = sizeof(void *); diff --git a/src/gc-common.c b/src/gc-common.c index 640f2ec1de29e..98ef3f62125f9 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -525,18 +525,6 @@ void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT gc_num.freecall++; } -void jl_free_thread_gc_state(jl_ptls_t ptls) -{ - jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; - ws_queue_t *cq = &mq->chunk_queue; - free_ws_array(jl_atomic_load_relaxed(&cq->array)); - jl_atomic_store_relaxed(&cq->array, NULL); - ws_queue_t *q = &mq->ptr_queue; - free_ws_array(jl_atomic_load_relaxed(&q->array)); - jl_atomic_store_relaxed(&q->array, NULL); - arraylist_free(&mq->reclaim_set); -} - // GCNum, statistics manipulation // --- // Only safe to update the heap inside the GC @@ -642,6 +630,16 @@ JL_DLLEXPORT int jl_gc_is_enabled(void) return !ptls->disable_gc; } +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; diff --git a/src/gc-debug.c b/src/gc-debug.c index 41607638fa5df..19348b380e145 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1142,16 +1142,6 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int off // return (slot - start) / elsize; // } -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index fe7a52b4d1f8a..05666c7a86af2 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -180,4 +180,4 @@ JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) } #endif -#endif // !MMTK_GC \ No newline at end of file +#endif // !MMTK_GC diff --git a/src/gc.c b/src/gc.c index a189ac24b9f95..ed7188a1b449a 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3345,6 +3345,18 @@ void jl_deinit_thread_heap(jl_ptls_t ptls) // Do nothing } +void jl_free_thread_gc_state(jl_ptls_t ptls) +{ + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; + ws_queue_t *cq = &mq->chunk_queue; + free_ws_array(jl_atomic_load_relaxed(&cq->array)); + jl_atomic_store_relaxed(&cq->array, NULL); + ws_queue_t *q = &mq->ptr_queue; + free_ws_array(jl_atomic_load_relaxed(&q->array)); + jl_atomic_store_relaxed(&q->array, NULL); + arraylist_free(&mq->reclaim_set); +} + // System-wide initializations void jl_gc_init(void) { diff --git a/src/jitlayers.h b/src/jitlayers.h index 393e6d81e418d..aed88f05a1cfb 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -320,7 +320,12 @@ class MaxAlignedAllocImpl LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) { Align MaxAlign = alignment(Size); assert(Alignment < MaxAlign); (void)Alignment; - return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); + void* result = jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); +#ifdef MMTK_GC + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(result), Size); +#endif + return result; } inline LLVM_ATTRIBUTE_RETURNS_NONNULL diff --git a/src/llvm-final-gc-lowering.cpp 
b/src/llvm-final-gc-lowering.cpp index 9339cbff1ec61..ac2e6c385d0a5 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -302,12 +302,12 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) builder.CreateStore(new_cursor, cursor_ptr); // ptls->gc_num.allocd += osize; - // auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); - // auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); - // auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); - // auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); - // auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); - // builder.CreateStore(pool_allocd_total, pool_alloc_tls); + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); + auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); + auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); + auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + builder.CreateStore(pool_allocd_total, pool_alloc_tls); auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType()); @@ -316,7 +316,7 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) phiNode->addIncoming(new_call, slowpath); phiNode->addIncoming(v_as_ptr, fastpath); phiNode->takeName(target); - + target->replaceAllUsesWith(phiNode); return; } else { @@ -364,6 +364,13 @@ bool FinalLowerGC::runOnFunction(Function &F) allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); +#ifdef MMTK_GC + writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1); + writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2); + writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow); + writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow); +#endif + // Lower all calls to supported intrinsics. for (auto &BB : F) { for (auto it = BB.begin(); it != BB.end();) { diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index f257afd2c6211..7ce2732e4280b 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2617,7 +2617,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { // FIXME: Currently we call write barrier with the src object (parent). // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all. // But for other MMTk plans, we need to be careful. 
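For the generational object barrier discussed above, only the source (parent) object matters: the first write to a parent after it is marked logs that parent into the remembered set, so the fastpath reduces to a single metadata-bit test. Roughly, with hypothetical helper names (MMTk's real fastpath tests the unlogged side-metadata bit):

    static inline void object_barrier(void *parent) {
        if (object_is_unlogged(parent))     // fastpath: one bit test, no call
            object_barrier_slow(parent);    // slowpath: clear the bit, enqueue parent
    }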
-        const bool INLINE_WRITE_BARRIER = true;
+        const bool INLINE_WRITE_BARRIER = false;
         if (CI->getCalledOperand() == write_barrier_func) {
             if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
                 if (INLINE_WRITE_BARRIER) {
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index 8a7d95871d7c6..c2b9aff11ac3e 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -32,7 +32,23 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre
 JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
 {
 }
+JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
+{
+}
+
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
+{
+    // FIXME: MMTk would have to provide its own stats
+}
+
+#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return MMTK_GC_PAGE_SZ;
+}
 
 inline void maybe_collect(jl_ptls_t ptls)
 {
@@ -271,6 +287,10 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator);
 }
 
+void jl_free_thread_gc_state(jl_ptls_t ptls)
+{
+}
+
 void jl_deinit_thread_heap(jl_ptls_t ptls)
 {
     mmtk_destroy_mutator(&ptls->mmtk_mutator);
@@ -380,24 +400,26 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
     jl_gcframe_t **pgcstack = jl_get_pgcstack();
     jl_task_t *ct = jl_current_task;
-    if (pgcstack && ct->world_age) {
+    void *data = malloc(sz);
+    if (data != NULL && pgcstack != NULL && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         malloc_maybe_collect(ptls, sz);
         jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz);
     }
-    return malloc(sz);
+    return data;
 }
 
 JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 {
     jl_gcframe_t **pgcstack = jl_get_pgcstack();
     jl_task_t *ct = jl_current_task;
-    if (pgcstack && ct->world_age) {
+    void *data = calloc(nm, sz);
+    if (data != NULL && pgcstack != NULL && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         malloc_maybe_collect(ptls, nm * sz);
         jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz);
     }
-    return calloc(nm, sz);
+    return data;
 }
 
 JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
@@ -405,7 +427,7 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
     jl_gcframe_t **pgcstack = jl_get_pgcstack();
     jl_task_t *ct = jl_current_task;
     free(p);
-    if (pgcstack && ct->world_age) {
+    if (pgcstack != NULL && ct->world_age) {
         jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz);
     }
 }

From 4a17579236fec2d207860b5368311709f860bdcf Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Thu, 25 Jul 2024 04:30:56 +0000
Subject: [PATCH 059/116] Fix test in make-Profile

---
 src/mmtk-gc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index c2b9aff11ac3e..5a104c4856c54 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -32,8 +32,16 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre
 JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
 {
 }
+
+// mutex for page profile
+uv_mutex_t page_profile_lock;
+
 JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
 {
+    uv_mutex_lock(&page_profile_lock);
+    const char *str = "Page profiler is unsupported in MMTk.";
+    ios_write(stream, str, strlen(str));
+    uv_mutex_unlock(&page_profile_lock);
 }
 
 JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];

From 6fee739cdb3c00599ba4faee49895cc0094e91a3 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 31 Jul 2024 06:38:35 +0000 Subject: [PATCH 060/116] Fixing macro to lower intrinsics properly --- src/llvm-final-gc-lowering.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index ac2e6c385d0a5..f8802a8f62514 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -396,14 +396,24 @@ bool FinalLowerGC::runOnFunction(Function &F) LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame); LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot); LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes); - LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot); LOWER_INTRINSIC(safepoint, lowerSafepoint); +// These lowerings preserve the CI and do not erase them from the parent +#define LOWER_WB_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \ + auto INTRINSIC = getOrNull(jl_intrinsics::INTRINSIC); \ + if (INTRINSIC == callee) { \ + LOWER_INTRINSIC_FUNC(CI, F); \ + ++it; \ + continue; \ + } \ + + LOWER_WB_INTRINSIC(queueGCRoot, lowerQueueGCRoot); + #ifdef MMTK_GC - LOWER_INTRINSIC(writeBarrier1, lowerWriteBarrier1); - LOWER_INTRINSIC(writeBarrier2, lowerWriteBarrier2); - LOWER_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow); - LOWER_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow); + LOWER_WB_INTRINSIC(writeBarrier1, lowerWriteBarrier1); + LOWER_WB_INTRINSIC(writeBarrier2, lowerWriteBarrier2); + LOWER_WB_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow); + LOWER_WB_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow); #endif ++it; From 708e4355c2f9397a0012110c04a8bd1d581969e1 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 31 Jul 2024 06:40:12 +0000 Subject: [PATCH 061/116] Remove jl_gc_wb_buf and jl_gc_wb_binding functions --- src/julia_internal.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index 493d75f10eebf..530dc3db8e567 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -667,34 +667,6 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; -#ifndef MMTK_GC -STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* -{ - jl_gc_wb(bnd, val); -} - -STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* -{ - // if parent is marked and buf is not - if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { - jl_task_t *ct = jl_current_task; - gc_setmark_buf(ct->ptls, bufptr, 3, minsz); - } -} - -#else // MMTK_GC - -STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* -{ - mmtk_gc_wb(bnd, val); -} - -STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* -{ - mmtk_gc_wb(parent, (void*)0); -} -#endif // MMTK_GC - JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT; void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT; From f525fef1cdf20579a1404cdfd126ecad8fba6374 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 2 Aug 2024 00:41:29 +0000 Subject: [PATCH 062/116] Pinning generic memory and owners (wip) --- src/genericmemory.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/genericmemory.c b/src/genericmemory.c index 24db8f29f1a12..f3fa3e2d77320 100644 --- 
a/src/genericmemory.c +++ b/src/genericmemory.c @@ -54,6 +54,7 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is tot = sizeof(jl_genericmemory_t) + sizeof(void*); } m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype); + PTR_PIN(m); if (pooled) { data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; @@ -107,9 +108,11 @@ JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str) jl_task_t *ct = jl_current_task; int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, jl_memory_uint8_type); + PTR_PIN(m); m->length = jl_string_len(str); m->ptr = jl_string_data(str); jl_genericmemory_data_owner_field(m) = str; + PTR_PIN(str); return m; } @@ -160,6 +163,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: too large for system address width"); int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype); + PTR_PIN(m); m->ptr = data; m->length = nel; jl_genericmemory_data_owner_field(m) = own_buffer ? (jl_value_t*)m : NULL; @@ -249,9 +253,11 @@ JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_slice(jl_genericmemory_t *mem, } jl_task_t *ct = jl_current_task; jl_genericmemory_t *newmem = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemory_t) + sizeof(void*), dt); + PTR_PIN(newmem); newmem->length = len; newmem->ptr = data; jl_genericmemory_data_owner_field(newmem) = jl_genericmemory_owner(mem); + PTR_PIN(jl_genericmemory_owner(mem)); return newmem; } From 5cf0dae060a45b1d93fe88d3e0028375c3d9e5eb Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 2 Aug 2024 00:42:02 +0000 Subject: [PATCH 063/116] Inlining write barrier --- src/llvm-late-gc-lowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 7ce2732e4280b..f257afd2c6211 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2617,7 +2617,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { // FIXME: Currently we call write barrier with the src object (parent). // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all. // But for other MMTk plans, we need to be careful. 
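The PTR_PIN calls added in genericmemory.c above all protect the same invariant: m->ptr is an interior pointer into some owner object's payload, and without introspecting the object during a copy the GC could not fix that pointer up after moving either side. Condensed from the hunks above (schematic, not a verbatim excerpt):

    jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype);
    PTR_PIN(m);                                 // m itself holds the raw m->ptr field
    m->ptr = jl_string_data(str);               // interior pointer into str's payload
    jl_genericmemory_data_owner_field(m) = str;
    PTR_PIN(str);                               // the payload must not move under m->ptr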
- const bool INLINE_WRITE_BARRIER = false; + const bool INLINE_WRITE_BARRIER = true; if (CI->getCalledOperand() == write_barrier_func) { if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { if (INLINE_WRITE_BARRIER) { From ec77b2b0e725e35733e81f71b281bb18dc9293b6 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 2 Aug 2024 03:33:09 +0000 Subject: [PATCH 064/116] Adding wb on array copying; undef new macro as well --- src/genericmemory.c | 1 + src/llvm-final-gc-lowering.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/genericmemory.c b/src/genericmemory.c index f3fa3e2d77320..d98c8302d3573 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -273,6 +273,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata; _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata; jl_value_t *owner = jl_genericmemory_owner(dest); + mmtk_gc_wb(owner, NULL); if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { jl_value_t *src_owner = jl_genericmemory_owner(src); ssize_t done = 0; diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index f8802a8f62514..401ad983f8c81 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -418,6 +418,7 @@ bool FinalLowerGC::runOnFunction(Function &F) ++it; #undef LOWER_INTRINSIC +#undef LOWER_WB_INTRINSIC } } From 7cc64d592612ec61259e0ed93f7ba64f22277f95 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 2 Aug 2024 05:10:54 +0000 Subject: [PATCH 065/116] Enabled inlined fastpath allocation again --- src/llvm-final-gc-lowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 401ad983f8c81..9090460662c73 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -247,7 +247,7 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. - const bool INLINE_FASTPATH_ALLOCATION = false; + const bool INLINE_FASTPATH_ALLOCATION = true; if (INLINE_FASTPATH_ALLOCATION) { // Assuming we use the first immix allocator. 
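The mmtk_gc_wb(owner, NULL) call added in jl_genericmemory_copyto above is needed because the bulk copy stores pointer slots directly, bypassing the per-store write barrier; for an object-remembering barrier, logging the destination owner once before the loop suffices. In outline (a sketch of the intent):

    mmtk_gc_wb(owner, NULL);       // object barrier: remember the destination owner once
    for (ssize_t i = 0; i < n; i++)
        dest_p[i] = src_p[i];      // raw slot stores, no per-slot barrier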
From 9dc444834e093cd26737f4b0cbf1211400a8701c Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 6 Aug 2024 11:25:59 +0000 Subject: [PATCH 066/116] Enabling fastpath allocation --- src/llvm-final-gc-lowering.cpp | 139 +++++---------------------------- src/llvm-late-gc-lowering.cpp | 139 +++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 118 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 9090460662c73..8bfb5e3b32a5e 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -103,6 +103,7 @@ void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe); target->replaceAllUsesWith(gcframe); + target->eraseFromParent(); } void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) @@ -130,6 +131,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) gcframe, pgcstack, Align(sizeof(void*))); + target->eraseFromParent(); } void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) @@ -148,6 +150,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) pgcstack, Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + target->eraseFromParent(); } void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) @@ -167,6 +170,7 @@ void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index); gep->takeName(target); target->replaceAllUsesWith(gep); + target->eraseFromParent(); } void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) @@ -183,6 +187,7 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) IRBuilder<> builder(target); Value* signal_page = target->getOperand(0); builder.CreateLoad(T_size, signal_page, true); + target->eraseFromParent(); } #ifdef MMTK_GC @@ -209,7 +214,6 @@ void FinalLowerGC::lowerWriteBarrier2Slow(CallInst *target, Function &F) assert(target->arg_size() == 2); target->setCalledFunction(writeBarrier2SlowFunc); } - #endif void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) @@ -235,97 +239,11 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) derefBytes = sz; } else { - #ifndef MMTK_GC auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize, type }); if (sz > 0) derefBytes = sz; - #else // MMTK_GC - auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); - auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); - - // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. - // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. - const bool INLINE_FASTPATH_ALLOCATION = true; - - if (INLINE_FASTPATH_ALLOCATION) { - // Assuming we use the first immix allocator. - // FIXME: We should get the allocator index and type from MMTk. 
-            auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
-
-            auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
-            auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit));
-
-            auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
-            auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
-            auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
-
-            // offset = 8
-            auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
-            auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
-            auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
-            // alignment 16 (15 = 16 - 1)
-            auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
-            auto result = builder.CreateNSWAdd(cursor, delta, "result");
-
-            auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
-
-            auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
-            auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
-            auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
-
-            auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
-
-            auto current_block = target->getParent();
-            builder.SetInsertPoint(target->getNextNode());
-            auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
-            auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");
-
-            auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
-            auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);
-
-            auto next_br = current_block->getTerminator();
-            next_br->eraseFromParent();
-            builder.SetInsertPoint(current_block);
-            builder.CreateCondBr(gt_limit, slowpath, fastpath);
-
-            // slowpath
-            builder.SetInsertPoint(slowpath);
-            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-            auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
-            new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
-            builder.CreateBr(top_cont);
-
-            // fastpath
-            builder.SetInsertPoint(fastpath);
-            builder.CreateStore(new_cursor, cursor_ptr);
-
-            // ptls->gc_num.allocd += osize;
-            auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num));
-            auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
-            auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
-            auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
-            auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
-            builder.CreateStore(pool_allocd_total, pool_alloc_tls);
-
-            auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
-            auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
-            builder.CreateBr(top_cont);
-
-            phiNode->addIncoming(new_call, slowpath);
-            phiNode->addIncoming(v_as_ptr, fastpath);
-            phiNode->takeName(target);
-
-            target->replaceAllUsesWith(phiNode);
-            return;
-        } else {
-            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-            newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
-            if (sz > 0)
-                derefBytes = sz;
-        }
-        #endif // MMTK_GC
         }
     }
     else {
         auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
@@ -333,7 +251,6 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
         newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
         derefBytes = sizeof(void*);
     }
-
     newI->setAttributes(newI->getCalledFunction()->getAttributes());
     unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*));
     newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));
@@ -341,6 +258,7 @@
         newI->addDereferenceableRetAttr(derefBytes);
     newI->takeName(target);
     target->replaceAllUsesWith(newI);
+    target->eraseFromParent();
 }
 
 bool FinalLowerGC::runOnFunction(Function &F)
@@ -362,63 +280,48 @@ bool FinalLowerGC::runOnFunction(Function &F)
     poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
     bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
     allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
-    T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
-
 #ifdef MMTK_GC
     writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1);
     writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2);
     writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow);
     writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
 #endif
+    T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
 
     // Lower all calls to supported intrinsics.
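// [Editorial note] The rewrite below switches the scan loop to
// llvm::make_early_inc_range, which advances the iterator before each body
// runs, so a lowering helper may erase the call it is visiting without
// invalidating iteration. A standalone sketch of the pattern (isLowerable and
// lowerAndErase are hypothetical stand-ins):
//
//     for (Instruction &I : llvm::make_early_inc_range(BB)) {
//         if (auto *CI = dyn_cast<CallInst>(&I))
//             if (isLowerable(CI))   // hypothetical predicate
//                 lowerAndErase(CI); // may call CI->eraseFromParent() safely
//     }
//
// This is what lets each lowerXYZ helper above take over the
// target->eraseFromParent() call instead of the loop doing it.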
     for (auto &BB : F) {
-        for (auto it = BB.begin(); it != BB.end();) {
-            auto *CI = dyn_cast<CallInst>(&*it);
-            if (!CI) {
-                ++it;
+        for (auto &I : make_early_inc_range(BB)) {
+            auto *CI = dyn_cast<CallInst>(&I);
+            if (!CI)
                 continue;
-            }
 
             Value *callee = CI->getCalledOperand();
             assert(callee);
 
 #define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \
-            auto INTRINSIC = getOrNull(jl_intrinsics::INTRINSIC); \
-            if (INTRINSIC == callee) { \
-                LOWER_INTRINSIC_FUNC(CI, F); \
-                it = CI->eraseFromParent(); \
-                continue; \
-            } \
+            do { \
+                auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \
+                if (intrinsic == callee) { \
+                    LOWER_INTRINSIC_FUNC(CI, F); \
+                } \
+            } while (0)
 
             LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
             LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
             LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
             LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
             LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
+            LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
             LOWER_INTRINSIC(safepoint, lowerSafepoint);
 
-// These lowerings preserve the CI and do not erase them from the parent
-#define LOWER_WB_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \
-            auto INTRINSIC = getOrNull(jl_intrinsics::INTRINSIC); \
-            if (INTRINSIC == callee) { \
-                LOWER_INTRINSIC_FUNC(CI, F); \
-                ++it; \
-                continue; \
-            } \
-
-            LOWER_WB_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
-
 #ifdef MMTK_GC
-            LOWER_WB_INTRINSIC(writeBarrier1, lowerWriteBarrier1);
-            LOWER_WB_INTRINSIC(writeBarrier2, lowerWriteBarrier2);
-            LOWER_WB_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow);
-            LOWER_WB_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow);
+            LOWER_INTRINSIC(writeBarrier1, lowerNewGCFrame);
+            LOWER_INTRINSIC(writeBarrier2, lowerNewGCFrame);
+            LOWER_INTRINSIC(writeBarrier1Slow, lowerNewGCFrame);
+            LOWER_INTRINSIC(writeBarrier2Slow, lowerNewGCFrame);
 #endif
 
-            ++it;
 #undef LOWER_INTRINSIC
-#undef LOWER_WB_INTRINSIC
         }
     }

diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index f257afd2c6211..eb63e1196e8ae 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -323,6 +324,7 @@ struct LateLowerGCFrame: private JuliaPassContext {
 
 private:
     CallInst *pgcstack;
+    Function *poolAllocFunc;
 
     void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
                       SmallVector<int, 0> &&RefinedPtr = SmallVector<int, 0>());
@@ -359,6 +361,10 @@ struct LateLowerGCFrame: private JuliaPassContext {
     void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
     Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
     Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
+
+#ifdef MMTK_GC
+    Value* lowerGCAllocBytesLate(CallInst *target, Function &F);
+#endif
 };
 
 static unsigned getValueAddrSpace(Value *V) {
@@ -2880,8 +2886,118 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
     }
 }
 
+Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
+{
+    assert(target->arg_size() == 3);
+
+    IRBuilder<> builder(target);
+    auto ptls = target->getArgOperand(0);
+    auto type = target->getArgOperand(2);
+    if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
+        size_t sz = (size_t)CI->getZExtValue();
+        // This is strongly architecture and OS dependent
+        int osize;
+        int offset = jl_gc_classify_pools(sz, &osize);
+        if (offset >= 0) {
+            // In this case julia.gc_alloc_bytes will simply become a call to jl_gc_pool_alloc in the final GC lowering pass
+            auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+            auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
+
+            // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk.
+            // Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
+            const bool INLINE_FASTPATH_ALLOCATION = true;
+
+            if (INLINE_FASTPATH_ALLOCATION) {
+                // Assuming we use the first immix allocator.
+                // FIXME: We should get the allocator index and type from MMTk.
+                auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
+
+                auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
+                auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit));
+
+                auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
+                auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
+                auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
+
+                // offset = 8
+                auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
+                auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
+                auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
+                // alignment 16 (15 = 16 - 1)
+                auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
+                auto result = builder.CreateNSWAdd(cursor, delta, "result");
+
+                auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
+
+                auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
+                auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
+                auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
+
+                auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
+
+                auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
+                auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction());
+
+                auto next_instr = target->getNextNode();
+                DomTreeUpdater dtu = DomTreeUpdater(&GetDT(), llvm::DomTreeUpdater::UpdateStrategy::Lazy);
+                MDBuilder MDB(F.getContext());
+                SmallVector<uint32_t, 2> Weights{1, 9};
+                SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights), &dtu);
+
+                builder.SetInsertPoint(next_instr);
+                auto phiNode = builder.CreatePHI(target->getCalledFunction()->getReturnType(), 2, "phi_fast_slow");
+
+                // slowpath
+                builder.SetInsertPoint(slowpath);
+                auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+                auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
+                new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
+                builder.CreateBr(next_instr->getParent());
+
+                // fastpath
+                builder.SetInsertPoint(fastpath);
+                builder.CreateStore(new_cursor, cursor_ptr);
+
+                // ptls->gc_num.allocd += osize;
+                // auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
+                // auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
+                // auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
+                // auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
+                // auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
+                // builder.CreateStore(pool_allocd_total, pool_alloc_tls);
+
+                auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
+                auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
+                builder.CreateBr(next_instr->getParent());
+
+                phiNode->addIncoming(new_call, slowpath);
+                phiNode->addIncoming(v_as_ptr, fastpath);
+                phiNode->takeName(target);
+                return phiNode;
+            }
+        }
+    }
+    return target;
+}
+
+template <typename TIterator>
+static void replaceInstruction(
+    Instruction *oldInstruction,
+    Value *newInstruction,
+    TIterator &it)
+{
+    if (newInstruction != oldInstruction) {
+        oldInstruction->replaceAllUsesWith(newInstruction);
+        it = oldInstruction->eraseFromParent();
+    }
+    else {
+        ++it;
+    }
+}
+
 bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
     initAll(*F.getParent());
+    poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
     if (!pgcstack_getter && !adoptthread_func)
         return CleanupIR(F, nullptr, CFGModified);
@@ -2896,6 +3012,29 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
     std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
     PlaceRootsAndUpdateCalls(Colors, S, CallFrames);
     CleanupIR(F, &S, CFGModified);
+
+#ifdef MMTK_GC
+    // We lower the julia.gc_alloc_bytes intrinsic in this pass to insert slowpath/fastpath blocks for MMTk
+    for (BasicBlock &BB : F) {
+        for (auto it = BB.begin(); it != BB.end();) {
+            auto *CI = dyn_cast<CallInst>(&*it);
+            if (!CI) {
+                ++it;
+                continue;
+            }
+
+            Value *callee = CI->getCalledOperand();
+            assert(callee);
+
+            auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes);
+            if (GCAllocBytes == callee) {
+                replaceInstruction(CI, lowerGCAllocBytesLate(CI, F), it);
+                continue;
+            }
+            ++it;
+        }
+    }
+#endif
     return true;
 }

From df66882e342deac11ac7c824c0452edfc0c8a375 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 7 Aug 2024 02:24:01 +0000
Subject: [PATCH 067/116] Fixing typos when lowering write barriers

---
 src/llvm-final-gc-lowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 8bfb5e3b32a5e..dd4dd05a89101 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -315,10 +315,10 @@ bool FinalLowerGC::runOnFunction(Function &F)
             LOWER_INTRINSIC(safepoint, lowerSafepoint);
 
 #ifdef MMTK_GC
-            LOWER_INTRINSIC(writeBarrier1, lowerNewGCFrame);
-            LOWER_INTRINSIC(writeBarrier2, lowerNewGCFrame);
-            LOWER_INTRINSIC(writeBarrier1Slow, lowerNewGCFrame);
-            LOWER_INTRINSIC(writeBarrier2Slow, lowerNewGCFrame);
+            LOWER_INTRINSIC(writeBarrier1, lowerWriteBarrier1);
+            LOWER_INTRINSIC(writeBarrier2, lowerWriteBarrier2);
+            LOWER_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow);
+            LOWER_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow);
 #endif
 
 #undef LOWER_INTRINSIC

From 8ef0c1547ea49c5fe4033ccf2362bd2932c2a226 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 7 Aug 2024 03:32:28 +0000
Subject: [PATCH 068/116] Updating fastpath allocation to count number of alloced bytes

---
 src/llvm-late-gc-lowering.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index eb63e1196e8ae..2630afede0f7b 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -2959,12 +2959,12 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
                 builder.CreateStore(new_cursor, cursor_ptr);
 
                 // ptls->gc_num.allocd += osize;
-                // auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
-                // auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
-                // auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
-                // auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
-                // auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
-                // builder.CreateStore(pool_allocd_total, pool_alloc_tls);
+                auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num));
+                auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
+                auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
+                auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
+                auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
+                builder.CreateStore(pool_allocd_total, pool_alloc_tls);
 
                 auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
                 auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());

From b56b167845583bc9adbbffee64741bb9ffab80bd Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 7 Aug 2024 05:00:56 +0000
Subject: [PATCH 069/116] Updating write barrier block splitting

---
 src/llvm-late-gc-lowering.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 2630afede0f7b..5923214a47652 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -363,7 +363,7 @@ struct LateLowerGCFrame: private JuliaPassContext {
     Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
 
 #ifdef MMTK_GC
-    Value* lowerGCAllocBytesLate(CallInst *target, Function &F);
+    Value* lowerGCAllocBytesLate(CallInst *target, Function &F, State &S);
 #endif
 };
 
@@ -2655,7 +2655,11 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                     // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
                     MDBuilder MDB(F.getContext());
                     SmallVector<uint32_t, 2> Weights{1, 9};
-                    auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights));
+                    if (!S->DT) {
+                        S->DT = &GetDT();
+                    }
+                    DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
+                    auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
                     builder.SetInsertPoint(mayTriggerSlowpath);
                     builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent });
                 } else {
@@ -2886,7 +2890,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
     }
 }
 
-Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
+Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, State &S)
 {
     assert(target->arg_size() == 3);
 
@@ -2939,7 +2943,10 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
                 auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction());
 
                 auto next_instr = target->getNextNode();
-                DomTreeUpdater dtu = DomTreeUpdater(&GetDT(), llvm::DomTreeUpdater::UpdateStrategy::Lazy);
+                if (!S.DT) {
+                    S.DT = &GetDT();
+                }
+                DomTreeUpdater dtu = DomTreeUpdater(S.DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
                 MDBuilder MDB(F.getContext());
                 SmallVector<uint32_t, 2> Weights{1, 9};
                 SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights), &dtu);
@@ -3028,7 +3035,7 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
 
             auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes);
             if (GCAllocBytes == callee) {
-                replaceInstruction(CI, lowerGCAllocBytesLate(CI, F), it);
+                replaceInstruction(CI, lowerGCAllocBytesLate(CI, F, S), it);
                 continue;
             }
             ++it;

From 5bbfd16ef5e10c074a5c5739801a47f459932500 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Thu, 15 Aug 2024 05:55:12 +0000
Subject: [PATCH 070/116] Only pin owners, not all generic memory objects

---
 src/genericmemory.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/genericmemory.c b/src/genericmemory.c
index d98c8302d3573..6851e9131e534 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -54,7 +54,6 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is
         tot = sizeof(jl_genericmemory_t) + sizeof(void*);
     }
     m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype);
-    PTR_PIN(m);
     if (pooled) {
         data = (char*)m + JL_SMALL_BYTE_ALIGNMENT;
@@ -108,7 +107,6 @@ JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str)
     jl_task_t *ct = jl_current_task;
     int tsz = sizeof(jl_genericmemory_t) + sizeof(void*);
     jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, jl_memory_uint8_type);
-    PTR_PIN(m);
     m->length = jl_string_len(str);
     m->ptr = jl_string_data(str);
     jl_genericmemory_data_owner_field(m) = str;
@@ -163,11 +161,11 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
         jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: too large for system address width");
     int tsz = sizeof(jl_genericmemory_t) + sizeof(void*);
     m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype);
-    PTR_PIN(m);
     m->ptr = data;
     m->length = nel;
     jl_genericmemory_data_owner_field(m) = own_buffer ? (jl_value_t*)m : NULL;
     if (own_buffer) {
+        PTR_PIN(m);
         int isaligned = 0;  // TODO: allow passing memalign'd buffers
         jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
         jl_gc_count_allocd(nel*elsz);

From df35d17559c67c7fc61ca683ea10fc80addb71bb Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 28 Aug 2024 00:44:09 +0000
Subject: [PATCH 071/116] Refactoring to be considered before adding MMTk

---
 src/gc-common.c      | 156 +++++++++++++++++++++++++++++++++++++++++++
 src/gc-common.h      |   6 ++
 src/gc-debug.c       |  41 +----------
 src/gc-interface.h   |  12 ++++
 src/gc-stacks.c      |   4 +-
 src/gc-stock.c       | 156 ++++++++++++-------------------------------
 src/gc-stock.h       |  21 ------
 src/julia.h          |   2 +-
 src/julia_internal.h |  26 +------
 src/scheduler.c      |  11 +++
 src/stackwalk.c      |   4 +-
 src/staticdata.c     |   2 +
 12 files changed, 237 insertions(+), 204 deletions(-)

diff --git a/src/gc-common.c b/src/gc-common.c
index ee461b576ea9e..2ec167caa667a 100644
--- a/src/gc-common.c
+++ b/src/gc-common.c
@@ -20,6 +20,11 @@ extern "C" {
 
 jl_gc_num_t gc_num = {0};
 
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
+{
+    return gc_num.total_time;
+}
+
 // =========================================================================== //
 // GC Callbacks
 // =========================================================================== //
@@ -489,6 +494,87 @@ jl_ptls_t* gc_all_tls_states;
 // MISC
 // =========================================================================== //
 
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_new_weakref_th(ptls, value);
+}
+
+JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    return jl_gc_alloc(ptls, sz, ty);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sz, NULL);
+}
+
+// allocation wrappers that save the size of allocations, to allow using
+// jl_gc_counted_* functions with a libc-compatible API.
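/* [Editorial note] Layout produced by the wrappers added below (assuming
   JL_SMALL_BYTE_ALIGNMENT == 16 and an 8-byte int64_t):

       base           base+8         base+16
       | size (i64)   | padding      | user data ...
       ^ jl_gc_counted_malloc result  ^ pointer returned to the caller (p + 2)

   jl_free recovers the base pointer with (int64_t*)p - 2 and reads the saved
   size from slot 0, so the counted-free call can report the exact original
   allocation size back to the GC's accounting. */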
+ +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { @@ -501,6 +587,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
+ mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..154b9659e9ccb 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. 
+typedef struct _mallocmemory_t { + jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory + struct _mallocmemory_t *next; +} mallocmemory_t; + #if defined(_OS_WINDOWS_) STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { diff --git a/src/gc-debug.c b/src/gc-debug.c index 19dd93af5f236..d05fb4b49e9f7 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,46 +1105,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} +extern int gc_logging_enabled; void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { diff --git a/src/gc-interface.h b/src/gc-interface.h index e543b4b5879f1..682f22344d69d 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -128,6 +128,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // Allocation // ========================================================================= // +// On GCC, this function is inlined when sz is constant (see julia_internal.h) +// In general, this function should implement allocation and should use the specific GC's logic +// to decide whether to allocate a small or a large object. Finally, note that this function +// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record +// an allocation of that type in the allocation profiler. +struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); + // Allocates small objects and increments Julia allocation counterst. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the @@ -211,6 +218,11 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// This function notifies the GC about memory addresses that are set when loading the boot image. +// The GC may use that information to, for instance, determine that such objects should +// be treated as marked and belonged to the old generation in nursery collections. 
+void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 783129ea97693..8c44b65284386 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -46,7 +46,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -81,7 +81,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); diff --git a/src/gc-stock.c b/src/gc-stock.c index d25f8917f302d..4a8c6fe7decc5 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -553,24 +553,6 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; @@ -647,17 +629,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -816,6 +787,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
+ v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -2792,6 +2786,21 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -2830,11 +2839,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -3386,13 +3390,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3674,63 +3671,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3864,18 +3804,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4003,14 +3931,14 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->gc_tls.sweep_objs, obj); + // Do nothing } #ifdef __cplusplus diff --git a/src/gc-stock.h b/src/gc-stock.h index 45c93bf4289ae..3f3900b349bcf 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list @@ -428,21 +422,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index abb8a57ff13b0..db57db1fbeb38 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index f00667d016796..edddb68754fc3 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -513,30 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -1074,7 +1050,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..b85a481588e4f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,9 +80,20 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } +<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; +======= +// parallel task runtime +// --- + +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return cong(max, &ptls->rngseed); +} +>>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..5f28b61c4a8fe 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 363aa46b62221..e07a5365bf06f 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -654,6 +654,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -3899,6 +3900,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From d2f2b8d9c477514e93009d0b99e2ffe65bcc9831 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 04:57:59 +0000 Subject: [PATCH 072/116] Removing jl_gc_notify_image_load, since it's a new function and not part of the refactoring --- src/gc-interface.h | 5 ----- src/gc-stock.c | 5 ----- src/staticdata.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 682f22344d69d..25ffed4524f0c 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -218,11 +218,6 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// This function notifies the GC about memory addresses that are set when loading the boot image. -// The GC may use that information to, for instance, determine that such objects should -// be treated as marked and belonged to the old generation in nursery collections. 
-void jl_gc_notify_image_load(const char* img_data, size_t len); - // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 4a8c6fe7decc5..9b633cacd7870 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3936,11 +3936,6 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - // Do nothing -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index e07a5365bf06f..363aa46b62221 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -654,7 +654,6 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -3900,7 +3899,6 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From a42cb6410cf4f3e1773b0e41ecb5c696bc9cf836 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 01:27:08 +0000 Subject: [PATCH 073/116] Moving gc_enable code to gc-common.c --- src/gc-common.c | 30 ++++++++++++++++++++++++++++++ src/gc-stock.c | 30 ------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 2ec167caa667a..03c046bc300f2 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -613,6 +613,36 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 9b633cacd7870..61fc8d4e83a3a 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2801,36 +2801,6 @@ int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT return tid == concurrent_collector_thread_id; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } 
-    }
-    else if (prev && !on) {
-        // enable -> disable
-        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
-        // check if the GC is running and wait for it to finish
-        jl_gc_safepoint_(ptls);
-    }
-    return prev;
-}
-
-JL_DLLEXPORT int jl_gc_is_enabled(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return !ptls->disable_gc;
-}
-
 JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
 {
     jl_gc_num_t num = gc_num;

From 92563918292056178d6f6ed12c58a9f998ef2d54 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Mon, 16 Sep 2024 06:38:02 +0000
Subject: [PATCH 074/116] Addressing PR comments

---
 src/gc-common.c      | 134 +++++++++++++++++++++++++------------
 src/gc-common.h      |   6 ++
 src/gc-debug.c       |   2 -
 src/gc-interface.h   |  30 +---------
 src/gc-stock.c       |  18 +-----
 src/gc-stock.h       |  15 +++++
 src/julia.h          |   2 +-
 src/julia_internal.h |   4 +-
 src/stackwalk.c      |  10 +---
 9 files changed, 110 insertions(+), 111 deletions(-)

diff --git a/src/gc-common.c b/src/gc-common.c
index 03c046bc300f2..046feae6aa4c5 100644
--- a/src/gc-common.c
+++ b/src/gc-common.c
@@ -491,15 +491,9 @@ int gc_n_threads;
 jl_ptls_t* gc_all_tls_states;
 
 // =========================================================================== //
-// MISC
+// Allocation
 // =========================================================================== //
 
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_new_weakref_th(ptls, value);
-}
-
 JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
 {
     return jl_gc_alloc(ptls, sz, ty);
@@ -575,17 +569,9 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
     return jl_gc_alloc_(ptls, sz, ty);
 }
 
-const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
-JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
-{
-    return jl_buff_tag;
-}
-
-// callback for passing OOM errors from gmp
-JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
-{
-    jl_throw(jl_memory_exception);
-}
+// =========================================================================== //
+// Generic Memory
+// =========================================================================== //
 
 size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
 {
@@ -613,6 +599,66 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i
     ptls->gc_tls.heap.mallocarrays = ma;
 }
 
+// =========================================================================== //
+// GC Debug
+// =========================================================================== //
+
+int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
+{
+    int nf = (int)jl_datatype_nfields(vt);
+    for (int i = 1; i < nf; i++) {
+        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
+            return i - 1;
+    }
+    return nf - 1;
+}
+
+int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
+{
+    char *slot = (char*)_slot;
+    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
+    char *start = NULL;
+    size_t len = 0;
+    size_t elsize = sizeof(void*);
+    if (vt == jl_module_type) {
+        jl_module_t *m = (jl_module_t*)obj;
+        start = (char*)m->usings.items;
+        len = m->usings.len;
+    }
+    else if (vt == jl_simplevector_type) {
+        start = (char*)jl_svec_data(obj);
+        len = jl_svec_len(obj);
+    }
+    if (slot < start || slot >= start + elsize * len)
+        return -1;
+    return (slot - start) / elsize;
+}
+
+// =========================================================================== //
+// GC Control
+// =========================================================================== //
+
+JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) {
+    return jl_atomic_load_acquire(&jl_gc_disable_counter);
+}
+
+JL_DLLEXPORT int jl_gc_is_enabled(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return !ptls->disable_gc;
+}
+
+int gc_logging_enabled = 0;
+
+JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
+    gc_logging_enabled = enable;
+}
+
+JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
+    return gc_logging_enabled;
+}
+
 // collector entry point and control
 _Atomic(uint32_t) jl_gc_disable_counter = 1;
 
@@ -637,54 +683,30 @@ JL_DLLEXPORT int jl_gc_enable(int on)
     return prev;
 }
 
-JL_DLLEXPORT int jl_gc_is_enabled(void)
+// =========================================================================== //
+// MISC
+// =========================================================================== //
+
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
 {
     jl_ptls_t ptls = jl_current_task->ptls;
-    return !ptls->disable_gc;
-}
-
-int gc_logging_enabled = 0;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
-    gc_logging_enabled = enable;
+    return jl_gc_new_weakref_th(ptls, value);
 }
 
-JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
-    return gc_logging_enabled;
+JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) {
+    return ijl_small_typeof;
 }
 
-// gc-debug common functions
-// ---
-
-int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
+const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
 {
-    int nf = (int)jl_datatype_nfields(vt);
-    for (int i = 1; i < nf; i++) {
-        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
-            return i - 1;
-    }
-    return nf - 1;
+    return jl_buff_tag;
 }
 
-int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
+// callback for passing OOM errors from gmp
+JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
 {
-    char *slot = (char*)_slot;
-    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
-    char *start = NULL;
-    size_t len = 0;
-    size_t elsize = sizeof(void*);
-    if (vt == jl_module_type) {
-        jl_module_t *m = (jl_module_t*)obj;
-        start = (char*)m->usings.items;
-        len = m->usings.len;
-    }
-    else if (vt == jl_simplevector_type) {
-        start = (char*)jl_svec_data(obj);
-        len = jl_svec_len(obj);
-    }
-    if (slot < start || slot >= start + elsize * len)
-        return -1;
-    return (slot - start) / elsize;
+    jl_throw(jl_memory_exception);
 }
 
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gc-common.h b/src/gc-common.h
index 154b9659e9ccb..32b7470b13a58 100644
--- a/src/gc-common.h
+++ b/src/gc-common.h
@@ -179,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o);
 extern int gc_n_threads;
 extern jl_ptls_t* gc_all_tls_states;
 
+// =========================================================================== //
+// Logging
+// =========================================================================== //
+
+extern int gc_logging_enabled;
+
 #endif // JL_GC_COMMON_H
diff --git a/src/gc-debug.c b/src/gc-debug.c
index d05fb4b49e9f7..7c479484cde45 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1105,8 +1105,6 @@ void gc_count_pool(void)
     jl_safe_printf("************************\n");
 }
 
-extern int gc_logging_enabled;
-
 void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t
live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index 25ffed4524f0c..0e9ce32697f35 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -94,6 +94,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -162,26 +164,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -195,14 +177,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. 
-JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stock.c b/src/gc-stock.c index 61fc8d4e83a3a..3ff37566dc6c7 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2786,19 +2786,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -3182,8 +3171,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); jl_thread_heap_t *heap = &ptls2->gc_tls.heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); diff --git a/src/gc-stock.h b/src/gc-stock.h index 3f3900b349bcf..50eca3aadbd86 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -422,6 +422,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index db57db1fbeb38..abb8a57ff13b0 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index edddb68754fc3..e677f40907dfd 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,6 +367,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT 
extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -1050,7 +1052,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/stackwalk.c b/src/stackwalk.c index 5f28b61c4a8fe..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT @@ -1304,12 +1302,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { From ec398e1a98cf713a77f908a459ed37fd4b25af27 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 19 Sep 2024 04:18:13 +0000 Subject: [PATCH 075/116] Push resolution of merge conflict --- src/scheduler.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index b85a481588e4f..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,20 +80,9 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -======= -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} ->>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) From 68e5e11a229f253ec6de966a321bd9d3de453a3b Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:10:31 +0000 Subject: [PATCH 076/116] Removing jl_gc_mark_queue_obj_explicit extern definition from scheduler.c --- src/scheduler.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) From 
c23f0db8347f475e1eb2b37261dd4816537210fa Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 02:50:25 +0000 Subject: [PATCH 077/116] Don't need the getter function since it's possible to use jl_small_typeof directly --- src/gc-common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 046feae6aa4c5..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -693,10 +693,6 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { - return ijl_small_typeof; -} - const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { From 4bfcfe5df056bb5066a545e29c29463722678892 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 27 Aug 2024 06:47:41 +0000 Subject: [PATCH 078/116] WIP: Adding support for MMTk/Immix --- Make.inc | 47 ++ contrib/refresh_checksums.mk | 2 +- src/Makefile | 43 +- src/builtins.c | 1 + src/gc-common.c | 70 +++ src/gc-debug.c | 4 +- src/gc-heap-snapshot.cpp | 1 - src/gc-interface.h | 3 + src/gc-mmtk.c | 843 +++++++++++++++++++++++++++++++++++ src/gc-mmtk.h | 34 ++ src/gc-page-profiler.c | 4 +- src/gc-pages.c | 4 +- src/gc-stock.c | 14 +- src/gc-stock.h | 18 +- src/gc-tls-mmtk.h | 49 ++ src/gc-tls.h | 4 + src/julia.h | 2 +- src/julia_internal.h | 2 +- src/julia_threads.h | 4 + src/stackwalk.c | 2 + src/staticdata.c | 2 + src/threading.c | 4 + 22 files changed, 1123 insertions(+), 34 deletions(-) create mode 100644 src/gc-mmtk.c create mode 100644 src/gc-mmtk.h create mode 100644 src/gc-tls-mmtk.h diff --git a/Make.inc b/Make.inc index f078a0c84f806..039755ce34098 100644 --- a/Make.inc +++ b/Make.inc @@ -86,6 +86,9 @@ HAVE_SSP := 0 WITH_GC_VERIFY := 0 WITH_GC_DEBUG_ENV := 0 +# Use MMTk GC +WITH_MMTK ?= 0 + # Enable DTrace support WITH_DTRACE := 0 @@ -790,6 +793,44 @@ JCXXFLAGS += -DGC_DEBUG_ENV JCFLAGS += -DGC_DEBUG_ENV endif +ifeq ($(WITH_MMTK), 1) +ifeq (${MMTK_JULIA_DIR},) +$(error MMTK_JULIA_DIR must be set to use MMTk) +endif +JCXXFLAGS += -DMMTK_GC +JCFLAGS += -DMMTK_GC +ifeq (${MMTK_BUILD},) +ifeq (debug,$(findstring debug,$(MAKECMDGOALS))) +MMTK_BUILD = debug +else +MMTK_BUILD = release +endif +endif +ifeq (${MMTK_PLAN},Immix) +JCXXFLAGS += -DMMTK_PLAN_IMMIX +JCFLAGS += -DMMTK_PLAN_IMMIX +endif +ifeq (${MMTK_PLAN},StickyImmix) +JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX +JCFLAGS += -DMMTK_PLAN_STICKYIMMIX +endif +MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk +MMTK_API_INC = $(MMTK_DIR)/api +MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia +ifeq ($(OS),Linux) +MMTK_LIB_NAME := libmmtk_julia.so +else +$(error "Unsupported OS for MMTk") +endif +MMTK_LIB_SRC := $(MMTK_DIR)/target/$(MMTK_BUILD)/$(MMTK_LIB_NAME) +MMTK_LIB_DST := $(BUILDROOT)/usr/lib/$(MMTK_LIB_NAME) +MMTK_LIB := -lmmtk_julia +LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD)/ +else +MMTK_JULIA_INC := +MMTK_LIB := +endif + ifeq ($(WITH_DTRACE), 1) JCXXFLAGS += -DUSE_DTRACE JCFLAGS += -DUSE_DTRACE @@ -1777,6 +1818,9 @@ PRINT_PERL = printf ' %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL) PRINT_FLISP = printf ' %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) PRINT_JULIA = printf ' %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) PRINT_DTRACE = printf ' %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) +ifeq ($(WITH_MMTK), 1) +PRINT_MMTK = 
printf ' %b %b\n' $(LINKCOLOR)MMTK$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) +endif else QUIET_MAKE = @@ -1787,6 +1831,9 @@ PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1) PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1) PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1) PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1) +ifeq ($(WITH_MMTK), 1) +PRINT_MMTK = echo '$(subst ','\'',$(1))'; $(1) +endif endif diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk index f67088141ccd4..bf99c0fad9da2 100644 --- a/contrib/refresh_checksums.mk +++ b/contrib/refresh_checksums.mk @@ -24,7 +24,7 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS)) NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS)) # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded: -BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient +BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient libmmtk_julia BB_GCC_EXPANDED_PROJECTS=openblas csl BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld # These are non-BB source-only deps diff --git a/src/Makefile b/src/Makefile index 52e673aa6cc1a..c01848c16adf7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -29,6 +29,10 @@ ifeq ($(USECLANG),1) FLAGS += -Wno-return-type-c-linkage -Wno-atomic-alignment endif +ifeq ($(WITH_MMTK), 1) +FLAGS += -I$(MMTK_API_INC) -I$(MMTK_JULIA_INC) +endif + FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"' ifeq ($(OS),WINNT) FLAGS += -DJL_BUILD_UNAME='"NT"' @@ -44,8 +48,8 @@ SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions mtarraylist \ - threading scheduler stackwalk gc-common gc-stock gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler method \ - jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ + threading scheduler stackwalk gc-common gc-stock gc-mmtk gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler \ + method jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall engine RT_LLVMLINK := @@ -103,7 +107,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-mmtk.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) endif @@ -168,8 +172,8 @@ LIBJULIA_PATH_REL := libjulia endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) -RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) -CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) +RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) 
$(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB) +CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB) RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS) @@ -178,6 +182,15 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) +ifeq ($(WITH_MMTK), 1) +MMTK_SRCS := mmtk_julia +MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) $(MMTK_LIB_DST) +else +MMTK_OBJS := +MMTK_DOBJS := +endif + CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o) CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj) @@ -226,6 +239,16 @@ $(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp mv $@.tmp $@ +# Compile files from the binding side and copy so file into lib folder +ifeq ($(WITH_MMTK), 1) +$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) + @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@) +$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) + @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@) +$(MMTK_LIB_DST): $(MMTK_LIB_SRC) + @$(call PRINT_MMTK, cp $< $@) +endif + $(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc # Generate `.inc` file that contains a list of `#define` macros to rename functions defined in `libjulia-internal` # to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`. 
This avoids @@ -318,6 +341,7 @@ $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,de $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h +$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/mmtk-gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h @@ -389,14 +413,14 @@ $(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION $(LLVM_ sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \ -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/" -$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ +$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \ +$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ @@ -455,6 +479,7 @@ clean: -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen -rm -f $(BUILDDIR)/julia.expmap -rm -f $(BUILDDIR)/julia_version.h + -rm -f $(MMTK_OBJS) $(MMTK_DOBJS) clean-flisp: -$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)' diff --git a/src/builtins.c b/src/builtins.c index 96c4cec0f5087..4a778035de405 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -22,6 +22,7 @@ #include #include "julia.h" #include "julia_internal.h" +#include "gc-interface.h" #include "builtin_proto.h" #include "intrinsics.h" #include "julia_assert.h" diff --git a/src/gc-common.c 
b/src/gc-common.c index 417f12f26d64d..17f6f1330743b 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -705,6 +705,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. + mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-debug.c b/src/gc-debug.c index 7c479484cde45..ecd7f2328cada 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,5 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" #include "julia.h" @@ -1129,3 +1129,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index fcda11dad4f8a..d3cb1e98d84a4 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,5 +1,4 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - #include "gc-heap-snapshot.h" #include "julia.h" diff --git a/src/gc-interface.h b/src/gc-interface.h index 0e9ce32697f35..72a57f4944156 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -192,6 +192,9 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. 
struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// FIXME: add description here +void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c new file mode 100644 index 0000000000000..e459b0f12c41d --- /dev/null +++ b/src/gc-mmtk.c @@ -0,0 +1,843 @@ +#ifdef MMTK_GC + +#include "mmtk_julia.h" +#include "gc-common.h" +#include "mmtkMutator.h" +#include "gc-mmtk.h" +#include "threading.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// For now we're using the same values as stock-gc. However +// for the heap size we use 70% of the free memory available +// since that is actually a hard limit in MMTk. + +// max_total_memory is a suggestion. We try very hard to stay +// under this limit, but we will go above it rather than halting. +#ifdef _P64 +typedef uint64_t memsize_t; +static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +// We expose this to the user/ci as jl_gc_set_max_memory +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; +#else +typedef uint32_t memsize_t; +static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +// Work really hard to stay within 2GB +// Alternative is to risk running out of address space +// on 32 bit architectures. +#define MAX32HEAP 1536 * 1024 * 1024 +static memsize_t max_total_memory = (memsize_t) MAX32HEAP; +#endif + +void jl_gc_init(void) { + // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167) + + JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); + + arraylist_new(&to_finalize, 0); + arraylist_new(&finalizer_list_marked, 0); + + gc_num.allocd = 0; + gc_num.max_pause = 0; + gc_num.max_memory = 0; + + long long min_heap_size; + long long max_heap_size; + char* min_size_def = getenv("MMTK_MIN_HSIZE"); + char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); + + char* max_size_def = getenv("MMTK_MAX_HSIZE"); + char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); + + // default min heap currently set as Julia's default_collect_interval + if (min_size_def != NULL) { + char *p; + double min_size = strtod(min_size_def, &p); + min_heap_size = (long) 1024 * 1024 * min_size; + } else if (min_size_gb != NULL) { + char *p; + double min_size = strtod(min_size_gb, &p); + min_heap_size = (long) 1024 * 1024 * 1024 * min_size; + } else { + min_heap_size = default_collect_interval; + } + + // default max heap currently set as 70% the free memory in the system + if (max_size_def != NULL) { + char *p; + double max_size = strtod(max_size_def, &p); + max_heap_size = (long) 1024 * 1024 * max_size; + } else if (max_size_gb != NULL) { + char *p; + double max_size = strtod(max_size_gb, &p); + max_heap_size = (long) 1024 * 1024 * 1024 * max_size; + } else { + max_heap_size = uv_get_free_memory() * 70 / 100; + } + + // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads + assert(jl_n_gcthreads == 0); + + // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined + int copy_stacks; + +#ifdef COPY_STACKS + copy_stacks = 1; +#else + copy_stacks = 0; +#endif + + mmtk_julia_copy_stack_check(copy_stacks); + + // if only max size is specified initialize MMTk with a fixed size heap + // TODO: We just 
assume mark threads means GC threads, and ignore the number of concurrent sweep threads. + // If the two values are the same, we can use either. Otherwise, we need to be careful. + uintptr_t gcthreads = jl_options.nmarkthreads; + if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { + mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + } else { + mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + } +} + +void jl_start_gc_threads(void) { + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_initialize_collection((void *)ptls); + // int nthreads = jl_atomic_load_relaxed(&jl_n_threads); + // int ngcthreads = jl_n_gcthreads; + // int nmutator_threads = nthreads - ngcthreads; + // printf("nthreads = %d, ngcthreads = %d, nmutator_threads = %d\n", nthreads, ngcthreads, nmutator_threads); +} + +void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { + jl_thread_heap_t *heap = &ptls->gc_tls.heap; + small_arraylist_new(&heap->weak_refs, 0); + small_arraylist_new(&heap->live_tasks, 0); + for (int i = 0; i < JL_N_STACK_POOLS; i++) + small_arraylist_new(&heap->free_stacks[i], 0); + heap->mallocarrays = NULL; + heap->mafreelist = NULL; + arraylist_new(&ptls->finalizers, 0); + // Clear the malloc sz count + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); + // Create mutator + MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); + // Copy the mutator to the thread local storage + memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); + // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) + mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator); + memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); +} + +void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { + mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator); +} + +// FIXME: mmtk uses the same code as stock to enable/disable the GC +// Should this be moved to gc-common.c? 
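+// The disable state is tracked in two places: per thread (ptls->disable_gc)
+// and in the global counter below. jl_gc_collect returns without collecting
+// while jl_gc_disable_counter is non-zero, so disabling the GC on any one
+// thread inhibits collection process-wide; the counter starts at 1 so that
+// no collection can run during startup.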
+ +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) { + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) { + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) { + // MMTk currently does not allow setting the heap size at runtime +} + +JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + return; + } + mmtk_handle_user_collection_request(ptls, collection); +} + +// same as above, some of these are identical to the implementation in gc stock +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls) { + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + if (update_heap) { + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + } + } + } +} + + +void reset_thread_gc_counts(void) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls != NULL) { + // don't reset `pool_live_bytes` here + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + } + } +} + +// weak references +// --- +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + 
wr->value = value; // NOTE: wb not needed here + mmtk_add_weak_candidate(wr); + return wr; +} + + +// allocation +int jl_gc_classify_pools(size_t sz, int *osize) +{ + if (sz > GC_MAX_SZCLASS) + return -1; // call big alloc function + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + *osize = LLT_ALIGN(allocsz, 16); + return 0; // use MMTk's fastpath logic +} + +int64_t last_gc_total_bytes = 0; +int64_t last_live_bytes = 0; // live_bytes at last collection +int64_t live_bytes = 0; + +// Retrieves Julia's `GC_Num` (structure that stores GC statistics). +JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num, 0); + return num; +} + +JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT { + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT +{ + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb - offset; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) { + return 0; +} + +void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); +} + +void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT +{ +} + +int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT +{ + jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc); + return live_bytes += inc; +} + +void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT +{ + combine_thread_gc_counts(&gc_num, 0); + inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); + gc_num.allocd = 0; + gc_num.deferred_alloc = 0; + reset_thread_gc_counts(); +} + +JL_DLLEXPORT int64_t jl_gc_live_bytes(void) { + return last_live_bytes; +} + +JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT +{ + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num, 0); + // Sync this logic with `base/util.jl:GC_Diff` + *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); +} + +JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) +{ + // FIXME: should probably return MMTk's heap size + return max_total_memory; +} + +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); +extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); +extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); + + +extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; +extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; + +// These need to be constants. + +#define MMTK_OBJECT_BARRIER (1) +// Stickyimmix needs write barrier. Immix does not need write barrier. 
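+// (StickyImmix collects the young generation in place, so it must log old
+// objects that acquire references into the young generation; that is what
+// the object barrier below does. Plain Immix traces the whole heap on every
+// cycle and needs no remembered set.)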
+#ifdef MMTK_PLAN_IMMIX +#define MMTK_NEEDS_WRITE_BARRIER (0) +#endif +#ifdef MMTK_PLAN_STICKYIMMIX +#define MMTK_NEEDS_WRITE_BARRIER (1) +#endif + +#ifdef MMTK_CONSERVATIVE_SCAN +#define MMTK_NEEDS_VO_BIT (1) +#else +#define MMTK_NEEDS_VO_BIT (0) +#endif + +#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) +#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) + +// Directly call into MMTk for write barrier (debugging only) +inline void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr); +} + +// Fastpath. Return 1 if we should go to slowpath +inline int mmtk_gc_wb_fast_check(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) (void*) parent; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + uint8_t byte_val = *meta_addr; + return ((byte_val >> shift) & 1) == 1; + } else { + return 0; + } +} + +// Slowpath. +inline void mmtk_gc_wb_slow(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr); + } +} + +inline void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (mmtk_gc_wb_fast_check(parent, ptr)) { + mmtk_gc_wb_slow(parent, ptr); + } +} + +inline void mmtk_gc_wb_binding(const void *bnd, const void *val) JL_NOTSAFEPOINT +{ + if (mmtk_gc_wb_fast_check(bnd, val)) { + jl_astaggedvalue(bnd)->bits.gc = 2; // to indicate that the buffer is a binding + mmtk_gc_wb_slow(bnd, val); + } +} + +#define MMTK_MIN_ALIGNMENT 4 +// MMTk assumes allocation size is aligned to min alignment. 
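+// e.g. with MMTK_MIN_ALIGNMENT == 4: mmtk_align_alloc_sz(13) == 16,
+// mmtk_align_alloc_sz(16) == 16, and mmtk_align_alloc_sz(17) == 20.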
+inline size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT +{ + return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1); +} + +inline void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) { + intptr_t delta = (-offset - *cursor) & (align - 1); + uintptr_t result = *cursor + (uintptr_t)delta; + + if (__unlikely(result + size > limit)) { + return (void*) mmtk_alloc(mutator, size, align, offset, allocator); + } else{ + *cursor = result + size; + return (void*)result; + } +} + +inline void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); +} + +inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) { + mmtk_post_alloc(mutator, obj, size, 0); +} + +inline void mmtk_set_vo_bit(void* obj) { + intptr_t addr = (intptr_t) obj; + intptr_t shift = (addr >> 3) & 0b111; + uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6); + uint8_t new_val = (*vo_meta_addr) | (1 << shift); + (*vo_meta_addr) = new_val; +} + +inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_VO_BIT) { + // set VO bit + mmtk_set_vo_bit(obj); + } +} + +inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); +} + +inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_VO_BIT) { + // set VO bit + mmtk_set_vo_bit(obj); + } + + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) obj; + intptr_t shift = (addr >> 3) & 0b111; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + while(1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } + } +} + +// mutex for page profile +uv_mutex_t page_profile_lock; + +JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) +{ + uv_mutex_lock(&page_profile_lock); + const char *str = "Page profiler in unsupported in MMTk."; + ios_write(stream, str, strlen(str)); + uv_mutex_unlock(&page_profile_lock); +} + +// this seems to be needed by the gc tests +#define JL_GC_N_MAX_POOLS 51 +JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; + +STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT +{ + // FIXME: MMTk would have to provide its own stats +} + +#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants + +JL_DLLEXPORT uint64_t jl_get_pg_size(void) +{ + return MMTK_GC_PAGE_SZ; +} + + +extern void mmtk_store_obj_size_c(void* obj, size_t size); + +inline void maybe_collect(jl_ptls_t ptls) +{ + // Just do a safe point for general maybe_collect + jl_gc_safepoint_(ptls); +} + +// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll), +// is expensive. So we only check for every few allocations. 
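+// With the 4096-byte threshold below, a thread polls MMTk roughly once per
+// 4KB of malloc'd memory (about every 64 calls when allocating 64-byte
+// chunks) and only takes the cheap safepoint check in between.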
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) +{ + // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to + // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage + // as much as we can. + if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) { + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); + mmtk_gc_poll(ptls); + } else { + jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz); + jl_gc_safepoint_(ptls); + } +} + +// allocation wrappers that track allocation and let collection run + +JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + void *data = malloc(sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz); + } + return data; +} + +JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + void *data = calloc(nm, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, nm * sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); + } + return data; +} + +JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + free(p); + if (pgcstack != NULL && ct->world_age) { + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz); + } +} + +JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, sz); + if (sz < old) + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz); + else + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old); + } + return realloc(p, sz); +} + +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +{ + jl_ptls_t ptls = jl_current_task->ptls; + size_t allocsz = mmtk_align_alloc_sz(sz); + void* addr = mmtk_immortal_alloc_fast(&ptls->gc_tls.mmtk_mutator, allocsz, align, offset); + return addr; +} + +void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) +{ + return jl_gc_perm_alloc_nolock(sz, zero, align, offset); +} + +jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT +{ + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ? 
+ sizeof(void*) * 2 : 16)); + jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align, + sizeof(void*) % align); + + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(o), allocsz); + o->header = (uintptr_t)ty; + return jl_valueof(o); +} + + +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) +{ + // safepoint + jl_gc_safepoint_(ptls); + + jl_value_t *v; + if ((uintptr_t)ty != jl_buff_tag) { + // v needs to be 16 byte aligned, therefore v_tagged needs to be offset accordingly to consider the size of header + jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize, align), align, sizeof(jl_taggedvalue_t)); + v = jl_valueof(v_tagged); + mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize, align)); + } else { + // allocating an extra word to store the size of buffer objects + jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align), align, 0); + jl_value_t* v_tagged_aligned = ((jl_value_t*)((char*)(v_tagged) + sizeof(jl_taggedvalue_t))); + v = jl_valueof(v_tagged_aligned); + mmtk_store_obj_size_c(v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); + mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); + } + + ptls->gc_tls.gc_num.allocd += osize; + ptls->gc_tls.gc_num.poolalloc++; + + return v; +} + +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + mmtk_set_vm_space((void*)img_data, len); +} + +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz) +{ + // safepoint + jl_gc_safepoint_(ptls); + + size_t offs = offsetof(bigval_t, header); + assert(sz >= sizeof(jl_taggedvalue_t) && "sz must include tag"); + static_assert(offsetof(bigval_t, header) >= sizeof(void*), "Empty bigval header?"); + static_assert(sizeof(bigval_t) % JL_HEAP_ALIGNMENT == 0, ""); + size_t allocsz = LLT_ALIGN(sz + offs, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) { // overflow in adding offs, size was "negative" + assert(0 && "Error when allocating big object"); + jl_throw(jl_memory_exception); + } + + bigval_t *v = (bigval_t*)mmtk_alloc_large(&ptls->gc_tls.mmtk_mutator, allocsz, JL_CACHE_BYTE_ALIGNMENT, 0, 2); + + if (v == NULL) { + assert(0 && "Allocation failed"); + jl_throw(jl_memory_exception); + } + v->sz = allocsz; + + ptls->gc_tls.gc_num.allocd += allocsz; + ptls->gc_tls.gc_num.bigalloc++; + + jl_value_t *result = jl_valueof(&v->header); + mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2); + + return result; +} + +// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type) +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + + jl_value_t *val = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL); + maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type); + return val; +} + +// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) +{ + // TODO: assertion needed here? 
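+    // (jl_gc_small_alloc above makes the same assertion: generated code is
+    // expected to call these entry points with gc_state == 0, i.e. outside
+    // any GC-safe region.)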
+ assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + + jl_value_t *val = jl_mmtk_gc_alloc_big(ptls, sz); + maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type); + return val; +} + +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_mmtk_gc_alloc_big(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + +JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + maybe_collect(ptls); + size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *b = malloc_cache_align(allocsz); + if (b == NULL) + jl_throw(jl_memory_exception); + + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + // FIXME: Should these be part of mmtk's heap? + // malloc_maybe_collect(ptls, sz); + // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + // jl_gc_managed_malloc is currently always used for allocating array buffers. + maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag); + return b; +} + +// Not used by mmtk +// Number of GC threads that may run parallel marking +int jl_n_markthreads; +// Number of GC threads that may run concurrent sweeping (0 or 1) +int jl_n_sweepthreads; +// `tid` of first GC thread +int gc_first_tid; + +JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT +{ + mmtk_unreachable(); +} + +JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored, + struct _jl_datatype_t *dt) JL_NOTSAFEPOINT +{ + mmtk_unreachable(); +} + +// marking +// --- + +JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) +{ + mmtk_unreachable(); + return 0; +} +JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, + jl_value_t **objs, size_t nobjs) +{ + mmtk_unreachable(); +} + +JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) +{ + // TODO: meaningful for MMTk? + return GC_MAX_SZCLASS; +} + +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +{ + // FIXME: do we need to implement this? 
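+    // (the stock collector pushes obj onto ptls->gc_tls.sweep_objs here so
+    // that the foreign sweep function runs during sweeping; MMTk does not
+    // expose an equivalent hook yet)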
+} + +// gc-debug functions +// --- + +JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) +{ + return NULL; +} + +void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT +{ +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return 0; +} + +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return 0; +} + +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT +{ + // May not be accurate but should be helpful enough + uint64_t pool_count = gc_num.poolalloc; + uint64_t big_count = gc_num.bigalloc; + jl_safe_printf("Allocations: %" PRIu64 " " + "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n", + pool_count + big_count, pool_count, big_count, gc_num.pause); +} + +JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) +{ + return sizeof(bigval_t); +} + +void jl_print_gc_stats(JL_STREAM *s) +{ +} + +JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) +{ + return 0; +} + +JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) +{ + return 0; +} + +// TODO: if this is needed, it can be added in MMTk +JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) +{ + return NULL; +} + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/gc-mmtk.h b/src/gc-mmtk.h new file mode 100644 index 0000000000000..6c2c7a40bc81f --- /dev/null +++ b/src/gc-mmtk.h @@ -0,0 +1,34 @@ +#ifdef MMTK_GC + +#ifdef __cplusplus +extern "C" { +#endif + +extern jl_mutex_t finalizers_lock; +extern arraylist_t to_finalize; +extern arraylist_t finalizer_list_marked; + +JL_EXTENSION typedef struct _bigval_t { + size_t sz; +#ifdef _P64 // Add padding so that the value is 64-byte aligned + // (8 pointers of 8 bytes each) - (2 other pointers in struct) + void *_padding[8 - 2]; +#else + // (16 pointers of 4 bytes each) - (2 other pointers in struct) + void *_padding[16 - 2]; +#endif + //struct jl_taggedvalue_t <>; + union { + uintptr_t header; + struct { + uintptr_t gc:2; + } bits; + }; + // must be 64-byte aligned here, in 32 & 64 bit modes +} bigval_t; + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index 2625fa812781a..bfd1c74247df8 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -1,5 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-page-profiler.h" #include "julia.h" @@ -178,3 +178,5 @@ JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-pages.c b/src/gc-pages.c index 71d59de29166f..ed6e0ed20ba1c 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,5 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" #ifndef _OS_WINDOWS_ @@ -205,3 +205,5 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-stock.c b/src/gc-stock.c index 3ff37566dc6c7..164d3067a31de 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -1,5 +1,5 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" #include "gc-alloc-profiler.h" @@ -405,7 +405,6 @@ static void sweep_weak_refs(void) } } - STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; @@ -453,7 +452,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) return jl_valueof(&v->header); } - // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) { @@ -3888,12 +3886,22 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } +<<<<<<< HEAD +======= +>>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); } +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + // Do nothing +} + #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-stock.h b/src/gc-stock.h index 50eca3aadbd86..8e563f32ab9d3 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -5,6 +5,7 @@ . non-moving, precise mark and sweep collector . pool-allocates small objects, keeps big objects on a simple list */ +#ifndef MMTK_GC #ifndef JL_GC_H #define JL_GC_H @@ -422,21 +423,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); @@ -712,3 +698,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #endif #endif + +#endif // !MMTK_GC diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h new file mode 100644 index 0000000000000..2eb5f2a6a44d9 --- /dev/null +++ b/src/gc-tls-mmtk.h @@ -0,0 +1,49 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#ifdef MMTK_GC + +#include +#include "mmtkMutator.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_t; + +typedef struct { + jl_thread_heap_t heap; + jl_thread_gc_num_t gc_num; + MMTkMutatorContext mmtk_mutator; + size_t malloc_sz_since_last_poll; +} jl_gc_tls_states_t; + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db84..43adfb8a7ff2a 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + // Meant to be included in "julia_threads.h" #ifndef JL_GC_TLS_H #define JL_GC_TLS_H @@ -90,3 +92,5 @@ typedef struct { #endif #endif // JL_GC_TLS_H + +#endif // MMTK_GC diff --git a/src/julia.h b/src/julia.h index abb8a57ff13b0..db57db1fbeb38 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index e677f40907dfd..d5013601a9124 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1052,7 +1052,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
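Note (illustrative sketch, not part of any commit in this series): the jl_gc_tls_states_t defined above embeds an MMTkMutatorContext directly in each thread's TLS, which is what both the runtime slow path and the later LLVM fastpath lowering reach through ptls->gc_tls.mmtk_mutator. Below is a minimal C sketch of a small default-pool allocation through that mutator, assuming the mmtk_alloc/mmtk_post_alloc FFI entry points declared in gc-mmtk.c and the 16-byte pool alignment used by jl_gc_classify_pools; the helper name example_alloc_small is hypothetical.

// Illustrative only: mirrors what jl_mmtk_gc_alloc_default does with the
// per-thread MMTkMutatorContext; allocator index 0 is the default immix allocator.
static jl_value_t *example_alloc_small(jl_ptls_t ptls, size_t sz, void *ty)
{
    // round up to the pool size class, including the object header
    size_t allocsz = LLT_ALIGN(sz + sizeof(jl_taggedvalue_t), 16);
    jl_taggedvalue_t *o = (jl_taggedvalue_t*)
        mmtk_alloc(&ptls->gc_tls.mmtk_mutator, allocsz, 16, 0, 0);
    mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, jl_valueof(o), allocsz, 0);
    jl_value_t *v = jl_valueof(o);
    jl_set_typeof(v, ty); // the GC interface requires the type tag to be set
    return v;
}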
diff --git a/src/julia_threads.h b/src/julia_threads.h
index b697a0bf030ed..641c50386c555 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,7 +4,11 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
+#ifndef MMTK_GC
 #include "gc-tls.h"
+#else
+#include "gc-tls-mmtk.h"
+#endif
 #include "julia_atomics.h"
 #ifndef _OS_WINDOWS_
 #include "pthread.h"
diff --git a/src/stackwalk.c b/src/stackwalk.c
index a1de3a6d61a07..e6fc2c7bbf56a 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
 }
 
 extern int gc_first_tid;
+extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT;
+extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT;
 
 // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
 JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
diff --git a/src/staticdata.c b/src/staticdata.c
index 363aa46b62221..e07a5365bf06f 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -654,6 +654,7 @@ static void jl_load_sysimg_so(void)
         plen = (size_t *)&jl_system_image_size;
     else
         jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1);
+    jl_gc_notify_image_load(sysimg_data, *plen);
     jl_restore_system_image_data(sysimg_data, *plen);
 }
 
@@ -3899,6 +3900,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j
     jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1);
     size_t *plen;
     jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1);
+    jl_gc_notify_image_load(pkgimg_data, *plen);
 
     jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle);
 
diff --git a/src/threading.c b/src/threading.c
index 44b1192528531..df62ea107bf04 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -743,6 +743,10 @@ void jl_init_threading(void)
     }
 
     int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads;
+#ifdef MMTK_GC
+    ngcthreads = 0;
+#endif
+
     jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads;
     jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int));
     jl_n_threads_per_pool[0] = nthreadsi;

From b488bbeb22847c3740459d015878368587ecb847 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 28 Aug 2024 00:44:09 +0000
Subject: [PATCH 079/116] Refactoring to be considered before adding MMTk

---
 src/gc-interface.h | 4 +++-
 src/gc-stock.c     | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gc-interface.h b/src/gc-interface.h
index 72a57f4944156..b1f3ab9d6908d 100644
--- a/src/gc-interface.h
+++ b/src/gc-interface.h
@@ -192,7 +192,9 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
 // object being allocated and will be used to set the object header.
 struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
 
-// FIXME: add description here
+// This function notifies the GC about memory addresses that are set when loading the boot image.
+// The GC may use that information to, for instance, determine that such objects should
+// be treated as marked and as belonging to the old generation in nursery collections.
void jl_gc_notify_image_load(const char* img_data, size_t len); // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 164d3067a31de..019ae481ce189 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3888,8 +3888,11 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) <<<<<<< HEAD +<<<<<<< HEAD ======= >>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) +======= +>>>>>>> 0aee3ba32a (Refactoring to be considered before adding MMTk) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From a4cf8e7c754fc72c9612750ccce65b87eaeb720b Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 05:37:53 +0000 Subject: [PATCH 080/116] Adding fastpath allocation --- src/llvm-gc-interface-passes.h | 5 ++ src/llvm-late-gc-lowering.cpp | 139 +++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index d33567e887118..ed6b94dcdc3fc 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -328,6 +329,7 @@ struct LateLowerGCFrame: private JuliaPassContext { private: CallInst *pgcstack; + Function *smallAllocFunc; void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef &SafepointsSoFar, SmallVector &&RefinedPtr = SmallVector()); @@ -365,6 +367,9 @@ struct LateLowerGCFrame: private JuliaPassContext { void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef CalleeRoots); Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V); Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V); +#ifdef MMTK_GC + Value* lowerGCAllocBytesLate(CallInst *target, Function &F); +#endif }; // The final GC lowering pass. This pass lowers platform-agnostic GC diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 1d390a5115207..d395771f6df0c 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2452,8 +2452,122 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl &Colors, St } } +#ifdef MMTK_GC +Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) +{ + assert(target->arg_size() == 3); + + IRBuilder<> builder(target); + auto ptls = target->getArgOperand(0); + auto type = target->getArgOperand(2); + if (auto CI = dyn_cast(target->getArgOperand(1))) { + size_t sz = (size_t)CI->getZExtValue(); + // This is strongly architecture and OS dependent + int osize; + int offset = jl_gc_classify_pools(sz, &osize); + if (offset >= 0) { + // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc + // We do a slowpath/fastpath check and lower it only on the slowpath, returning + // the cursor and updating it in the fastpath. + auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); + auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); + + // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. + // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. + const bool INLINE_FASTPATH_ALLOCATION = true; + + if (INLINE_FASTPATH_ALLOCATION) { + // Assuming we use the first immix allocator. + // FIXME: We should get the allocator index and type from MMTk. 
+ auto allocator_offset = offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); + + auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); + auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); + auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); + + // offset = 8 + auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); + auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); + auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) + auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); + auto result = builder.CreateNSWAdd(cursor, delta, "result"); + + auto new_cursor = builder.CreateNSWAdd(result, pool_osize); + + auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); + auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); + auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); + + auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); + + auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); + auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction()); + + auto next_instr = target->getNextNode(); + SmallVector Weights{1, 9}; + + MDBuilder MDB(F.getContext()); + SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights)); + + builder.SetInsertPoint(next_instr); + auto phiNode = builder.CreatePHI(target->getCalledFunction()->getReturnType(), 2, "phi_fast_slow"); + + // slowpath + builder.SetInsertPoint(slowpath); + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + auto new_call = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize_i32, type }); + new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); + builder.CreateBr(next_instr->getParent()); + + // fastpath + builder.SetInsertPoint(fastpath); + builder.CreateStore(new_cursor, cursor_ptr); + + // ptls->gc_tls.gc_num.allocd += osize; + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); + auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); + auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); + auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + builder.CreateStore(pool_allocd_total, pool_alloc_tls); + + auto v_raw = builder.CreateNSWAdd(result, 
ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); + auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType()); + builder.CreateBr(next_instr->getParent()); + + phiNode->addIncoming(new_call, slowpath); + phiNode->addIncoming(v_as_ptr, fastpath); + phiNode->takeName(target); + return phiNode; + } + } + } + return target; +} + +template +static void replaceInstruction( + Instruction *oldInstruction, + Value *newInstruction, + TIterator &it) +{ + if (newInstruction != oldInstruction) { + oldInstruction->replaceAllUsesWith(newInstruction); + it = oldInstruction->eraseFromParent(); + } + else { + ++it; + } +} +#endif + bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { initAll(*F.getParent()); + smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); if (!pgcstack_getter && !adoptthread_func) return CleanupIR(F, nullptr, CFGModified); @@ -2468,6 +2582,31 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { std::map> CallFrames; // = OptimizeCallFrames(S, Ordering); PlaceRootsAndUpdateCalls(Colors, S, CallFrames); CleanupIR(F, &S, CFGModified); + +#ifdef MMTK_GC + // We lower the julia.gc_alloc_bytes intrinsic in this pass to insert slowpath/fastpath blocks for MMTk + for (BasicBlock &BB : F) { + for (auto it = BB.begin(); it != BB.end();) { + auto *CI = dyn_cast(&*it); + if (!CI) { + ++it; + continue; + } + + Value *callee = CI->getCalledOperand(); + assert(callee); + + auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes); + if (GCAllocBytes == callee) { + *CFGModified = true; + replaceInstruction(CI, lowerGCAllocBytesLate(CI, F), it); + continue; + } + ++it; + } + } +#endif + return true; } From ecb675a597ab3dcd57fc053c995252618b6b0edd Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 05:51:26 +0000 Subject: [PATCH 081/116] Fixing removed newlines --- src/gc-debug.c | 1 + src/gc-heap-snapshot.cpp | 1 + src/gc-page-profiler.c | 1 + src/gc-pages.c | 1 + src/gc-stock.c | 7 +++++++ 5 files changed, 11 insertions(+) diff --git a/src/gc-debug.c b/src/gc-debug.c index ecd7f2328cada..2c8e1c6055414 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index d3cb1e98d84a4..fcda11dad4f8a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #include "gc-heap-snapshot.h" #include "julia.h" diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index bfd1c74247df8..e5c6b91978731 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-page-profiler.h" #include "julia.h" diff --git a/src/gc-pages.c b/src/gc-pages.c index ed6e0ed20ba1c..976fc461d5b95 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" diff --git a/src/gc-stock.c b/src/gc-stock.c index 019ae481ce189..05f2f5930448c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -1,4 +1,5 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" @@ -405,6 +406,7 @@ static void sweep_weak_refs(void) } } + STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; @@ -452,6 +454,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) return jl_valueof(&v->header); } + // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) { @@ -3886,6 +3889,7 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } +<<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD @@ -3893,6 +3897,9 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) >>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) ======= >>>>>>> 0aee3ba32a (Refactoring to be considered before adding MMTk) +======= + +>>>>>>> 30ac6f081d (Fixing removed newlines) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From 77db2039905d73c9d6a30bef583d7ad15aea9ca1 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 082/116] Refactoring to be considered before adding MMTk --- src/gc-stock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gc-stock.c b/src/gc-stock.c index 05f2f5930448c..5fd3b7efafead 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3892,6 +3892,7 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD ======= >>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) @@ -3900,6 +3901,8 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) ======= >>>>>>> 30ac6f081d (Fixing removed newlines) +======= +>>>>>>> 2efcdf8335 (Refactoring to be considered before adding MMTk) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From c5d3a40880cc08014ec6347372ea35c3249f8709 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 06:07:02 +0000 Subject: [PATCH 083/116] Adding a few comments; Moving some functions to be closer together --- src/gc-common.c | 70 ----------- src/gc-mmtk.c | 311 ++++++++++++++-------------------------------- src/gc-tls-mmtk.h | 2 + 3 files changed, 94 insertions(+), 289 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 17f6f1330743b..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -705,76 +705,6 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - -// tracking Memorys with malloc'd storage -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. 
- mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} - -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - #ifdef __cplusplus } #endif diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index e459b0f12c41d..98a5612871be0 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -10,9 +10,10 @@ extern "C" { #endif -// For now we're using the same values as stock-gc. However -// for the heap size we use 70% of the free memory available -// since that is actually a hard limit in MMTk. +// FIXME: Should the values below be shared between both GC's? +// Note that MMTk uses a hard max heap limit, which is set by default +// as 70% of the free available memory. The min heap is set as the +// default_collect_interval variable below. // max_total_memory is a suggestion. We try very hard to stay // under this limit, but we will go above it rather than halting. @@ -33,7 +34,6 @@ static memsize_t max_total_memory = (memsize_t) MAX32HEAP; void jl_gc_init(void) { // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167) - JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); arraylist_new(&to_finalize, 0); @@ -105,10 +105,6 @@ void jl_gc_init(void) { void jl_start_gc_threads(void) { jl_ptls_t ptls = jl_current_task->ptls; mmtk_initialize_collection((void *)ptls); - // int nthreads = jl_atomic_load_relaxed(&jl_n_threads); - // int ngcthreads = jl_n_gcthreads; - // int nmutator_threads = nthreads - ngcthreads; - // printf("nthreads = %d, ngcthreads = %d, nmutator_threads = %d\n", nthreads, ngcthreads, nmutator_threads); } void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { @@ -135,38 +131,31 @@ void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator); } -// FIXME: mmtk uses the same code as stock to enable/disable the GC -// Should this be moved to gc-common.c? 
-
-_Atomic(uint32_t) jl_gc_disable_counter = 1;
-
-JL_DLLEXPORT int jl_gc_enable(int on) {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    int prev = !ptls->disable_gc;
-    ptls->disable_gc = (on == 0);
-    if (on && !prev) {
-        // disable -> enable
-        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
-            gc_num.allocd += gc_num.deferred_alloc;
-            gc_num.deferred_alloc = 0;
-        }
-    }
-    else if (prev && !on) {
-        // enable -> disable
-        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
-        // check if the GC is running and wait for it to finish
-        jl_gc_safepoint_(ptls);
-    }
-    return prev;
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) {
+    // MMTk currently does not allow setting the heap size at runtime
 }
-JL_DLLEXPORT int jl_gc_is_enabled(void) {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return !ptls->disable_gc;
+
+inline void maybe_collect(jl_ptls_t ptls)
+{
+    // Just do a safe point for general maybe_collect
+    jl_gc_safepoint_(ptls);
 }
-JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) {
-    // MMTk currently does not allow setting the heap size at runtime
+// This is only used for malloc. We need to know if we need to do GC. However, repeatedly checking with MMTk (mmtk_gc_poll)
+// is expensive, so we only check every few allocations.
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
+{
+    // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to
+    // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage
+    // as much as we can.
+    if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) {
+        jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
+        mmtk_gc_poll(ptls);
+    } else {
+        jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz);
+        jl_gc_safepoint_(ptls);
+    }
 }
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) {
@@ -182,7 +171,12 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) {
     mmtk_handle_user_collection_request(ptls, collection);
 }
 
-// same as above, some of these are identical to the implementation in gc stock
+// FIXME: The functions combine_thread_gc_counts and reset_thread_gc_counts
+// are currently nearly identical for mmtk and for stock. However, the stats
+// are likely different (e.g., MMTk doesn't track the bytes allocated in the fastpath,
+// but only when the slowpath is called). We might need to adapt these later so that
+// the statistics are the same or as close as possible for each GC.
+
 static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
 {
     int gc_n_threads;
@@ -228,31 +222,6 @@ void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
     }
 }
 
-// weak references
-// ---
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
-{
-    jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type);
-    wr->value = value;  // NOTE: wb not needed here
-    mmtk_add_weak_candidate(wr);
-    return wr;
-}
-
-
-// allocation
-int jl_gc_classify_pools(size_t sz, int *osize)
-{
-    if (sz > GC_MAX_SZCLASS)
-        return -1; // call big alloc function
-    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    *osize = LLT_ALIGN(allocsz, 16);
-    return 0; // use MMTk's fastpath logic
-}
-
-int64_t last_gc_total_bytes = 0;
-int64_t last_live_bytes = 0; // live_bytes at last collection
-int64_t live_bytes = 0;
-
 // Retrieves Julia's `GC_Num` (structure that stores GC statistics).
JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -260,6 +229,10 @@ JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { return num; } +int64_t last_gc_total_bytes = 0; +int64_t last_live_bytes = 0; // live_bytes at last collection +int64_t live_bytes = 0; + JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT { int64_t oldtb = last_gc_total_bytes; int64_t newtb; @@ -325,82 +298,38 @@ JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) return max_total_memory; } +// weak references +// --- +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + wr->value = value; // NOTE: wb not needed here + mmtk_add_weak_candidate(wr); + return wr; +} + +// allocation + extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); - - extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; - -// These need to be constants. - -#define MMTK_OBJECT_BARRIER (1) -// Stickyimmix needs write barrier. Immix does not need write barrier. -#ifdef MMTK_PLAN_IMMIX -#define MMTK_NEEDS_WRITE_BARRIER (0) -#endif -#ifdef MMTK_PLAN_STICKYIMMIX -#define MMTK_NEEDS_WRITE_BARRIER (1) -#endif - -#ifdef MMTK_CONSERVATIVE_SCAN -#define MMTK_NEEDS_VO_BIT (1) -#else -#define MMTK_NEEDS_VO_BIT (0) -#endif +extern void mmtk_store_obj_size_c(void* obj, size_t size); #define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) #define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) -// Directly call into MMTk for write barrier (debugging only) -inline void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr); -} - -// Fastpath. Return 1 if we should go to slowpath -inline int mmtk_gc_wb_fast_check(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { - intptr_t addr = (intptr_t) (void*) parent; - uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); - intptr_t shift = (addr >> 3) & 0b111; - uint8_t byte_val = *meta_addr; - return ((byte_val >> shift) & 1) == 1; - } else { - return 0; - } -} - -// Slowpath. 
-inline void mmtk_gc_wb_slow(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr); - } -} -inline void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - if (mmtk_gc_wb_fast_check(parent, ptr)) { - mmtk_gc_wb_slow(parent, ptr); - } -} - -inline void mmtk_gc_wb_binding(const void *bnd, const void *val) JL_NOTSAFEPOINT +int jl_gc_classify_pools(size_t sz, int *osize) { - if (mmtk_gc_wb_fast_check(bnd, val)) { - jl_astaggedvalue(bnd)->bits.gc = 2; // to indicate that the buffer is a binding - mmtk_gc_wb_slow(bnd, val); - } + if (sz > GC_MAX_SZCLASS) + return -1; // call big alloc function + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + *osize = LLT_ALIGN(allocsz, 16); + return 0; // use MMTk's fastpath logic } - #define MMTK_MIN_ALIGNMENT 4 // MMTk assumes allocation size is aligned to min alignment. inline size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT @@ -429,19 +358,9 @@ inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, s mmtk_post_alloc(mutator, obj, size, 0); } -inline void mmtk_set_vo_bit(void* obj) { - intptr_t addr = (intptr_t) obj; - intptr_t shift = (addr >> 3) & 0b111; - uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6); - uint8_t new_val = (*vo_meta_addr) | (1 << shift); - (*vo_meta_addr) = new_val; -} - inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - if (MMTK_NEEDS_VO_BIT) { - // set VO bit - mmtk_set_vo_bit(obj); - } + // FIXME: for now, we do nothing + // but when supporting moving, this is where we set the valid object (VO) bit } inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { @@ -450,79 +369,12 @@ inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, } inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - if (MMTK_NEEDS_VO_BIT) { - // set VO bit - mmtk_set_vo_bit(obj); - } - - if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { - intptr_t addr = (intptr_t) obj; - intptr_t shift = (addr >> 3) & 0b111; - uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); - while(1) { - uint8_t old_val = *meta_addr; - uint8_t new_val = old_val | (1 << shift); - if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { - break; - } - } - } -} - -// mutex for page profile -uv_mutex_t page_profile_lock; - -JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) -{ - uv_mutex_lock(&page_profile_lock); - const char *str = "Page profiler in unsupported in MMTk."; - ios_write(stream, str, strlen(str)); - uv_mutex_unlock(&page_profile_lock); -} - -// this seems to be needed by the gc tests -#define JL_GC_N_MAX_POOLS 51 -JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; - -STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT -{ - // FIXME: MMTk would have to provide its own stats -} - -#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants - -JL_DLLEXPORT uint64_t jl_get_pg_size(void) -{ - return MMTK_GC_PAGE_SZ; -} - - -extern void mmtk_store_obj_size_c(void* obj, size_t size); - -inline void maybe_collect(jl_ptls_t ptls) -{ - // Just do a safe point for general maybe_collect - jl_gc_safepoint_(ptls); -} - -// This is only 
used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll),
-// is expensive. So we only check for every few allocations.
-static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
-{
-    // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to
-    // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage
-    // as much as we can.
-    if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) {
-        jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
-        mmtk_gc_poll(ptls);
-    } else {
-        jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz);
-        jl_gc_safepoint_(ptls);
-    }
-}
-
 // allocation wrappers that track allocation and let collection run
-
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
@@ -601,7 +453,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
     return jl_valueof(o);
 }
-
 JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty)
 {
     // safepoint
@@ -628,11 +479,6 @@ JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, siz
     return v;
 }
 
-void jl_gc_notify_image_load(const char* img_data, size_t len)
-{
-    mmtk_set_vm_space((void*)img_data, len);
-}
-
 JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz)
 {
     // safepoint
@@ -735,6 +581,38 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     return b;
 }
 
+void jl_gc_notify_image_load(const char* img_data, size_t len)
+{
+    mmtk_set_vm_space((void*)img_data, len);
+}
+
+// mutex for page profile
+uv_mutex_t page_profile_lock;
+
+JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
+{
+    uv_mutex_lock(&page_profile_lock);
+    const char *str = "Page profiler is unsupported in MMTk.";
+    ios_write(stream, str, strlen(str));
+    uv_mutex_unlock(&page_profile_lock);
+}
+
+// this seems to be needed by the gc tests
+#define JL_GC_N_MAX_POOLS 51
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
+{
+    // FIXME: MMTk would have to provide its own stats
+}
+
+#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return MMTK_GC_PAGE_SZ;
+}
+
 // Not used by mmtk
 // Number of GC threads that may run parallel marking
 int jl_n_markthreads;
@@ -791,12 +669,7 @@ void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT
 {
 }
 
-int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT
-{
-    return 0;
-}
-
-int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT
+int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT
 {
     return 0;
 }
diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h
index 2eb5f2a6a44d9..64a1bae192445 100644
--- a/src/gc-tls-mmtk.h
+++ b/src/gc-tls-mmtk.h
@@ -9,6 +9,8 @@
 extern "C" {
 #endif
 
+// This mostly removes some fields that are not used by MMTk
+
 typedef struct {
     // variable for tracking weak references
     small_arraylist_t weak_refs;

From c26632ed5d2be1effebe86bfa5ca844195933095 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Wed, 25 Sep 2024 01:20:29 +0000
Subject: [PATCH 084/116] Fixing merge conflicts

---
 src/gc-stock.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/src/gc-stock.c b/src/gc-stock.c index
5fd3b7efafead..078635f18e3ce 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3889,20 +3889,6 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } -<<<<<<< HEAD -<<<<<<< HEAD - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) -======= ->>>>>>> 0aee3ba32a (Refactoring to be considered before adding MMTk) -======= - ->>>>>>> 30ac6f081d (Fixing removed newlines) -======= ->>>>>>> 2efcdf8335 (Refactoring to be considered before adding MMTk) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From c283442edf340d882b13c9ec887a6d9bd44b2527 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:24:44 +0000 Subject: [PATCH 085/116] Applying changes from refactoring before adding MMTk --- src/gc-stock.h | 16 ++++++++++++++++ src/julia.h | 2 +- src/julia_internal.h | 2 +- src/stackwalk.c | 2 -- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/gc-stock.h b/src/gc-stock.h index 8e563f32ab9d3..6f75dcd014176 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -423,6 +423,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); @@ -699,4 +714,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #endif + #endif // !MMTK_GC diff --git a/src/julia.h b/src/julia.h index db57db1fbeb38..abb8a57ff13b0 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index d5013601a9124..e677f40907dfd 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1052,7 +1052,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
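Note (illustrative sketch, not part of any commit): the gc_is_parallel_collector_thread/gc_is_concurrent_collector_thread predicates restored above encode a fixed thread-id layout for the stock GC — parallel mark threads occupy the tids starting at gc_first_tid, and the single optional concurrent sweeper comes right after them. Assuming gc_last_parallel_collector_thread_id() evaluates to gc_first_tid + jl_n_markthreads - 1, the partition can be summarized by the hypothetical helper below.

// Illustrative only: combined form of the two predicates above.
//   [gc_first_tid, gc_first_tid + jl_n_markthreads - 1] -> parallel mark threads
//   gc_first_tid + jl_n_markthreads                     -> concurrent sweeper
//                                                          (only if jl_n_sweepthreads == 1)
static int example_is_gc_thread(int tid)
{
    int last_parallel = gc_first_tid + jl_n_markthreads - 1;
    if (tid >= gc_first_tid && tid <= last_parallel)
        return 1; // parallel mark thread
    return jl_n_sweepthreads != 0 && tid == last_parallel + 1; // concurrent sweeper
}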
diff --git a/src/stackwalk.c b/src/stackwalk.c index e6fc2c7bbf56a..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT From 01aa62331858a7810efbcf5857edfda990a93e72 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 086/116] Refactoring to be considered before adding MMTk --- src/gc-common.c | 156 +++++++++++++++++++++++++++++++++++++++++++ src/gc-common.h | 6 ++ src/gc-debug.c | 41 +----------- src/gc-interface.h | 12 ++++ src/gc-stacks.c | 4 +- src/gc-stock.c | 156 ++++++++++++------------------------------- src/gc-stock.h | 21 ------ src/julia.h | 2 +- src/julia_internal.h | 26 +------- src/scheduler.c | 11 +++ src/stackwalk.c | 4 +- src/staticdata.c | 2 + 12 files changed, 237 insertions(+), 204 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..2ec167caa667a 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -489,6 +494,87 @@ jl_ptls_t* gc_all_tls_states; // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. 
+ +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { @@ -501,6 +587,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
+ mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..154b9659e9ccb 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. 
+typedef struct _mallocmemory_t {
+    jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
+    struct _mallocmemory_t *next;
+} mallocmemory_t;
+
 #if defined(_OS_WINDOWS_)
 STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
 {
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 19dd93af5f236..d05fb4b49e9f7 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1105,46 +1105,7 @@ void gc_count_pool(void)
     jl_safe_printf("************************\n");
 }
 
-int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
-{
-    int nf = (int)jl_datatype_nfields(vt);
-    for (int i = 1; i < nf; i++) {
-        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
-            return i - 1;
-    }
-    return nf - 1;
-}
-
-int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
-{
-    char *slot = (char*)_slot;
-    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
-    char *start = NULL;
-    size_t len = 0;
-    size_t elsize = sizeof(void*);
-    if (vt == jl_module_type) {
-        jl_module_t *m = (jl_module_t*)obj;
-        start = (char*)m->usings.items;
-        len = m->usings.len;
-    }
-    else if (vt == jl_simplevector_type) {
-        start = (char*)jl_svec_data(obj);
-        len = jl_svec_len(obj);
-    }
-    if (slot < start || slot >= start + elsize * len)
-        return -1;
-    return (slot - start) / elsize;
-}
-
-static int gc_logging_enabled = 0;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
-    gc_logging_enabled = enable;
-}
-
-JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
-    return gc_logging_enabled;
-}
+extern int gc_logging_enabled;
 
 void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT {
     if (!gc_logging_enabled) {
diff --git a/src/gc-interface.h b/src/gc-interface.h
index e543b4b5879f1..682f22344d69d 100644
--- a/src/gc-interface.h
+++ b/src/gc-interface.h
@@ -128,6 +128,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void);
 // Allocation
 // ========================================================================= //
 
+// On GCC, this function is inlined when sz is constant (see julia_internal.h)
+// In general, this function should implement allocation and should use the specific GC's logic
+// to decide whether to allocate a small or a large object. Finally, note that this function
+// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record
+// an allocation of that type in the allocation profiler.
+struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty);
+
 // Allocates small objects and increments Julia allocation counters. Size of the object
 // header must be included in the object size. The (possibly unused in some implementations)
 // offset to the arena in which we're allocating is passed in the second parameter, and the
@@ -211,6 +218,11 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
 // object being allocated and will be used to set the object header.
 struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
 
+// This function notifies the GC about memory addresses that are set when loading the boot image.
+// The GC may use that information to, for instance, determine that such objects should
+// be treated as marked and as belonging to the old generation in nursery collections.
+void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 783129ea97693..8c44b65284386 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -46,7 +46,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -81,7 +81,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); diff --git a/src/gc-stock.c b/src/gc-stock.c index 6b97881909bbd..6ebac8a0c079e 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -555,24 +555,6 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; @@ -649,17 +631,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -818,6 +789,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
+ v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -2794,6 +2788,21 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -2832,11 +2841,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -3397,13 +3401,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3685,63 +3682,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3875,18 +3815,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4014,14 +3942,14 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->gc_tls.sweep_objs, obj); + // Do nothing } #ifdef __cplusplus diff --git a/src/gc-stock.h b/src/gc-stock.h index 46f7d3e11e105..cc661ce6e1600 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list @@ -428,21 +422,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index ed3d9bf825658..b74de3060d26a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 20d90fede3d5e..04857d440b643 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -513,30 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -1077,7 +1053,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
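For readers following gc_is_parallel_collector_thread and
gc_is_concurrent_collector_thread as they move between gc-stock.h and
gc-stock.c across this series: the thread-id layout they encode is that the
parallel mark threads occupy a contiguous range of tids starting at
gc_first_tid, and the single concurrent sweep thread (when jl_n_sweepthreads
is nonzero) sits immediately after them. A standalone sketch with
hypothetical values, assuming the last parallel id is
gc_first_tid + jl_n_markthreads - 1:

#include <stdio.h>

/* Hypothetical configuration; in Julia these come from gc_first_tid,
   jl_n_markthreads and jl_n_sweepthreads. */
static const int first_tid = 2;
static const int n_markthreads = 3;
static const int n_sweepthreads = 1;

static int last_parallel_tid(void) { return first_tid + n_markthreads - 1; }

static int is_parallel_collector(int tid)
{
    return tid >= first_tid && tid <= last_parallel_tid();
}

static int is_concurrent_collector(int tid)
{
    /* at most one concurrent sweep thread, placed right after the mark threads */
    return n_sweepthreads != 0 && tid == last_parallel_tid() + 1;
}

int main(void)
{
    for (int tid = 0; tid < 8; tid++)
        printf("tid %d: parallel=%d concurrent=%d collector=%d\n", tid,
               is_parallel_collector(tid), is_concurrent_collector(tid),
               is_parallel_collector(tid) || is_concurrent_collector(tid));
    return 0;
}

This is also the relationship that patch 089's consolidated
gc_is_collector_thread predicate relies on.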
diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..b85a481588e4f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,9 +80,20 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } +<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; +======= +// parallel task runtime +// --- + +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return cong(max, &ptls->rngseed); +} +>>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..5f28b61c4a8fe 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 0a8cbe6db7c67..bba35e6dcb5f9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,6 +657,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4054,6 +4055,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From e10e3caef963bd1086deb3fb7d42f014ca2a3771 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 04:57:59 +0000 Subject: [PATCH 087/116] Removing jl_gc_notify_image_load, since it's a new function and not part of the refactoring --- src/gc-interface.h | 5 ----- src/gc-stock.c | 5 ----- src/staticdata.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 682f22344d69d..25ffed4524f0c 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -218,11 +218,6 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// This function notifies the GC about memory addresses that are set when loading the boot image. -// The GC may use that information to, for instance, determine that such objects should -// be treated as marked and belonged to the old generation in nursery collections. 
-void jl_gc_notify_image_load(const char* img_data, size_t len); - // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 6ebac8a0c079e..88b201a687eba 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3947,11 +3947,6 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - // Do nothing -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index bba35e6dcb5f9..0a8cbe6db7c67 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,7 +657,6 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4055,7 +4054,6 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From d4c4360ab89dc9052cd87933b1f4b9e3581f4daa Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 01:27:08 +0000 Subject: [PATCH 088/116] Moving gc_enable code to gc-common.c --- src/gc-common.c | 30 ++++++++++++++++++++++++++++++ src/gc-stock.c | 30 ------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 2ec167caa667a..03c046bc300f2 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -613,6 +613,36 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 88b201a687eba..55499bce61182 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2803,36 +2803,6 @@ int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT return tid == concurrent_collector_thread_id; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } 
- } - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; From d07cae75b0b36b34a1b5150feab2b52d62a0c1ad Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 16 Sep 2024 06:38:02 +0000 Subject: [PATCH 089/116] Addressing PR comments --- src/gc-common.c | 134 +++++++++++++++++++++++++------------------ src/gc-common.h | 6 ++ src/gc-debug.c | 2 - src/gc-interface.h | 30 +--------- src/gc-stock.c | 18 +----- src/gc-stock.h | 15 +++++ src/julia.h | 2 +- src/julia_internal.h | 4 +- src/stackwalk.c | 10 +--- 9 files changed, 110 insertions(+), 111 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 03c046bc300f2..046feae6aa4c5 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -491,15 +491,9 @@ int gc_n_threads; jl_ptls_t* gc_all_tls_states; // =========================================================================== // -// MISC +// Allocation // =========================================================================== // -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) { return jl_gc_alloc(ptls, sz, ty); @@ -575,17 +569,9 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT -{ - return jl_buff_tag; -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} +// =========================================================================== // +// Generic Memory +// =========================================================================== // size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT { @@ -613,6 +599,66 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// 
=========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -637,54 +683,30 @@ JL_DLLEXPORT int jl_gc_enable(int on) return prev; } -JL_DLLEXPORT int jl_gc_is_enabled(void) +// =========================================================================== // +// MISC +// =========================================================================== // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; + return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; +JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { + return ijl_small_typeof; } -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; + return jl_buff_tag; } -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) { - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; + jl_throw(jl_memory_exception); } #ifdef __cplusplus diff --git a/src/gc-common.h b/src/gc-common.h index 154b9659e9ccb..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -179,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index d05fb4b49e9f7..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,8 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -extern int gc_logging_enabled; - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t 
live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index 25ffed4524f0c..0e9ce32697f35 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -94,6 +94,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -162,26 +164,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -195,14 +177,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. 
-JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stock.c b/src/gc-stock.c index 55499bce61182..b345fe08ff69c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2788,19 +2788,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -3193,8 +3182,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); jl_thread_heap_t *heap = &ptls2->gc_tls.heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); diff --git a/src/gc-stock.h b/src/gc-stock.h index cc661ce6e1600..0f8d1eee67581 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -422,6 +422,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index b74de3060d26a..ed3d9bf825658 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 04857d440b643..c079c06f0189a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,6 +367,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT 
extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -1053,7 +1055,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/stackwalk.c b/src/stackwalk.c index 5f28b61c4a8fe..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT @@ -1304,12 +1302,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { From 8e15217b8a5eaea51335f6b7577ba929905a4a54 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 19 Sep 2024 04:18:13 +0000 Subject: [PATCH 090/116] Push resolution of merge conflict --- src/scheduler.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index b85a481588e4f..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,20 +80,9 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -======= -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} ->>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) From 0cb0784a43aa01803b73407c90bd5ee44d09531f Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:10:31 +0000 Subject: [PATCH 091/116] Removing jl_gc_mark_queue_obj_explicit extern definition from scheduler.c --- src/scheduler.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) From 
12634f36d67bd9c8275feda1e2729b0910ca2664 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 02:50:25 +0000 Subject: [PATCH 092/116] Don't need the getter function since it's possible to use jl_small_typeof directly --- src/gc-common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 046feae6aa4c5..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -693,10 +693,6 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { - return ijl_small_typeof; -} - const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { From aa8093328cf5f70d9df78fda2315b077a76e4d8b Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 27 Sep 2024 00:49:07 +0000 Subject: [PATCH 093/116] Remove extern from free_stack declaration in julia_internal.h --- src/julia_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index c079c06f0189a..6fd537ed6baf8 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,7 +367,7 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; -extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; From 7ce3fe392616d4da1035de6b02a21056f05072b6 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:12:49 +0000 Subject: [PATCH 094/116] Putting everything that is common GC tls into gc-tls-common.h --- src/gc-common.c | 10 +-- src/gc-stacks.c | 18 +++--- src/gc-stock.c | 154 ++++++++++++++++++++++---------------------- src/gc-tls-common.h | 52 +++++++++++++++ src/gc-tls.h | 25 ------- src/julia_threads.h | 2 + src/stackwalk.c | 2 +- 7 files changed, 147 insertions(+), 116 deletions(-) create mode 100644 src/gc-tls-common.h diff --git a/src/gc-common.c b/src/gc-common.c index 417f12f26d64d..6ce455d3923ad 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -587,16 +587,16 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ // This is **NOT** a GC safe point. 
mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { + if (ptls->gc_tls_common.heap.mafreelist == NULL) { ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); } else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; + ma = ptls->gc_tls_common.heap.mafreelist; + ptls->gc_tls_common.heap.mafreelist = ma->next; } ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; + ma->next = ptls->gc_tls_common.heap.mallocarrays; + ptls->gc_tls_common.heap.mallocarrays = ma; } // =========================================================================== // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 8c44b65284386..a8fec938456a3 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -131,7 +131,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -160,7 +160,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -175,7 +175,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { stk = small_arraylist_pop(pool); } @@ -196,7 +196,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; mtarraylist_push(live_tasks, owner); } return stk; @@ -223,7 +223,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { - small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; size_t n_to_free; if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { n_to_free = al->len; // not alive yet or dead, so it does not need these anymore @@ -245,10 +245,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT } } if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { - small_arraylist_free(ptls2->gc_tls.heap.free_stacks); + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -299,7 +299,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) jl_ptls_t ptls2 = allstates[i]; if (ptls2 == NULL) continue; - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); l += n + (ptls2->root_task->ctx.stkbuf != NULL); } @@ 
-318,7 +318,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) goto restart; jl_array_data(a,void*)[j++] = t; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); diff --git a/src/gc-stock.c b/src/gc-stock.c index b345fe08ff69c..8e040c9b25dcf 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -357,7 +357,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -367,8 +367,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -386,8 +386,8 @@ static void sweep_weak_refs(void) if (ptls2 != NULL) { size_t n = 0; size_t ndel = 0; - size_t l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; if (l == 0) continue; while (1) { @@ -402,7 +402,7 @@ static void sweep_weak_refs(void) lst[n] = lst[n + ndel]; lst[n + ndel] = tmp; } - ptls2->gc_tls.heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len -= ndel; } } } @@ -410,18 +410,18 @@ static void sweep_weak_refs(void) STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; if (alloc_acc < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); else { jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); } } STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); } // big value list @@ -442,10 +442,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); @@ -558,8 +558,8 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); jl_batch_accum_heap_size(ptls, sz); } @@ -578,18 +578,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc); - freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -605,13 +605,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -654,8 +654,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays; - mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays; + mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays; + mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays; while (ma != NULL) { mallocmemory_t *nxt = ma->next; jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); @@ -667,8 +667,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT *pma = nxt; int isaligned = (uintptr_t)ma->a & 1; jl_gc_free_memory(a, isaligned); - ma->next = ptls2->gc_tls.heap.mafreelist; - ptls2->gc_tls.heap.mafreelist = ma; + ma->next = ptls2->gc_tls_common.heap.mafreelist; + ptls2->gc_tls_common.heap.mafreelist = ma; } gc_time_count_mallocd_memory(bits); ma = nxt; @@ -729,12 +729,12 @@ STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -971,8 +971,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ // instead of adding it to the thread that originally allocated the page, so we can avoid // an atomic-fetch-add here. 
size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } @@ -1228,7 +1228,7 @@ static void gc_sweep_pool(void) } continue; } - jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -2834,7 +2834,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) for (int i = 0; i < n_threads; i++) { jl_ptls_t ptls2 = all_tls_states[i]; if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); } } return pool_live_bytes; @@ -3183,11 +3183,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls2->gc_tls.heap; - if (heap->weak_refs.len == 0) - small_arraylist_free(&heap->weak_refs); - if (heap->live_tasks.len == 0) - small_arraylist_free(&heap->live_tasks); + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); if (heap->remset.len == 0) arraylist_free(&heap->remset); if (ptls2->finalizers.len == 0) @@ -3256,8 +3257,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -3362,6 +3363,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3369,12 +3371,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; + small_arraylist_new(&common_heap->free_stacks[i], 0); + common_heap->mallocarrays = NULL; 
+ common_heap->mafreelist = NULL; heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; @@ -3400,8 +3402,8 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); } void jl_free_thread_gc_state(jl_ptls_t ptls) @@ -3579,10 +3581,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz); } return data; @@ -3596,10 +3598,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz * nm); } return data; @@ -3624,10 +3626,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (!(sz < old)) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); int64_t diff = sz - old; if (diff < 0) { @@ -3658,10 +3660,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..28fbf2d0c448e --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,52 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_H diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db84..ecc815805a98b 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -21,16 +21,6 @@ typedef struct { } jl_gc_pool_t; typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects struct _bigval_t *young_generation_of_bigvals; @@ -42,22 +32,8 @@ typedef struct { // variables for allocating objects from pools #define JL_GC_N_MAX_POOLS 51 // conservative. 
must be kept in sync with `src/julia_internal.h` jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { ws_queue_t chunk_queue; ws_queue_t ptr_queue; @@ -78,7 +54,6 @@ typedef struct { typedef struct { jl_thread_heap_t heap; jl_gc_page_stack_t page_metadata_allocd; - jl_thread_gc_num_t gc_num; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; diff --git a/src/julia_threads.h b/src/julia_threads.h index b697a0bf030ed..fcc28591658cb 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "gc-tls.h" +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -155,6 +156,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/stackwalk.c b/src/stackwalk.c index a1de3a6d61a07..0988d7a833c94 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1309,7 +1309,7 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT if (ptls2 == NULL) { continue; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); int t_state = JL_TASK_STATE_DONE; jl_task_t *t = ptls2->root_task; From 048af72dee003a3ded89c3bf6c6572f97cb2678a Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:14:24 +0000 Subject: [PATCH 095/116] Typo --- src/gc-tls-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h index 28fbf2d0c448e..ba36f5c1c238e 100644 --- a/src/gc-tls-common.h +++ b/src/gc-tls-common.h @@ -49,4 +49,4 @@ typedef struct { } #endif -#endif // JL_GC_TLS_H +#endif // JL_GC_TLS_COMMON_H From fe61c2232d997da0ebd3b936a469024acff7afbb Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 22:46:39 +0000 Subject: [PATCH 096/116] Adding gc-tls-common.h to Makefile as a public header --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a6b1f433b73ce..80bbdbcff67fc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix 
$(SRCDIR)/,win32_ucontext.h) endif From 380fd833efba491cb167ad9c61909199e14098d8 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 23:26:33 +0000 Subject: [PATCH 097/116] Removing gc-tls-common fields from gc-tls-mmtk.h --- src/gc-mmtk.c | 58 +++++++++++++++++------------------ src/gc-tls-mmtk.h | 30 ------------------ src/llvm-late-gc-lowering.cpp | 2 +- 3 files changed, 30 insertions(+), 60 deletions(-) diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index 98a5612871be0..aa010c73b27d2 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -108,7 +108,7 @@ void jl_start_gc_threads(void) { } void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { - jl_thread_heap_t *heap = &ptls->gc_tls.heap; + jl_thread_heap_common_t *heap = &ptls->gc_tls_common.heap; small_arraylist_new(&heap->weak_refs, 0); small_arraylist_new(&heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) @@ -124,7 +124,7 @@ void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); } void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { @@ -162,8 +162,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -186,15 +186,15 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -211,13 +211,13 @@ void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -257,8 +257,8 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) { void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); } void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT @@ -473,8 +473,8 @@ JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, siz mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); } - ptls->gc_tls.gc_num.allocd += osize; - ptls->gc_tls.gc_num.poolalloc++; + ptls->gc_tls_common.gc_num.allocd += osize; + ptls->gc_tls_common.gc_num.poolalloc++; return v; } @@ -502,8 +502,8 @@ JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz) } v->sz = allocsz; - ptls->gc_tls.gc_num.allocd += allocsz; - ptls->gc_tls.gc_num.bigalloc++; + ptls->gc_tls_common.gc_num.allocd += allocsz; + ptls->gc_tls_common.gc_num.bigalloc++; jl_value_t *result = jl_valueof(&v->header); mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2); @@ -565,10 +565,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); // FIXME: Should these be part of mmtk's heap? 
// malloc_maybe_collect(ptls, sz); // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz); diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h index 64a1bae192445..7b1b249cd8ae3 100644 --- a/src/gc-tls-mmtk.h +++ b/src/gc-tls-mmtk.h @@ -9,37 +9,7 @@ extern "C" { #endif -// This mostly remove some fields that are not used by MMTk - -typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; -} jl_thread_heap_t; - -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { - jl_thread_heap_t heap; - jl_thread_gc_num_t gc_num; MMTkMutatorContext mmtk_mutator; size_t malloc_sz_since_last_poll; } jl_gc_tls_states_t; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index d395771f6df0c..4b7dc0ec855a7 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2528,7 +2528,7 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) builder.CreateStore(new_cursor, cursor_ptr); // ptls->gc_tls.gc_num.allocd += osize; - auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls_common) + offsetof(jl_gc_tls_states_common_t, gc_num)); auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); From ebf478ad2783571684e64fa41c7868d40b105985 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 098/116] Refactoring to be considered before adding MMTk --- src/gc-common.c | 156 +++++++++++++++++++++++++++++++++++++++++++ src/gc-common.h | 6 ++ src/gc-debug.c | 41 +----------- src/gc-interface.h | 12 ++++ src/gc-stacks.c | 4 +- src/gc-stock.c | 156 ++++++++++++------------------------------- src/gc-stock.h | 21 ------ src/julia.h | 2 +- src/julia_internal.h | 26 +------- src/scheduler.c | 11 +++ src/stackwalk.c | 4 +- src/staticdata.c | 2 + 12 files changed, 237 insertions(+), 204 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..2ec167caa667a 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -489,6 +494,87 @@ jl_ptls_t* gc_all_tls_states; // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t 
*jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. + +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { @@ -501,6 +587,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
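// (Descriptive note on the body below: list nodes are recycled through
// `mafreelist` so this path does not call malloc on every tracked allocation,
// and the low bit of `ma->a` tags whether the block was allocated aligned;
// the sweep phase reads that bit back, as in `isaligned = (uintptr_t)ma->a & 1`,
// to pick the matching free routine.)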
+ mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..154b9659e9ccb 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. 
+typedef struct _mallocmemory_t { + jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory + struct _mallocmemory_t *next; +} mallocmemory_t; + #if defined(_OS_WINDOWS_) STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { diff --git a/src/gc-debug.c b/src/gc-debug.c index 19dd93af5f236..d05fb4b49e9f7 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,46 +1105,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} +extern int gc_logging_enabled; void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { diff --git a/src/gc-interface.h b/src/gc-interface.h index e543b4b5879f1..682f22344d69d 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -128,6 +128,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // Allocation // ========================================================================= // +// On GCC, this function is inlined when sz is constant (see julia_internal.h) +// In general, this function should implement allocation and should use the specific GC's logic +// to decide whether to allocate a small or a large object. Finally, note that this function +// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record +// an allocation of that type in the allocation profiler. +struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); + // Allocates small objects and increments Julia allocation counterst. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the @@ -211,6 +218,11 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// This function notifies the GC about memory addresses that are set when loading the boot image. +// The GC may use that information to, for instance, determine that such objects should +// be treated as marked and belonged to the old generation in nursery collections. 
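// To make the contract above concrete: the stock collector stubs this out as
// a no-op (see the gc-stock.c hunk further down in this patch), while a
// collector that wants to exploit it could record each image range and later
// classify pointers into those ranges as pre-marked, old-generation objects.
// A minimal sketch; all names in it (image_range_t, image_ranges,
// gc_ptr_in_image) are hypothetical, not part of this patch:
//
//   typedef struct { const char *start; size_t len; } image_range_t;
//   static image_range_t image_ranges[8]; // sysimg plus a few pkgimages
//   static size_t n_image_ranges = 0;
//
//   void jl_gc_notify_image_load(const char *img_data, size_t len)
//   {
//       assert(n_image_ranges < 8);
//       image_ranges[n_image_ranges].start = img_data;
//       image_ranges[n_image_ranges].len = len;
//       n_image_ranges++;
//   }
//
//   static int gc_ptr_in_image(const void *p)
//   {
//       for (size_t i = 0; i < n_image_ranges; i++)
//           if ((const char*)p >= image_ranges[i].start &&
//               (const char*)p <  image_ranges[i].start + image_ranges[i].len)
//               return 1;
//       return 0;
//   }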
+void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 783129ea97693..8c44b65284386 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -46,7 +46,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -81,7 +81,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); diff --git a/src/gc-stock.c b/src/gc-stock.c index 6b97881909bbd..6ebac8a0c079e 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -555,24 +555,6 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; @@ -649,17 +631,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -818,6 +789,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
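// Note on the call below: the second argument is the byte offset of the
// selected size-class pool within `ptls`; the callee recovers the pool as
// `(jl_gc_pool_t*)((char*)ptls + offset)`, so the offset doubles as a compact
// pool identifier that the LLVM GC-lowering pass can also emit as a
// compile-time constant.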
+ v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -2794,6 +2788,21 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -2832,11 +2841,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -3397,13 +3401,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3685,63 +3682,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
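// (For reference, the layout these wrappers rely on: each allocation is
// padded with JL_SMALL_BYTE_ALIGNMENT (16) header bytes, the requested size
// is stored in the first machine word, and the caller receives a pointer two
// int64 words past the start, so jl_free/jl_realloc can recover the exact
// size without a side table:
//
//     [ int64_t size | int64_t pad | payload ... ]
//     ^ raw block                   ^ pointer handed to the caller
// )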
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3875,18 +3815,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4014,14 +3942,14 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->gc_tls.sweep_objs, obj); + // Do nothing } #ifdef __cplusplus diff --git a/src/gc-stock.h b/src/gc-stock.h index 46f7d3e11e105..cc661ce6e1600 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list @@ -428,21 +422,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index ed3d9bf825658..b74de3060d26a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 20d90fede3d5e..04857d440b643 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -513,30 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -1077,7 +1053,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
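// (Context for the export change above: `jl_gc_disable_counter` backs the
// counter-based jl_gc_enable protocol, which patch 100 below moves into
// gc-common.c. A sketch of typical embedder usage, shown here only for
// reference:
//
//     int prev = jl_gc_enable(0);   // disable collection; returns prior state
//     /* region that must not trigger a collection */
//     jl_gc_enable(prev);           // restore the previous state
// )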
diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..b85a481588e4f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,9 +80,20 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } +<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; +======= +// parallel task runtime +// --- + +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return cong(max, &ptls->rngseed); +} +>>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..5f28b61c4a8fe 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 0a8cbe6db7c67..bba35e6dcb5f9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,6 +657,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4054,6 +4055,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From 0a8444ea6f539cdb63481f45411f42629c1c97e1 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 04:57:59 +0000 Subject: [PATCH 099/116] Removing jl_gc_notify_image_load, since it's a new function and not part of the refactoring --- src/gc-interface.h | 5 ----- src/gc-stock.c | 5 ----- src/staticdata.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 682f22344d69d..25ffed4524f0c 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -218,11 +218,6 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// This function notifies the GC about memory addresses that are set when loading the boot image. -// The GC may use that information to, for instance, determine that such objects should -// be treated as marked and belonged to the old generation in nursery collections. 
-void jl_gc_notify_image_load(const char* img_data, size_t len); - // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 6ebac8a0c079e..88b201a687eba 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3947,11 +3947,6 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - // Do nothing -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index bba35e6dcb5f9..0a8cbe6db7c67 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,7 +657,6 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4055,7 +4054,6 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From c8818eab4ec04a248121bef73e1dd5e3b29a3ceb Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 01:27:08 +0000 Subject: [PATCH 100/116] Moving gc_enable code to gc-common.c --- src/gc-common.c | 30 ++++++++++++++++++++++++++++++ src/gc-stock.c | 30 ------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 2ec167caa667a..03c046bc300f2 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -613,6 +613,36 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 88b201a687eba..55499bce61182 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2803,36 +2803,6 @@ int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT return tid == concurrent_collector_thread_id; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } 
- } - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; From e721e0c121ee911c29e736668b5e20766844d85e Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 16 Sep 2024 06:38:02 +0000 Subject: [PATCH 101/116] Addressing PR comments --- src/gc-common.c | 134 +++++++++++++++++++++++++------------------ src/gc-common.h | 6 ++ src/gc-debug.c | 2 - src/gc-interface.h | 30 +--------- src/gc-stock.c | 18 +----- src/gc-stock.h | 15 +++++ src/julia.h | 2 +- src/julia_internal.h | 4 +- src/stackwalk.c | 10 +--- 9 files changed, 110 insertions(+), 111 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 03c046bc300f2..046feae6aa4c5 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -491,15 +491,9 @@ int gc_n_threads; jl_ptls_t* gc_all_tls_states; // =========================================================================== // -// MISC +// Allocation // =========================================================================== // -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) { return jl_gc_alloc(ptls, sz, ty); @@ -575,17 +569,9 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT -{ - return jl_buff_tag; -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} +// =========================================================================== // +// Generic Memory +// =========================================================================== // size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT { @@ -613,6 +599,66 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// 
=========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -637,54 +683,30 @@ JL_DLLEXPORT int jl_gc_enable(int on) return prev; } -JL_DLLEXPORT int jl_gc_is_enabled(void) +// =========================================================================== // +// MISC +// =========================================================================== // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; + return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; +JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { + return ijl_small_typeof; } -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; + return jl_buff_tag; } -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) { - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; + jl_throw(jl_memory_exception); } #ifdef __cplusplus diff --git a/src/gc-common.h b/src/gc-common.h index 154b9659e9ccb..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -179,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index d05fb4b49e9f7..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,8 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -extern int gc_logging_enabled; - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t 
live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index 25ffed4524f0c..0e9ce32697f35 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -94,6 +94,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -162,26 +164,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -195,14 +177,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. 
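// (These convenience declarations only leave the public GC interface header;
// their definitions remain in gc-common.c, where patch 098 above placed them.)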
-JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stock.c b/src/gc-stock.c index 55499bce61182..b345fe08ff69c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2788,19 +2788,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -3193,8 +3182,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); jl_thread_heap_t *heap = &ptls2->gc_tls.heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); diff --git a/src/gc-stock.h b/src/gc-stock.h index cc661ce6e1600..0f8d1eee67581 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -422,6 +422,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index b74de3060d26a..ed3d9bf825658 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 04857d440b643..c079c06f0189a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,6 +367,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT 
extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -1053,7 +1055,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/stackwalk.c b/src/stackwalk.c index 5f28b61c4a8fe..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT @@ -1304,12 +1302,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { From 6c0eb93fccbd77a338c6a6e2ddae8888fa6bc1b2 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 19 Sep 2024 04:18:13 +0000 Subject: [PATCH 102/116] Push resolution of merge conflict --- src/scheduler.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index b85a481588e4f..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,20 +80,9 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -======= -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} ->>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) From fb0ec76ecc52efae85ad65c34b1a3f49f24475e7 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:10:31 +0000 Subject: [PATCH 103/116] Removing jl_gc_mark_queue_obj_explicit extern definition from scheduler.c --- src/scheduler.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) From 
3eea0790d832eba1d17b1a1564447f51986c7118 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 02:50:25 +0000 Subject: [PATCH 104/116] Don't need the getter function since it's possible to use jl_small_typeof directly --- src/gc-common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 046feae6aa4c5..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -693,10 +693,6 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { - return ijl_small_typeof; -} - const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { From ef6c79823306f2556951d6f8a70b165aceda2c76 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 27 Sep 2024 00:49:07 +0000 Subject: [PATCH 105/116] Remove extern from free_stack declaration in julia_internal.h --- src/julia_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index c079c06f0189a..6fd537ed6baf8 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,7 +367,7 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; -extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; From 63ca362bfaeed147887da242a6721de014ca5535 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:12:49 +0000 Subject: [PATCH 106/116] Putting everything that is common GC tls into gc-tls-common.h --- src/gc-common.c | 10 +-- src/gc-stacks.c | 18 +++--- src/gc-stock.c | 154 ++++++++++++++++++++++---------------------- src/gc-tls-common.h | 52 +++++++++++++++ src/gc-tls.h | 25 ------- src/julia_threads.h | 2 + src/stackwalk.c | 2 +- 7 files changed, 147 insertions(+), 116 deletions(-) create mode 100644 src/gc-tls-common.h diff --git a/src/gc-common.c b/src/gc-common.c index 417f12f26d64d..6ce455d3923ad 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -587,16 +587,16 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ // This is **NOT** a GC safe point. 
mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { + if (ptls->gc_tls_common.heap.mafreelist == NULL) { ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); } else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; + ma = ptls->gc_tls_common.heap.mafreelist; + ptls->gc_tls_common.heap.mafreelist = ma->next; } ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; + ma->next = ptls->gc_tls_common.heap.mallocarrays; + ptls->gc_tls_common.heap.mallocarrays = ma; } // =========================================================================== // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 8c44b65284386..a8fec938456a3 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -131,7 +131,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -160,7 +160,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -175,7 +175,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { stk = small_arraylist_pop(pool); } @@ -196,7 +196,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; mtarraylist_push(live_tasks, owner); } return stk; @@ -223,7 +223,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { - small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; size_t n_to_free; if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { n_to_free = al->len; // not alive yet or dead, so it does not need these anymore @@ -245,10 +245,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT } } if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { - small_arraylist_free(ptls2->gc_tls.heap.free_stacks); + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -299,7 +299,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) jl_ptls_t ptls2 = allstates[i]; if (ptls2 == NULL) continue; - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); l += n + (ptls2->root_task->ctx.stkbuf != NULL); } @@ 
-318,7 +318,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) goto restart; jl_array_data(a,void*)[j++] = t; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); diff --git a/src/gc-stock.c b/src/gc-stock.c index b345fe08ff69c..8e040c9b25dcf 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -357,7 +357,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -367,8 +367,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -386,8 +386,8 @@ static void sweep_weak_refs(void) if (ptls2 != NULL) { size_t n = 0; size_t ndel = 0; - size_t l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; if (l == 0) continue; while (1) { @@ -402,7 +402,7 @@ static void sweep_weak_refs(void) lst[n] = lst[n + ndel]; lst[n + ndel] = tmp; } - ptls2->gc_tls.heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len -= ndel; } } } @@ -410,18 +410,18 @@ static void sweep_weak_refs(void) STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; if (alloc_acc < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); else { jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); } } STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); } // big value list @@ -442,10 +442,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); @@ -558,8 +558,8 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); jl_batch_accum_heap_size(ptls, sz); } @@ -578,18 +578,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc); - freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -605,13 +605,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -654,8 +654,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays; - mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays; + mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays; + mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays; while (ma != NULL) { mallocmemory_t *nxt = ma->next; jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); @@ -667,8 +667,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT *pma = nxt; int isaligned = (uintptr_t)ma->a & 1; jl_gc_free_memory(a, isaligned); - ma->next = ptls2->gc_tls.heap.mafreelist; - ptls2->gc_tls.heap.mafreelist = ma; + ma->next = ptls2->gc_tls_common.heap.mafreelist; + ptls2->gc_tls_common.heap.mafreelist = ma; } gc_time_count_mallocd_memory(bits); ma = nxt; @@ -729,12 +729,12 @@ STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -971,8 +971,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ // instead of adding it to the thread that originally allocated the page, so we can avoid // an atomic-fetch-add here. 
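 // (The usable payload of a page is GC_PAGE_SZ - GC_PAGE_OFFSET bytes, and each
 // of the nfree free slots spans osize bytes, which is where the live-byte
 // delta below comes from.)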
size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } @@ -1228,7 +1228,7 @@ static void gc_sweep_pool(void) } continue; } - jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -2834,7 +2834,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) for (int i = 0; i < n_threads; i++) { jl_ptls_t ptls2 = all_tls_states[i]; if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); } } return pool_live_bytes; @@ -3183,11 +3183,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls2->gc_tls.heap; - if (heap->weak_refs.len == 0) - small_arraylist_free(&heap->weak_refs); - if (heap->live_tasks.len == 0) - small_arraylist_free(&heap->live_tasks); + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); if (heap->remset.len == 0) arraylist_free(&heap->remset); if (ptls2->finalizers.len == 0) @@ -3256,8 +3257,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -3362,6 +3363,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3369,12 +3371,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; + small_arraylist_new(&common_heap->free_stacks[i], 0); + common_heap->mallocarrays = NULL; 
+ common_heap->mafreelist = NULL; heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; @@ -3400,8 +3402,8 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); } void jl_free_thread_gc_state(jl_ptls_t ptls) @@ -3579,10 +3581,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz); } return data; @@ -3596,10 +3598,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz * nm); } return data; @@ -3624,10 +3626,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (!(sz < old)) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); int64_t diff = sz - old; if (diff < 0) { @@ -3658,10 +3660,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..28fbf2d0c448e --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,52 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_H diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db84..ecc815805a98b 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -21,16 +21,6 @@ typedef struct { } jl_gc_pool_t; typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects struct _bigval_t *young_generation_of_bigvals; @@ -42,22 +32,8 @@ typedef struct { // variables for allocating objects from pools #define JL_GC_N_MAX_POOLS 51 // conservative. 
must be kept in sync with `src/julia_internal.h` jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { ws_queue_t chunk_queue; ws_queue_t ptr_queue; @@ -78,7 +54,6 @@ typedef struct { typedef struct { jl_thread_heap_t heap; jl_gc_page_stack_t page_metadata_allocd; - jl_thread_gc_num_t gc_num; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; diff --git a/src/julia_threads.h b/src/julia_threads.h index b697a0bf030ed..fcc28591658cb 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "gc-tls.h" +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -155,6 +156,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/stackwalk.c b/src/stackwalk.c index a1de3a6d61a07..0988d7a833c94 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1309,7 +1309,7 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT if (ptls2 == NULL) { continue; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); int t_state = JL_TASK_STATE_DONE; jl_task_t *t = ptls2->root_task; From 3271996a9eb45899e330a274420a53d45c6b4079 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:14:24 +0000 Subject: [PATCH 107/116] Typo --- src/gc-tls-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h index 28fbf2d0c448e..ba36f5c1c238e 100644 --- a/src/gc-tls-common.h +++ b/src/gc-tls-common.h @@ -49,4 +49,4 @@ typedef struct { } #endif -#endif // JL_GC_TLS_H +#endif // JL_GC_TLS_COMMON_H From cd4f5a177f0c0c7d9e0fb59bf830f2d914c46727 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 22:46:39 +0000 Subject: [PATCH 108/116] Adding gc-tls-common.h to Makefile as a public header --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a6b1f433b73ce..80bbdbcff67fc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix 
$(SRCDIR)/,win32_ucontext.h)
 endif

From f4eba6b1dbe4bf3d66ff9de33953a80d6afb0d07 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Thu, 10 Oct 2024 01:42:47 +0000
Subject: [PATCH 109/116] Adding jl_full_sweep_reasons since timing.jl depends on it

---
 src/gc-mmtk.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c
index aa010c73b27d2..0d9a4db1d4fbc 100644
--- a/src/gc-mmtk.c
+++ b/src/gc-mmtk.c
@@ -10,6 +10,11 @@ extern "C" {
 #endif

+// FIXME: Does it make sense for MMTk to implement something similar?
+// For now, just ignoring this.
+// Table recording number of full GCs due to each reason
+JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
+
 // FIXME: Should the values below be shared between both GC's?
 // Note that MMTk uses a hard max heap limit, which is set by default
 // as 70% of the free available memory. The min heap is set as the

From c20ecb31d11a4ce7354d05ae5e49ffc91714901f Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Thu, 10 Oct 2024 02:06:50 +0000
Subject: [PATCH 110/116] Fixing issue with jl_full_sweep_reasons (missing constants)

---
 src/gc-mmtk.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c
index 0d9a4db1d4fbc..48992aeb43bd0 100644
--- a/src/gc-mmtk.c
+++ b/src/gc-mmtk.c
@@ -12,6 +12,14 @@ extern "C" {

 // FIXME: Does it make sense for MMTk to implement something similar?
 // For now, just ignoring this.
+
+// Must be kept in sync with `base/timing.jl`
+#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
+#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
+#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2)
+#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3)
+#define FULL_SWEEP_NUM_REASONS (4)
+
 // Table recording number of full GCs due to each reason
 JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];

From 7cc2fe10dd849f1e69f640e73f1cab2c62c6a6e2 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Thu, 10 Oct 2024 03:11:10 +0000
Subject: [PATCH 111/116] Removing redundant changes to upstream (wip; need to add back stuff for sticky)

---
 Makefile | 5 +----
 src/Makefile | 5 ++---
 src/builtins.c | 1 -
 src/datatype.c | 8 --------
 src/gc-common.c | 2 +-
 src/gc-stacks.c | 2 --
 src/gc-stock.c | 2 +-
 src/genericmemory.c | 1 -
 src/init.c | 7 ++-----
 src/jitlayers.h | 4 ----
 src/julia.h | 2 +-
 src/julia_internal.h | 2 +-
 src/julia_threads.h | 2 +-
 src/llvm-final-gc-lowering.cpp | 2 +-
 src/llvm-late-gc-lowering.cpp | 2 +-
 src/symbol.c | 4 ----
 src/threading.c | 3 +--
 17 files changed, 13 insertions(+), 41 deletions(-)

diff --git a/Makefile b/Makefile
index 732fcfcc77e7d..4fd8b878c5d1f 100644
--- a/Makefile
+++ b/Makefile
@@ -130,7 +130,7 @@ check-whitespace:
 ifneq ($(NO_GIT), 1)
	@# Append the directory containing the julia we just built to the end of `PATH`,
	@# to give us the best chance of being able to run this check.
- @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIA_EXECUTABLE) $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) + @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) else $(warn "Skipping whitespace check because git is unavailable") endif @@ -648,9 +648,6 @@ testall: check-whitespace $(JULIA_BUILD_MODE) testall1: check-whitespace $(JULIA_BUILD_MODE) @env JULIA_CPU_THREADS=1 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) -testall3: check-whitespace $(JULIA_BUILD_MODE) - @env JULIA_CPU_THREADS=3 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) - test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE @([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \ printf '\033[93m HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true diff --git a/src/Makefile b/src/Makefile index b27eee8db8511..b2857e63d5881 100644 --- a/src/Makefile +++ b/src/Makefile @@ -18,7 +18,6 @@ FLAGS := \ -I$(SRCDIR)/flisp -I$(SRCDIR)/support \ -I$(LIBUV_INC) -I$(build_includedir) \ -I$(JULIAHOME)/deps/valgrind - FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \ -Wno-comment -Wpointer-arith -Wundef ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result)) @@ -427,13 +426,13 @@ $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/jul @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ - $(DSYMUTIL) $@ + $(DSYMUTIL) $@ $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ - $(DSYMUTIL) $@ + $(DSYMUTIL) $@ ifneq ($(OS), WINNT) $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \ diff --git a/src/builtins.c b/src/builtins.c index d1ffadaf706cc..939aef4234ac9 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -22,7 +22,6 @@ #include #include "julia.h" #include "julia_internal.h" -#include "gc-interface.h" #include "builtin_proto.h" #include "intrinsics.h" #include "julia_assert.h" diff --git a/src/datatype.c b/src/datatype.c index 03afce0e97a25..3a2ebf2bb303a 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -290,10 +290,6 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz, if ((void*)ret == HT_NOTFOUND) { if (!should_malloc) { char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0); -#ifdef MMTK_GC - jl_ptls_t ptls = jl_current_task->ptls; - mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(perm_mem), flddesc_sz); -#endif assert(perm_mem); ret = (jl_datatype_layout_t *)perm_mem; memcpy(perm_mem, 
flddesc, flddesc_sz); @@ -977,10 +973,6 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name, jl_datatype_layout_t *layout = (jl_datatype_layout_t *) jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t), 0, 4, 0); -#ifdef MMTK_GC - jl_ptls_t ptls = jl_current_task->ptls; - mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(layout), sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t)); -#endif layout->size = large ? GC_MAX_SZCLASS+1 : 0; layout->nfields = 0; layout->alignment = sizeof(void *); diff --git a/src/gc-common.c b/src/gc-common.c index 29e9233205dd5..6ce455d3923ad 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -707,4 +707,4 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 6d96825dfd13d..a8fec938456a3 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -289,8 +289,6 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT } } -extern int gc_first_tid; - JL_DLLEXPORT jl_array_t *jl_live_tasks(void) { size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); diff --git a/src/gc-stock.c b/src/gc-stock.c index 3c5cea47a6236..daebfc4e22ba9 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3916,4 +3916,4 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) } #endif -#endif // !MMTK_GC \ No newline at end of file +#endif // !MMTK_GC diff --git a/src/genericmemory.c b/src/genericmemory.c index 6851e9131e534..93d90ef99e8b0 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -54,7 +54,6 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is tot = sizeof(jl_genericmemory_t) + sizeof(void*); } m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype); - if (pooled) { data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; } diff --git a/src/init.c b/src/init.c index 1e5cd129cf264..413d4e8055e54 100644 --- a/src/init.c +++ b/src/init.c @@ -286,7 +286,8 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; JL_STDERR = (uv_stream_t*) STDERR_FILENO; - jl_gc_run_all_finalizers(ct); + if (ct) + jl_gc_run_all_finalizers(ct); uv_loop_t *loop = jl_global_event_loop(); if (loop != NULL) { @@ -826,7 +827,6 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) arraylist_push(&eytzinger_image_tree, (void*)1); // outside image jl_ptls_t ptls = jl_init_threadtls(0); - #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" @@ -889,9 +889,6 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_start_gc_threads(); uv_barrier_wait(&thread_init_done); -#ifdef MMTK_GC - mmtk_initialize_collection((void *)ptls); -#endif jl_gc_enable(1); if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) { diff --git a/src/jitlayers.h b/src/jitlayers.h index 6dc9c51cef98d..9ae99d3a2c9b2 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -326,10 +326,6 @@ class MaxAlignedAllocImpl Align MaxAlign = alignment(Size); assert(Alignment < MaxAlign); (void)Alignment; void* result = jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); -#ifdef MMTK_GC - jl_ptls_t ptls = jl_current_task->ptls; - mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(result), Size); -#endif return result; } diff --git a/src/julia.h b/src/julia.h index 5954872dcafa6..ed3d9bf825658 100644 --- a/src/julia.h +++ b/src/julia.h 
@@ -2654,4 +2654,4 @@ typedef struct {
 }
 #endif
-#endif
\ No newline at end of file
+#endif
diff --git a/src/julia_internal.h b/src/julia_internal.h
index ec77ebe93233e..6fd537ed6baf8 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -1970,4 +1970,4 @@ JL_DLLIMPORT uint64_t jl_getUnwindInfo(uint64_t dwBase);
 #define JL_PROBE_RT_SLEEP_CHECK_UV_WAKE_ENABLED() (0)
 #endif
-#endif
\ No newline at end of file
+#endif
diff --git a/src/julia_threads.h b/src/julia_threads.h
index e118295ef4056..3b804823d796b 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -313,4 +313,4 @@ JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize);
 }
 #endif
-#endif
\ No newline at end of file
+#endif
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 816181b14e1dd..76dcd944890ab 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -225,4 +225,4 @@ PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM
         return PreservedAnalyses::allInSet<CFGAnalyses>();
     }
     return PreservedAnalyses::all();
-}
\ No newline at end of file
+}
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index bc8cec2eb42cd..4b7dc0ec855a7 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -2629,4 +2629,4 @@ PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
         }
     }
     return PreservedAnalyses::all();
-}
\ No newline at end of file
+}
diff --git a/src/symbol.c b/src/symbol.c
index 079c044752e1b..ef2c11e0842e8 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -40,10 +40,6 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
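     // (GC_OLD_MARKED is the terminal mark state: neither collector nor the
     // write barrier will ever revisit the symbol, which is safe because
     // symbols are permanently allocated and never freed.)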
jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); -#ifdef MMTK_GC - jl_ptls_t ptls = jl_current_task->ptls; - mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(tag), nb); -#endif jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); sym->hash = hash_symbol(str, len); diff --git a/src/threading.c b/src/threading.c index ac2a75e23a1c3..4e4186f5f070d 100644 --- a/src/threading.c +++ b/src/threading.c @@ -340,9 +340,8 @@ jl_ptls_t jl_init_threadtls(int16_t tid) #endif ptls->system_id = uv_thread_self(); ptls->rngseed = jl_rand(); - if (tid == 0) { + if (tid == 0) ptls->disable_gc = 1; - } #ifdef _OS_WINDOWS_ if (tid == 0) { if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), From 3f7fd3194878af7e80a01032a1fd848a76d882a0 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 10 Oct 2024 03:12:58 +0000 Subject: [PATCH 112/116] Typo --- src/gc-stock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-stock.h b/src/gc-stock.h index 91da2cd32f28a..686753fd37349 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -729,4 +729,4 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #endif -#endif // !MMTK_GC \ No newline at end of file +#endif // !MMTK_GC From 7ff37de41278b5f1290f21714f876f356266698a Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 10 Oct 2024 03:22:31 +0000 Subject: [PATCH 113/116] Cleanup --- src/Makefile | 9 --------- src/jitlayers.h | 3 +-- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/Makefile b/src/Makefile index b2857e63d5881..a88f28b98de14 100644 --- a/src/Makefile +++ b/src/Makefile @@ -270,15 +270,6 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.d $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d @$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@) -ifeq ($(WITH_MMTK), 1) -$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) - @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@) -$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) - @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@) -$(MMTK_LIB_DST): $(MMTK_LIB_SRC) - @$(call PRINT_MMTK, cp $< $@) -endif - # public header rules $(eval $(call dir_target,$(build_includedir)/julia)) define public_header_target diff --git a/src/jitlayers.h b/src/jitlayers.h index 9ae99d3a2c9b2..3353a4093bd27 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -325,8 +325,7 @@ class MaxAlignedAllocImpl LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) { Align MaxAlign = alignment(Size); assert(Alignment < MaxAlign); (void)Alignment; - void* result = jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); - return result; + return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); } inline LLVM_ATTRIBUTE_RETURNS_NONNULL From 2e1d5da8d102344b9fabaeb97b8cabaf925d18f5 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 10 Oct 2024 04:49:47 +0000 Subject: [PATCH 114/116] Cleanup; Adding pieces to make building immix possible again --- src/gc-interface.h | 2 ++ src/gc-mmtk.c | 33 +++++++++++++++++++++++++++++++++ src/gc-stock.c | 22 ++++++++++++++++++++++ src/genericmemory.c | 2 +- src/julia.h | 9 +++++++++ src/mmtk-gc.c | 39 --------------------------------------- src/staticdata.c | 2 -- src/threading.c | 1 - 8 files changed, 67 insertions(+), 43 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index b1f3ab9d6908d..6e36f5670c7f3 100644 --- a/src/gc-interface.h +++ 
b/src/gc-interface.h
@@ -96,6 +96,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection);
 // Returns whether the thread with `tid` is a collector thread
 JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT;
+// Pinning objects. Returns whether the object has been pinned by this call.
+JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj);

 // ========================================================================= //
 // Metrics
diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c
index 48992aeb43bd0..b36524e8f56fd 100644
--- a/src/gc-mmtk.c
+++ b/src/gc-mmtk.c
@@ -634,6 +634,33 @@ int jl_n_sweepthreads;
 // `tid` of first GC thread
 int gc_first_tid;

+// Write barriers
+
+// No inline write barrier -- only used for debugging
+JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
+{
+    jl_gc_wb_back(parent);
+}
+
+JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    jl_gc_wb(parent, ptr);
+}
+
+JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, (const void*) 0);
+}
+
+JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr);
+}
+
 JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
 {
     mmtk_unreachable();
@@ -722,6 +749,12 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
     return NULL;
 }

+extern unsigned char mmtk_pin_object(void* obj);
+
+JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) {
+    return mmtk_pin_object(obj);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gc-stock.c b/src/gc-stock.c
index daebfc4e22ba9..d193254834a56 100644
--- a/src/gc-stock.c
+++ b/src/gc-stock.c
@@ -3912,6 +3912,28 @@ void jl_gc_notify_image_load(const char* img_data, size_t len)
     // Do nothing
 }

+JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) {
+    return 0;
+}
+
+// Added for MMTk integration
+
+JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
+{
+}
+
+JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+}
+
+JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
+{
+}
+
+JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT
+{
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/genericmemory.c b/src/genericmemory.c
index 93d90ef99e8b0..276a4fdd17d8a 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -270,7 +270,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata
     _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
     _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
     jl_value_t *owner = jl_genericmemory_owner(dest);
-    mmtk_gc_wb(owner, NULL);
+    jl_gc_wb(owner, NULL); // FIXME: needs to be added here since the check below doesn't apply to MMTk
     if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
         jl_value_t *src_owner = jl_genericmemory_owner(src);
         ssize_t done = 0;
diff --git a/src/julia.h b/src/julia.h
index ed3d9bf825658..816d7b5d75b16 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -79,6 +79,15 @@ typedef struct _jl_tls_states_t *jl_ptls_t;
 extern "C" {
 #endif

+// object pinning ------------------------------------------------------------
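+// (Note on the pinning macros defined below: with the stock GC,
+// jl_gc_pin_object is a stub that returns 0, so pinning is a no-op; the MMTk
+// build forwards it to mmtk_pin_object, as shown in the hunks above. Intended
+// usage, sketched with a hypothetical htable_t `table`:
+//     PTRHASH_PIN(key)                  // keep key's address stable for hashing
+//     ptrhash_put(&table, key, value);
+// )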
+ +// FIXME: Pinning objects that get hashed in the ptrhash table +// until we implement address space hashing. +#define PTRHASH_PIN(key) jl_gc_pin_object(key); + +// Called when pinning objects that would cause an error if moved +#define PTR_PIN(key) jl_gc_pin_object(key); + // core data types ------------------------------------------------------------ // the common fields are hidden before the pointer, but the following macro is diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 5a104c4856c54..284e72a502b3a 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -295,15 +295,6 @@ void jl_init_thread_heap(jl_ptls_t ptls) mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator); } -void jl_free_thread_gc_state(jl_ptls_t ptls) -{ -} - -void jl_deinit_thread_heap(jl_ptls_t ptls) -{ - mmtk_destroy_mutator(&ptls->mmtk_mutator); -} - extern jl_mutex_t finalizers_lock; extern arraylist_t to_finalize; extern arraylist_t finalizer_list_marked; @@ -529,31 +520,6 @@ JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); } -// No inline write barrier -- only used for debugging -JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT -{ - jl_gc_wb_back(parent); -} - -JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - jl_gc_wb(parent, ptr); -} - -JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT -{ - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, (const void*) 0); -} - -JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT -{ - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); -} - void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; @@ -572,11 +538,6 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) mmtk_set_vm_space((void*)img_data, len); } -void jl_gc_notify_image_alloc(char* img_data, size_t len) -{ - mmtk_immortal_region_post_alloc((void*)img_data, len); -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index af24a84f39854..6f4bc61521c1a 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3915,7 +3915,6 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i ios_seek(f, datastartpos); if (needs_permalloc) { sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); - jl_gc_notify_image_alloc(sysimg, len); } else sysimg = &f->buf[f->bpos]; @@ -4025,7 +4024,6 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) ios_seek_end(&f); size_t len = ios_pos(&f); char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); - jl_gc_notify_image_alloc(sysimg, len); ios_seek(&f, 0); if (ios_readall(&f, sysimg, len) != len) jl_errorf("Error reading system image file."); diff --git a/src/threading.c b/src/threading.c index 4e4186f5f070d..9e6974da3b2ec 100644 --- a/src/threading.c +++ b/src/threading.c @@ -527,7 +527,6 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER #else pthread_mutex_unlock(&in_signal_lock); #endif - jl_deinit_thread_heap(ptls); free(ptls->bt_data); small_arraylist_free(&ptls->locks); ptls->previous_exception = NULL; From 2ca9fb0d73fc154ab612ea9aae5b4e958066c930 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 10 Oct 2024 05:47:03 
+0000 Subject: [PATCH 115/116] Trying to add sticky back again (wip) --- src/Makefile | 2 +- src/gc-mmtk.c | 17 +- src/julia.h | 72 ++++- src/llvm-final-gc-lowering.cpp | 39 +++ src/llvm-gc-interface-passes.h | 13 + src/llvm-late-gc-lowering.cpp | 59 +++- src/mmtk-gc.c | 545 --------------------------------- 7 files changed, 193 insertions(+), 554 deletions(-) delete mode 100644 src/mmtk-gc.c diff --git a/src/Makefile b/src/Makefile index a88f28b98de14..308678662c879 100644 --- a/src/Makefile +++ b/src/Makefile @@ -341,7 +341,7 @@ $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,de $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h -$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/mmtk-gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/gc-mmtk.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index b36524e8f56fd..1f20ba875b150 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -382,9 +382,18 @@ inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, } inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - // FIXME: Similarly, for now, we do nothing - // but when supporting moving, this is where we set the valid object (VO) bit - // and log (old gen) bit + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) obj; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + while(1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } + } } // allocation wrappers that track allocation and let collection run @@ -634,7 +643,7 @@ int jl_n_sweepthreads; // `tid` of first GC thread int gc_first_tid; -// Write barriers +// TODO: Move write barriers from julia.h and add them here // No inline write barrier -- only used for debugging JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT diff --git a/src/julia.h b/src/julia.h index 816d7b5d75b16..651f313021f95 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1130,7 +1130,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, struct _jl_task_t *owner) JL_N JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz); // GC write barriers - +#ifndef MMTK_GC STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { // parent and ptr isa jl_value_t* @@ -1160,6 +1160,24 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ if (ly->npointers) jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt); } +#else // MMTK_GC +STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT; + +STATIC_INLINE void jl_gc_wb(const void 
*parent, const void *ptr) JL_NOTSAFEPOINT +{ + mmtk_gc_wb(parent, ptr); +} + +STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* +{ + mmtk_gc_wb(ptr, (void*)0); +} + +STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ + mmtk_gc_wb(parent, (void*)0); +} +#endif // MMTK_GC JL_DLLEXPORT void jl_gc_safepoint(void); JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate); @@ -2659,6 +2677,58 @@ typedef struct { int emit_metadata; } jl_emission_params_t; +#ifdef MMTK_GC + +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); + +// These need to be constants. + +#define MMTK_OBJECT_BARRIER (1) +// Stickyimmix needs write barrier. Immix does not need write barrier. +#ifdef MMTK_PLAN_IMMIX +#define MMTK_NEEDS_WRITE_BARRIER (0) +#endif +#ifdef MMTK_PLAN_STICKYIMMIX +#define MMTK_NEEDS_WRITE_BARRIER (1) +#endif + +#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) +#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) + +extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; + +// Directly call into MMTk for write barrier (debugging only) +STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr); +} + +// Inlined fastpath +STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) (void*) parent; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + uint8_t byte_val = *meta_addr; + if (((byte_val >> shift) & 1) == 1) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr); + } + } +} + +STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + mmtk_gc_wb_fast(parent, ptr); +} + +#endif + #ifdef __cplusplus } #endif diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 76dcd944890ab..b06a084651231 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -117,6 +117,32 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) target->eraseFromParent(); } +#ifdef MMTK_GC +void FinalLowerGC::lowerWriteBarrier1(CallInst *target, Function &F) +{ + assert(target->arg_size() == 1); + target->setCalledFunction(writeBarrier1Func); +} + +void FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F) +{ + assert(target->arg_size() == 2); + target->setCalledFunction(writeBarrier2Func); +} + +void FinalLowerGC::lowerWriteBarrier1Slow(CallInst *target, Function &F) +{ + assert(target->arg_size() == 1); + target->setCalledFunction(writeBarrier1SlowFunc); +} + +void FinalLowerGC::lowerWriteBarrier2Slow(CallInst *target, Function &F) +{ + assert(target->arg_size() == 2); + target->setCalledFunction(writeBarrier2SlowFunc); +} +#endif + void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { ++GCAllocBytesCount; @@ -181,6 +207,12 @@ bool FinalLowerGC::runOnFunction(Function &F) smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); +#ifdef 
MMTK_GC
+    writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1);
+    writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2);
+    writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow);
+    writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
+#endif
     T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());

     // Lower all calls to supported intrinsics.
@@ -209,6 +241,13 @@ bool FinalLowerGC::runOnFunction(Function &F)
             LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
             LOWER_INTRINSIC(safepoint, lowerSafepoint);
+#ifdef MMTK_GC
+            LOWER_INTRINSIC(writeBarrier1, lowerWriteBarrier1);
+            LOWER_INTRINSIC(writeBarrier2, lowerWriteBarrier2);
+            LOWER_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow);
+            LOWER_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow);
+#endif
+
             #undef LOWER_INTRINSIC
         }
     }
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h
index ed6b94dcdc3fc..7ddfc1f1c10ef 100644
--- a/src/llvm-gc-interface-passes.h
+++ b/src/llvm-gc-interface-passes.h
@@ -389,6 +389,12 @@ struct FinalLowerGC: private JuliaPassContext {
     Function *smallAllocFunc;
     Function *bigAllocFunc;
     Function *allocTypedFunc;
+#ifdef MMTK_GC
+    Function *writeBarrier1Func;
+    Function *writeBarrier2Func;
+    Function *writeBarrier1SlowFunc;
+    Function *writeBarrier2SlowFunc;
+#endif
     Instruction *pgcstack;
     Type *T_size;

@@ -412,6 +418,13 @@ struct FinalLowerGC: private JuliaPassContext {

     // Lowers a `julia.safepoint` intrinsic.
     void lowerSafepoint(CallInst *target, Function &F);
+
+#ifdef MMTK_GC
+    void lowerWriteBarrier1(CallInst *target, Function &F);
+    void lowerWriteBarrier2(CallInst *target, Function &F);
+    void lowerWriteBarrier1Slow(CallInst *target, Function &F);
+    void lowerWriteBarrier2Slow(CallInst *target, Function &F);
+#endif
 };

 #endif // LLVM_GC_PASSES_H
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 4b7dc0ec855a7..3201ae64cf984 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1944,14 +1944,15 @@ void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVe
         if (CFGModified) {
             *CFGModified = true;
         }
+
+        IRBuilder<> builder(CI);
+        builder.SetCurrentDebugLocation(CI->getDebugLoc());
+#ifndef MMTK_GC
         auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level");
         int debug_info = 1;
         if (DebugInfoMeta != nullptr) {
             debug_info = cast<ConstantInt>(cast<ConstantAsMetadata>(DebugInfoMeta)->getValue())->getZExtValue();
         }
-
-        IRBuilder<> builder(CI);
-        builder.SetCurrentDebugLocation(CI->getDebugLoc());
         auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED);
         setName(parBits, "parent_bits", debug_info);
         auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED));
@@ -1981,6 +1982,58 @@ void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVe
         else {
             assert(false);
         }
+#else
+        // FIXME: Currently we call write barrier with the src object (parent).
+        // This works fine for the object barrier of generational plans (such as stickyimmix), which does not use the target object at all.
+        // But for other MMTk plans, we need to be careful.
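+        // The IR emitted below is intended to be the lowered form of the C
+        // fast path mmtk_gc_wb_fast in julia.h: load the side log-bit byte at
+        //     MMTK_SIDE_LOG_BIT_BASE_ADDRESS + (addr >> 6)
+        // test bit ((addr >> 3) & 7) of it, and branch to the writeBarrier1Slow
+        // intrinsic only while that bit is still set (the "unlogged" case).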
+        const bool INLINE_WRITE_BARRIER = true;
+        if (CI->getCalledOperand() == write_barrier_func) {
+            if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+                if (INLINE_WRITE_BARRIER) {
+                    auto i8_ty = Type::getInt8Ty(F.getContext());
+                    auto intptr_ty = T_size;
+
+                    // intptr_t addr = (intptr_t) (void*) src;
+                    // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
+                    intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
+                    auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
+                    auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
+
+                    auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
+                    auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
+                    auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
+
+                    // intptr_t shift = (addr >> 3) & 0b111;
+                    auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
+                    auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
+
+                    // uint8_t byte_val = *meta_addr;
+                    auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
+
+                    // if (((byte_val >> shift) & 1) == 1) {
+                    auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
+                    auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
+                    auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
+
+                    // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
+                    MDBuilder MDB(F.getContext());
+                    SmallVector<uint32_t, 2> Weights{1, 9};
+                    if (!S->DT) {
+                        S->DT = &GetDT();
+                    }
+                    DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
+                    auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
+                    builder.SetInsertPoint(mayTriggerSlowpath);
+                    builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent });
+                } else {
+                    Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
+                    builder.CreateCall(wb_func, { parent });
+                }
+            }
+        } else {
+            assert(false);
+        }
+#endif
         CI->eraseFromParent();
     }
 }
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
deleted file mode 100644
index 284e72a502b3a..0000000000000
--- a/src/mmtk-gc.c
+++ /dev/null
@@ -1,545 +0,0 @@
-// This file is a part of Julia.
License is MIT: https://julialang.org/license - -#ifdef MMTK_GC - -#include "gc.h" -#include "mmtk_julia.h" -#include "julia_gcext.h" - -// callbacks -// --- - -typedef void (*jl_gc_cb_func_t)(void); - -JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable) -{ -} -JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable) -{ -} -JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable) -{ -} -JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable) -{ -} -JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable) -{ -} -JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable) -{ -} -JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable) -{ -} - -// mutex for page profile -uv_mutex_t page_profile_lock; - -JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) -{ - uv_mutex_lock(&page_profile_lock); - const char *str = "Page profiler in unsupported in MMTk."; - ios_write(stream, str, strlen(str)); - uv_mutex_unlock(&page_profile_lock); -} - -JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; - -STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT -{ - // FIXME: MMTk would have to provide its own stats -} - -#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants - -JL_DLLEXPORT uint64_t jl_get_pg_size(void) -{ - return MMTK_GC_PAGE_SZ; -} - -inline void maybe_collect(jl_ptls_t ptls) -{ - // Just do a safe point for general maybe_collect - jl_gc_safepoint_(ptls); -} - -// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll), -// is expensive. So we only check for every few allocations. -static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) -{ - // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to - // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage - // as much as we can. - if (ptls->malloc_sz_since_last_poll > 4096) { - jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0); - mmtk_gc_poll(ptls); - } else { - jl_atomic_fetch_add_relaxed(&ptls->malloc_sz_since_last_poll, sz); - jl_gc_safepoint_(ptls); - } -} - -// allocation -int jl_gc_classify_pools(size_t sz, int *osize) -{ - if (sz > GC_MAX_SZCLASS) - return -1; // call big alloc function - size_t allocsz = sz + sizeof(jl_taggedvalue_t); - *osize = LLT_ALIGN(allocsz, 16); - return 0; // use MMTk's fastpath logic -} - -// malloc wrappers, aligned allocation -// We currently just duplicate what Julia GC does. We will in the future replace the malloc calls with MMTK's malloc. - -#if defined(_OS_WINDOWS_) -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ - return _aligned_malloc(sz ? sz : 1, align); -} -inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, - size_t align) -{ - (void)oldsz; - return _aligned_realloc(p, sz ? 
sz : 1, align); -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - _aligned_free(p); -} -#else -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return malloc(sz); -#endif - void *ptr; - if (posix_memalign(&ptr, align, sz)) - return NULL; - return ptr; -} -inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, - size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return realloc(d, sz); -#endif - void *b = jl_malloc_aligned(sz, align); - if (b != NULL) { - memcpy(b, d, oldsz > sz ? sz : oldsz); - free(d); - } - return b; -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - free(p); -} -#endif - -// weak references -// --- -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) -{ - jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); - wr->value = value; // NOTE: wb not needed here - mmtk_add_weak_candidate(wr); - return wr; -} - - -// big values -// --- - -// Size includes the tag and the tag is not cleared!! -inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) -{ - // TODO: assertion needed here? - assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); - // TODO: drop this okay? - // maybe_collect(ptls); - - jl_value_t *v = jl_mmtk_gc_alloc_big(ptls, sz); - // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_big; enable - // here when that's edited? - /* - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); - */ - // TODO: move to jl_mmtk_gc_alloc_big if needed. -/* -#ifdef MEMDEBUG - memset(v, 0xee, allocsz); -#endif -*/ - // TODO: need to set this? have to move to jl_mmtk_gc_alloc_big then. - // v->age = 0; - // TODO: dropping this; confirm okay? `sweep_big` no longer needed? - // gc_big_object_link(v, &ptls->heap.big_objects); - return v; -} - -// Size includes the tag and the tag is not cleared!! -inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) -{ - assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); -#ifdef MEMDEBUG - return jl_gc_big_alloc(ptls, osize); -#endif - // TODO: drop this okay? - // maybe_collect(ptls); - - jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL); - // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable - // here when that's edited? - /* - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1); - */ - return v; -} - -// roots -// --- - -JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) -{ - mmtk_unreachable(); -} - -// TODO: exported, but not MMTk-specific? 
-JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const void *stored, jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - mmtk_unreachable(); -} - - -// marking -// --- - -JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) -{ - mmtk_unreachable(); - return 0; -} -JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, - jl_value_t **objs, size_t nobjs) -{ - mmtk_unreachable(); -} - - -// GC control -// --- - -JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) -{ - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); - jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); - return; - } - mmtk_handle_user_collection_request(ptls, collection); -} - -// Per-thread initialization -// TODO: remove `norm_pools`, `weak_refs`, etc. from `heap`? -// TODO: remove `gc_cache`? -void jl_init_thread_heap(jl_ptls_t ptls) -{ - jl_thread_heap_t *heap = &ptls->gc_tls.heap; - jl_gc_pool_t *p = heap->norm_pools; - for (int i = 0; i < JL_GC_N_POOLS; i++) { - p[i].osize = jl_gc_sizeclasses[i]; - p[i].freelist = NULL; - p[i].newpages = NULL; - } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); - for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; - heap->big_objects = NULL; - arraylist_new(&heap->remset, 0); - arraylist_new(&ptls->finalizers, 0); - arraylist_new(&ptls->gc_tls.sweep_objs, 0); - - jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache; - gc_cache->perm_scanned_bytes = 0; - gc_cache->scanned_bytes = 0; - gc_cache->nbig_obj = 0; - - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - - // Clear the malloc sz count - jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0); - - // Create mutator - MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); - // Copy the mutator to the thread local storage - memcpy(&ptls->mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); - // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) - mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator); -} - -extern jl_mutex_t finalizers_lock; -extern arraylist_t to_finalize; -extern arraylist_t finalizer_list_marked; - -// System-wide initialization -// TODO: remove locks? remove anything else? 
-void jl_gc_init(void)
-{
- if (jl_options.heap_size_hint)
- jl_gc_set_max_memory(jl_options.heap_size_hint);
-
- JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
- JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
- uv_mutex_init(&gc_perm_lock);
-
- arraylist_new(&to_finalize, 0);
- arraylist_new(&finalizer_list_marked, 0);
-
- gc_num.interval = default_collect_interval;
- last_long_collect_interval = default_collect_interval;
- gc_num.allocd = 0;
- gc_num.max_pause = 0;
- gc_num.max_memory = 0;
-
-#ifdef _P64
- total_mem = uv_get_total_memory();
- uint64_t constrained_mem = uv_get_constrained_memory();
- if (constrained_mem > 0 && constrained_mem < total_mem)
- total_mem = constrained_mem;
-#endif
-
- // We allocate with abandon until we get close to the free memory on the machine.
- uint64_t free_mem = uv_get_available_memory();
- uint64_t high_water_mark = free_mem / 10 * 7; // 70% high water mark
-
- if (high_water_mark < max_total_memory)
- max_total_memory = high_water_mark;
-
- // MMTk-specific
- long long min_heap_size;
- long long max_heap_size;
- char* min_size_def = getenv("MMTK_MIN_HSIZE");
- char* min_size_gb = getenv("MMTK_MIN_HSIZE_G");
-
- char* max_size_def = getenv("MMTK_MAX_HSIZE");
- char* max_size_gb = getenv("MMTK_MAX_HSIZE_G");
-
- // default min heap is currently set to Julia's default_collect_interval
- if (min_size_def != NULL) {
- char *p;
- double min_size = strtod(min_size_def, &p);
- min_heap_size = (long) 1024 * 1024 * min_size;
- } else if (min_size_gb != NULL) {
- char *p;
- double min_size = strtod(min_size_gb, &p);
- min_heap_size = (long) 1024 * 1024 * 1024 * min_size;
- } else {
- min_heap_size = default_collect_interval;
- }
-
- // default max heap is currently set to 70% of the free memory in the system
- if (max_size_def != NULL) {
- char *p;
- double max_size = strtod(max_size_def, &p);
- max_heap_size = (long) 1024 * 1024 * max_size;
- } else if (max_size_gb != NULL) {
- char *p;
- double max_size = strtod(max_size_gb, &p);
- max_heap_size = (long) 1024 * 1024 * 1024 * max_size;
- } else {
- max_heap_size = uv_get_free_memory() * 70 / 100;
- }
-
- // Assert that the number of stock GC threads is 0; MMTk uses the number of threads in jl_options.ngcthreads
- assert(jl_n_gcthreads == 0);
-
- // Check that the julia_copy_stack Rust feature is enabled when COPY_STACKS is defined
- int copy_stacks;
-
-#ifdef COPY_STACKS
- copy_stacks = 1;
-#else
- copy_stacks = 0;
-#endif
-
- mmtk_julia_copy_stack_check(copy_stacks);
-
- // If only the max size is specified, initialize MMTk with a fixed-size heap
- // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads.
- // If the two values are the same, we can use either. Otherwise, we need to be careful.
- uintptr_t gcthreads = jl_options.nmarkthreads; - if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { - mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); - } else { - mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); - } -} - -// allocation wrappers that track allocation and let collection run - -JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) -{ - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - void *data = malloc(sz); - if (data != NULL && pgcstack != NULL && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - malloc_maybe_collect(ptls, sz); - jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz); - } - return data; -} - -JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) -{ - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - void *data = calloc(nm, sz); - if (data != NULL && pgcstack != NULL && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - malloc_maybe_collect(ptls, nm * sz); - jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); - } - return data; -} - -JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) -{ - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - free(p); - if (pgcstack != NULL && ct->world_age) { - jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz); - } -} - -JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) -{ - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - if (pgcstack && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - malloc_maybe_collect(ptls, sz); - if (sz < old) - jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz); - else - jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old); - } - return realloc(p, sz); -} - -jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) -{ - size_t len = jl_string_len(s); - jl_value_t *snew = jl_alloc_string(sz); - memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? 
sz : len);
- if (mmtk_is_pinned(s)) {
- // if the source string was pinned, we also pin the new one
- mmtk_pin_object(snew);
- }
- return snew;
-}
-
-JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
-{
- return 0;
-}
-
-JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void)
-{
- return 0;
-}
-
-// TODO: if this is needed, it can be added in MMTk
-JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
-{
- return NULL;
-}
-
-
-// gc-debug functions
-// ---
-
-jl_gc_pagemeta_t *jl_gc_page_metadata(void *data)
-{
- return NULL;
-}
-
-JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p)
-{
- return NULL;
-}
-
-void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT
-{
-}
-
-void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT
-{
- // May not be accurate but should be helpful enough
- uint64_t pool_count = gc_num.poolalloc;
- uint64_t big_count = gc_num.bigalloc;
- jl_safe_printf("Allocations: %" PRIu64 " "
- "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n",
- pool_count + big_count, pool_count, big_count, gc_num.pause);
-}
-
-void jl_print_gc_stats(JL_STREAM *s)
-{
-}
-
-// gc thread function
-void jl_gc_threadfun(void *arg)
-{
- mmtk_unreachable();
-}
-
-// added for MMTk integration
-
-JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT
-{
- jl_ptls_t ptls = jl_current_task->ptls;
- mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n);
-}
-
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
-{
- jl_ptls_t ptls = jl_current_task->ptls;
- size_t allocsz = mmtk_align_alloc_sz(sz);
- void* addr = mmtk_immortal_alloc_fast(&ptls->mmtk_mutator, allocsz, align, offset);
- return addr;
-}
-
-void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
-{
- return jl_gc_perm_alloc_nolock(sz, zero, align, offset);
-}
-
-void jl_gc_notify_image_load(const char* img_data, size_t len)
-{
- mmtk_set_vm_space((void*)img_data, len);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // MMTK_GC

From b536f42167c70f5dfdfb947c9554077583abfbb2 Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Fri, 11 Oct 2024 01:56:12 +0000
Subject: [PATCH 116/116] Adding jl_gc_notify_image_alloc calls that set the log bit for the whole chunk of memory

---
 src/gc-interface.h | 7 ++++++-
 src/gc-mmtk.c | 9 ++++++---
 src/gc-stock.c | 5 +++++
 src/staticdata.c | 2 ++
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/gc-interface.h b/src/gc-interface.h
index 6e36f5670c7f3..176efc81b7ca7 100644
--- a/src/gc-interface.h
+++ b/src/gc-interface.h
@@ -195,10 +195,15 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
 struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
 
 // This function notifies the GC about memory addresses that are set when loading the boot image.
-// The GC may use that information to, for instance, determine that such objects should
+// The GC may use that information to, for instance, determine that all objects in that chunk of memory should
 // be treated as marked and belonging to the old generation in nursery collections.
 void jl_gc_notify_image_load(const char* img_data, size_t len);
 
+// This function notifies the GC about memory addresses that are set when allocating the boot image.
+// The GC may use that information to, for instance, determine that all objects in that chunk of memory should
+// be treated as marked and belonging to the old generation in nursery collections.
+void jl_gc_notify_image_alloc(char* img_data, size_t len);
+
 // ========================================================================= //
 // Runtime Write-Barriers
 // ========================================================================= //

diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c
index 1f20ba875b150..f4a44471f37f8 100644
--- a/src/gc-mmtk.c
+++ b/src/gc-mmtk.c
@@ -453,6 +453,7 @@ void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offs
 jl_ptls_t ptls = jl_current_task->ptls;
 size_t allocsz = mmtk_align_alloc_sz(sz);
 void* addr = mmtk_immortal_alloc_fast(&ptls->gc_tls.mmtk_mutator, allocsz, align, offset);
+ mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(addr), allocsz);
 return addr;
 }
@@ -468,9 +469,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
 sizeof(void*) * 2 : 16));
 jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
 sizeof(void*) % align);
-
- jl_ptls_t ptls = jl_current_task->ptls;
- mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(o), allocsz);
 o->header = (uintptr_t)ty;
 return jl_valueof(o);
 }
@@ -608,6 +606,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len)
 mmtk_set_vm_space((void*)img_data, len);
 }
+void jl_gc_notify_image_alloc(char* img_data, size_t len)
+{
+ mmtk_immortal_region_post_alloc((void*)img_data, len);
+}
+
 // mutex for page profile
 uv_mutex_t page_profile_lock;
diff --git a/src/gc-stock.c b/src/gc-stock.c
index d193254834a56..e99db4c54d17e 100644
--- a/src/gc-stock.c
+++ b/src/gc-stock.c
@@ -3912,6 +3912,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len)
 // Do nothing
 }
+void jl_gc_notify_image_alloc(char* img_data, size_t len)
+{
+ // Do nothing
+}
+
 JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) {
 return 0;
 }
diff --git a/src/staticdata.c b/src/staticdata.c
index 6f4bc61521c1a..af24a84f39854 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -3915,6 +3915,7 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
 ios_seek(f, datastartpos);
 if (needs_permalloc) {
 sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
+ jl_gc_notify_image_alloc(sysimg, len);
 }
 else
 sysimg = &f->buf[f->bpos];
@@ -4024,6 +4025,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname)
 ios_seek_end(&f);
 size_t len = ios_pos(&f);
 char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
+ jl_gc_notify_image_alloc(sysimg, len);
 ios_seek(&f, 0);
 if (ios_readall(&f, sysimg, len) != len)
 jl_errorf("Error reading system image file.");
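
As context for this last commit: per the commit message, mmtk_immortal_region_post_alloc sets the same side log bit that the inlined write barrier earlier in this series tests, for the whole image region, so image objects behave like old objects whose first mutation is caught by the barrier. A C model of that bookkeeping follows (a sketch only; the real implementation lives in the mmtk-julia Rust binding, and the names below are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    extern uintptr_t mmtk_side_log_bit_base; /* stand-in for MMTK_SIDE_LOG_BIT_BASE_ADDRESS */

    /* Model of mmtk_immortal_region_post_alloc: mark every 8-byte granule in
       [start, start + len) as unlogged, so the write barrier's fast path sees
       the bit set and takes the slow path on the first write to each object. */
    static void set_log_bits_for_region(const char *start, size_t len)
    {
        for (uintptr_t addr = (uintptr_t)start; addr < (uintptr_t)start + len; addr += 8) {
            uint8_t *meta_addr = (uint8_t *)(mmtk_side_log_bit_base + (addr >> 6));
            *meta_addr |= (uint8_t)(1u << ((addr >> 3) & 7));
        }
    }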