From 096e13d2ad938ecf87bc95aa02557ac3982f4a18 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Mon, 29 May 2017 01:29:53 -0400 Subject: [PATCH] Store global variables and function addresses as 32bit offsets * Require small code model for sysimg * Makes the two tables static data and reduces the number of dynamic relocations by ~100x (~40k to ~400). PR #22472 (cherry picked from commit 92152aa231516744d3c4e4fb647346b504c61c8b) --- src/anticodegen.c | 5 +++-- src/debuginfo.cpp | 19 +++++++++++------ src/dump.c | 48 ++++++++++++++++++++++++++--------------- src/jitlayers.cpp | 51 ++++++++++++++++++++++++-------------------- src/julia_internal.h | 3 +++ 5 files changed, 78 insertions(+), 48 deletions(-) diff --git a/src/anticodegen.c b/src/anticodegen.c index b40e9b7fb343a2..b19afe21d57a8c 100644 --- a/src/anticodegen.c +++ b/src/anticodegen.c @@ -36,9 +36,10 @@ int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int no return 0; } -void jl_register_fptrs(uint64_t sysimage_base, void **fptrs, jl_method_instance_t **linfos, size_t n) +void jl_register_fptrs(uint64_t sysimage_base, const char *base, const int32_t *offsets, + jl_method_instance_t **linfos, size_t n) { - (void)sysimage_base; (void)fptrs; (void)linfos; (void)n; + (void)sysimage_base; (void)base; (void)offsets; (void)linfos; (void)n; } void jl_compile_linfo(jl_method_instance_t *li) { } diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 42167bad3fde7b..c6e8694a77212b 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -1026,13 +1026,20 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) #endif static uint64_t jl_sysimage_base; -static void **sysimg_fvars; +static const char *sysimg_fvars_base = nullptr; +static const int32_t *sysimg_fvars_offsets; static jl_method_instance_t **sysimg_fvars_linfo; static size_t sysimg_fvars_n; -extern "C" void jl_register_fptrs(uint64_t sysimage_base, void **fptrs, jl_method_instance_t **linfos, size_t n) +static const void *sysimg_fvars(size_t idx) +{ + return sysimg_fvars_base + sysimg_fvars_offsets[idx]; +} +void jl_register_fptrs(uint64_t sysimage_base, const char *base, const int32_t *offsets, + jl_method_instance_t **linfos, size_t n) { jl_sysimage_base = (uintptr_t)sysimage_base; - sysimg_fvars = fptrs; + sysimg_fvars_base = base; + sysimg_fvars_offsets = offsets; sysimg_fvars_linfo = linfos; sysimg_fvars_n = n; } @@ -1053,7 +1060,7 @@ static void get_function_name_and_base(const object::ObjectFile *object, bool in if (!object) return; // Assume we only need base address for sysimg for now - if (!insysimage || !sysimg_fvars) + if (!insysimage || !sysimg_fvars_base) saddr = nullptr; // Try platform specific methods first since they are usually faster if (saddr && !*saddr) { @@ -1457,9 +1464,9 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip return 1; } frame0->fromC = !isSysImg; - if (isSysImg && sysimg_fvars && saddr) { + if (isSysImg && sysimg_fvars_base && saddr) { for (size_t i = 0; i < sysimg_fvars_n; i++) { - if (saddr == sysimg_fvars[i]) { + if (saddr == sysimg_fvars(i)) { frame0->linfo = sysimg_fvars_linfo[i]; break; } diff --git a/src/dump.c b/src/dump.c index baa4729c98be32..c34a352ef8f00c 100644 --- a/src/dump.c +++ b/src/dump.c @@ -209,8 +209,20 @@ static void write_float64(ios_t *s, double x) #define jl_serialize_value(s, v) jl_serialize_value_((s), (jl_value_t*)(v), 0) static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal); static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc); -static jl_value_t ***sysimg_gvars = NULL; -static void **sysimg_fvars = NULL; +static char *sysimg_gvars_base = NULL; +static const int32_t *sysimg_gvars_offsets = NULL; +static const char *sysimg_fvars_base = NULL; +static const int32_t *sysimg_fvars_offsets = NULL; + +static inline jl_value_t **sysimg_gvars(size_t idx) +{ + return (jl_value_t**)(sysimg_gvars_base + sysimg_gvars_offsets[idx]); +} + +static inline const void *sysimg_fvars(const char *base, size_t idx) +{ + return base + sysimg_fvars_offsets[idx]; +} #ifdef HAVE_CPUID extern void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType); @@ -233,13 +245,17 @@ static void jl_load_sysimg_so(void) int imaging_mode = jl_generating_output() && !jl_options.incremental; // in --build mode only use sysimg data, not precompiled native code if (!imaging_mode && jl_options.use_precompiled==JL_OPTIONS_USE_PRECOMPILED_YES) { - sysimg_gvars = (jl_value_t***)jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars"); - sysimg_fvars = (void**)jl_dlsym(jl_sysimg_handle, "jl_sysimg_fvars"); + sysimg_gvars_base = (char*)jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base"); + sysimg_gvars_offsets = (const int32_t*)jl_dlsym(jl_sysimg_handle, + "jl_sysimg_gvars_offsets"); + sysimg_fvars_base = (const char*)jl_dlsym(jl_sysimg_handle, "jl_sysimg_fvars_base"); + sysimg_fvars_offsets = (const int32_t*)jl_dlsym(jl_sysimg_handle, + "jl_sysimg_fvars_offsets"); globalUnique = *(size_t*)jl_dlsym(jl_sysimg_handle, "jl_globalUnique"); #ifdef JULIA_ENABLE_THREADING size_t tls_getter_idx = *(size_t*)jl_dlsym(jl_sysimg_handle, "jl_ptls_states_getter_idx"); - *sysimg_gvars[tls_getter_idx - 1] = + *sysimg_gvars(tls_getter_idx - 1) = (jl_value_t*)jl_get_ptls_states_getter(); #endif const char *cpu_target = (const char*)jl_dlsym(jl_sysimg_handle, "jl_sysimg_cpu_target"); @@ -267,7 +283,7 @@ static void jl_load_sysimg_so(void) #ifdef _OS_WINDOWS_ sysimage_base = (intptr_t)jl_sysimg_handle; #else - if (dladdr((void*)sysimg_gvars, &dlinfo) != 0) { + if (dladdr((void*)sysimg_gvars_base, &dlinfo) != 0) { sysimage_base = (intptr_t)dlinfo.dli_fbase; } else { @@ -284,8 +300,8 @@ static jl_value_t *jl_deserialize_gv(jl_serializer_state *s, jl_value_t *v) { // Restore the GlobalVariable reference to this jl_value_t via the sysimg_gvars table int32_t gvname_index = read_int32(s->s)-1; - if (sysimg_gvars != NULL && gvname_index >= 0 && s->mode == MODE_SYSTEM_IMAGE) { - *sysimg_gvars[gvname_index] = v; + if (sysimg_gvars_base != NULL && gvname_index >= 0 && s->mode == MODE_SYSTEM_IMAGE) { + *sysimg_gvars(gvname_index) = v; } return v; } @@ -415,13 +431,11 @@ static void jl_delayed_fptrs(jl_method_instance_t *li, int32_t func, int32_t cfu } } -void jl_register_fptrs(uint64_t sysimage_base, void **fptrs, jl_method_instance_t **linfos, size_t n); - static void jl_update_all_fptrs(void) { //jl_printf(JL_STDOUT, "delayed_fptrs_n: %d\n", delayed_fptrs_n); - void **fvars = sysimg_fvars; - if (fvars == NULL) { + const char *fvars_base = sysimg_fvars_base; + if (fvars_base == NULL) { size_t i; for (i = 0; i < delayed_fptrs_n; i++) { jl_method_instance_t *li = delayed_fptrs[i].li; @@ -432,8 +446,8 @@ static void jl_update_all_fptrs(void) } // jl_fptr_to_llvm needs to decompress some ASTs, therefore this needs to be NULL // to skip trying to restore GlobalVariable pointers in jl_deserialize_gv - sysimg_gvars = NULL; - sysimg_fvars = NULL; + sysimg_gvars_base = NULL; + sysimg_fvars_base = NULL; size_t i; jl_method_instance_t **linfos = (jl_method_instance_t**)malloc(sizeof(jl_method_instance_t*) * sysimg_fvars_max); for (i = 0; i < delayed_fptrs_n; i++) { @@ -441,16 +455,16 @@ static void jl_update_all_fptrs(void) assert(li->def && li->jlcall_api && li->jlcall_api != 2); int32_t cfunc = delayed_fptrs[i].cfunc - 1; if (cfunc >= 0) { - jl_fptr_to_llvm((jl_fptr_t)fvars[cfunc], li, 1); + jl_fptr_to_llvm((jl_fptr_t)(uintptr_t)sysimg_fvars(fvars_base, cfunc), li, 1); linfos[cfunc] = li; } int32_t func = delayed_fptrs[i].func - 1; if (func >= 0) { - jl_fptr_to_llvm((jl_fptr_t)fvars[func], li, 0); + jl_fptr_to_llvm((jl_fptr_t)(uintptr_t)sysimg_fvars(fvars_base, func), li, 0); linfos[func] = li; } } - jl_register_fptrs(sysimage_base, fvars, linfos, sysimg_fvars_max); + jl_register_fptrs(sysimage_base, fvars_base, sysimg_fvars_offsets, linfos, sysimg_fvars_max); delayed_fptrs_n = 0; delayed_fptrs_max = 0; sysimg_fvars_max = 0; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index b049c5da6db04d..ce58e5b6f32a13 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -966,8 +966,8 @@ void add_named_global(GlobalValue *gv, void *addr, bool dllimport) jl_ExecutionEngine->addGlobalMapping(gv, addr); } -static std::vector jl_sysimg_gvars; -static std::vector jl_sysimg_fvars; +static std::vector jl_sysimg_gvars; +static std::vector jl_sysimg_fvars; static std::map jl_value_to_llvm; // global variables to pointers are pretty common, @@ -996,7 +996,7 @@ void* jl_emit_and_add_to_shadow(GlobalVariable *gv, void *gvarinit) addComdat(shadowvar); if (imaging_mode && gvarinit) { // make the pointer valid for future sessions - jl_sysimg_gvars.push_back(ConstantExpr::getBitCast(shadowvar, T_psize)); + jl_sysimg_gvars.push_back(shadowvar); jl_value_llvm gv_struct; gv_struct.gv = global_proto(gv); gv_struct.index = jl_sysimg_gvars.size(); @@ -1081,23 +1081,29 @@ extern "C" { } #endif -static void jl_gen_llvm_globaldata(llvm::Module *mod, ValueToValueMapTy &VMap, - const char *sysimg_data, size_t sysimg_len) +static void emit_offset_table(Module *mod, const std::vector &vars, StringRef name) { - ArrayType *gvars_type = ArrayType::get(T_psize, jl_sysimg_gvars.size()); - addComdat(new GlobalVariable(*mod, - gvars_type, - true, - GlobalVariable::ExternalLinkage, - MapValue(ConstantArray::get(gvars_type, ArrayRef(jl_sysimg_gvars)), VMap), - "jl_sysimg_gvars")); - ArrayType *fvars_type = ArrayType::get(T_pvoidfunc, jl_sysimg_fvars.size()); - addComdat(new GlobalVariable(*mod, - fvars_type, - true, + assert(!vars.empty()); + addComdat(GlobalAlias::create(GlobalVariable::ExternalLinkage, name + "_base", vars[0])); + auto vbase = ConstantExpr::getPtrToInt(vars[0], T_size); + size_t nvars = vars.size(); + std::vector offsets(nvars); + for (size_t i = 0; i < nvars; i++) { + auto ptrdiff = ConstantExpr::getSub(ConstantExpr::getPtrToInt(vars[i], T_size), vbase); + offsets[i] = sizeof(void*) == 8 ? ConstantExpr::getTrunc(ptrdiff, T_uint32) : ptrdiff; + } + ArrayType *vars_type = ArrayType::get(T_uint32, nvars); + addComdat(new GlobalVariable(*mod, vars_type, true, GlobalVariable::ExternalLinkage, - MapValue(ConstantArray::get(fvars_type, ArrayRef(jl_sysimg_fvars)), VMap), - "jl_sysimg_fvars")); + ConstantArray::get(vars_type, ArrayRef(offsets)), + name + "_offsets")); +} + + +static void jl_gen_llvm_globaldata(Module *mod, const char *sysimg_data, size_t sysimg_len) +{ + emit_offset_table(mod, jl_sysimg_gvars, "jl_sysimg_gvars"); + emit_offset_table(mod, jl_sysimg_fvars, "jl_sysimg_fvars"); addComdat(new GlobalVariable(*mod, T_size, true, @@ -1201,7 +1207,8 @@ void jl_dump_native(const char *bc_fname, const char *obj_fname, const char *sys #else Reloc::Default, #endif - CodeModel::Default, + // Use small model so that we can use signed 32bits offset in the function and GV tables + CodeModel::Small, CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag? )); @@ -1307,7 +1314,7 @@ void jl_dump_native(const char *bc_fname, const char *obj_fname, const char *sys #endif // add metadata information - jl_gen_llvm_globaldata(clone, VMap, sysimg_data, sysimg_len); + jl_gen_llvm_globaldata(shadow_output, sysimg_data, sysimg_len); // do the actual work PM.run(*clone); @@ -1323,9 +1330,7 @@ extern "C" int32_t jl_assign_functionID(void *function) assert(imaging_mode); if (function == NULL) return 0; - jl_sysimg_fvars.push_back(ConstantExpr::getBitCast( - shadow_output->getNamedValue(((Function*)function)->getName()), - T_pvoidfunc)); + jl_sysimg_fvars.push_back(shadow_output->getNamedValue(((Function*)function)->getName())); return jl_sysimg_fvars.size(); } diff --git a/src/julia_internal.h b/src/julia_internal.h index 573d80125dd9d8..785c01732a6938 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -969,6 +969,9 @@ extern jl_sym_t *inert_sym; extern jl_sym_t *static_parameter_sym; extern jl_sym_t *polly_sym; extern jl_sym_t *inline_sym; extern jl_sym_t *propagate_inbounds_sym; +void jl_register_fptrs(uint64_t sysimage_base, const char *base, const int32_t *offsets, + jl_method_instance_t **linfos, size_t n); + #ifdef __cplusplus } #endif