Summary (free-text preamble placed before the first `diff --git` header):

  Adds a `--strip-ir` option alongside `--strip-metadata`.

  * NEWS.md, base/options.jl, src/jloptions.{c,h}: declare, document and parse
    the new `strip_ir` option field.
  * src/staticdata.c: introduces `field_replace`, a table keyed by struct field
    address.  `record_field_change` stores a replacement value for a field and
    `get_replaceable_field` is consulted by `jl_serialize_value_` and
    `jl_write_values`, so for `--strip-ir` the replacement is what gets
    serialized while the field itself is not assigned.  The table is created
    in `jl_save_system_image_to_stream` before stripping and freed at the end.
  * src/gf.c: `jl_compile_method_internal` also tries the unspecialized code
    instance when `def->source == jl_nothing`, and raises
    "source missing for method that needs to be compiled" when neither source
    nor `uninferred` code is present for the unspecialized instance.
  * src/jitlayers.cpp: `jl_generate_fptr_impl` only calls `jl_type_infer` when
    the method's `source` is not `jl_nothing`.

  NOTE(review): in `jl_compile_method_internal`, `def->source` is read before
  the `jl_is_method(def)` test that appears two lines later; the declaration of
  `def` is outside these hunks -- confirm `def` is always a method at that point.
  NOTE(review): line breaks and indentation in this patch were reconstructed
  from whitespace-collapsed text; hunk line counts match the `@@` headers, but
  leading whitespace and intra-line column padding (e.g. inside the
  `opts_hidden` strings) should be confirmed against the target tree.

diff --git a/NEWS.md b/NEWS.md
index f5feae306daea..03ef4c206b720 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -38,7 +38,10 @@ Command-line option changes
 ---------------------------
 
 * New option `--strip-metadata` to remove docstrings, source location information, and local
-  variable names when building a system image.
+  variable names when building a system image ([#42513]).
+* New option `--strip-ir` to remove the compiler's IR (intermediate representation) of source
+  code when building a system image. The resulting image will only work if `--compile=all` is
+  used, or if all needed code is precompiled.
 
 Multi-threading changes
 -----------------------
diff --git a/base/options.jl b/base/options.jl
index 9c82cb7f6637a..2af8337673b93 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -48,6 +48,7 @@ struct JLOptions
     image_codegen::Int8
     rr_detach::Int8
     strip_metadata::Int8
+    strip_ir::Int8
 end
 
 # This runs early in the sysimage != is not defined yet
diff --git a/src/gf.c b/src/gf.c
index 1615e13fcc2e6..b16ab0058a79c 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -1946,10 +1946,11 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             compile_option = ((jl_method_t*)def)->module->compile;
     }
 
+    // if compilation is disabled or source is unavailable, try calling unspecialized version
     if (compile_option == JL_OPTIONS_COMPILE_OFF ||
-        compile_option == JL_OPTIONS_COMPILE_MIN) {
+        compile_option == JL_OPTIONS_COMPILE_MIN ||
+        def->source == jl_nothing) {
         // copy fptr from the template method definition
-        jl_method_t *def = mi->def.method;
         if (jl_is_method(def) && def->unspecialized) {
             jl_code_instance_t *unspec = jl_atomic_load_relaxed(&def->unspecialized->cache);
             if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
@@ -1964,6 +1965,10 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 return codeinst;
             }
         }
+    }
+    // if that didn't work and compilation is off, try running in the interpreter
+    if (compile_option == JL_OPTIONS_COMPILE_OFF ||
+        compile_option == JL_OPTIONS_COMPILE_MIN) {
         jl_code_info_t *src = jl_code_for_interpreter(mi);
         if (!jl_code_requires_compiler(src)) {
             jl_code_instance_t *codeinst = jl_new_codeinst(mi,
@@ -1985,8 +1990,16 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_method_instance_t *unspec = jl_get_unspecialized(mi);
         jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
         // ask codegen to make the fptr for unspec
-        if (jl_atomic_load_relaxed(&ucache->invoke) == NULL)
+        if (jl_atomic_load_relaxed(&ucache->invoke) == NULL) {
+            if (def->source == jl_nothing && (ucache->def->uninferred == jl_nothing ||
+                                              ucache->def->uninferred == NULL)) {
+                jl_printf(JL_STDERR, "source not available for ");
+                jl_static_show(JL_STDERR, (jl_value_t*)mi);
+                jl_printf(JL_STDERR, "\n");
+                jl_error("source missing for method that needs to be compiled");
+            }
             jl_generate_fptr_for_unspecialized(ucache);
+        }
         assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
         if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
             jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 511491a31c8bb..fb5543a06685b 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -318,10 +318,12 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
     }
     if (src == NULL && jl_is_method(mi->def.method) &&
              jl_symbol_name(mi->def.method->name)[0] != '@') {
-        // If the caller didn't provide the source,
-        // see if it is inferred, or try to infer it for ourself.
-        // (but don't bother with typeinf on macros or toplevel thunks)
-        src = jl_type_infer(mi, world, 0);
+        if (mi->def.method->source != jl_nothing) {
+            // If the caller didn't provide the source and IR is available,
+            // see if it is inferred, or try to infer it for ourself.
+            // (but don't bother with typeinf on macros or toplevel thunks)
+            src = jl_type_infer(mi, world, 0);
+        }
     }
     jl_code_instance_t *compiled = jl_method_compiled(mi, world);
     if (compiled) {
diff --git a/src/jloptions.c b/src/jloptions.c
index d91852f5a88e5..214a047529354 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -80,6 +80,7 @@ JL_DLLEXPORT void jl_init_options(void)
                         0, // image-codegen
                         0, // rr-detach
                         0, // strip-metadata
+                        0, // strip-ir
     };
     jl_options_initialized = 1;
 }
@@ -165,6 +166,7 @@ static const char opts_hidden[] =
     " --output-o name Generate an object file (including system image data)\n"
     " --output-ji name Generate a system image data file (.ji)\n"
     " --strip-metadata Remove docstrings and source location info from system image\n"
+    " --strip-ir Remove IR (intermediate representation) of compiled functions\n"
 
     // compiler debugging (see the devdocs for tips on using these options)
     " --output-unopt-bc name Generate unoptimized LLVM bitcode (.bc)\n"
@@ -215,6 +217,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_image_codegen,
            opt_rr_detach,
            opt_strip_metadata,
+           opt_strip_ir,
     };
     static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
     static const struct option longopts[] = {
@@ -269,6 +272,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "image-codegen", no_argument, 0, opt_image_codegen },
         { "rr-detach", no_argument, 0, opt_rr_detach },
         { "strip-metadata", no_argument, 0, opt_strip_metadata },
+        { "strip-ir", no_argument, 0, opt_strip_ir },
         { 0, 0, 0, 0 }
     };
 
@@ -696,6 +700,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         case opt_strip_metadata:
             jl_options.strip_metadata = 1;
             break;
+        case opt_strip_ir:
+            jl_options.strip_ir = 1;
+            break;
         default:
             jl_errorf("julia: unhandled option -- %c\n"
                       "This is a bug, please report it.", c);
diff --git a/src/jloptions.h b/src/jloptions.h
index 1d541834a3a22..0f53bc0f8a4de 100644
--- a/src/jloptions.h
+++ b/src/jloptions.h
@@ -52,6 +52,7 @@ typedef struct {
     int8_t image_codegen;
     int8_t rr_detach;
     int8_t strip_metadata;
+    int8_t strip_ir;
 } jl_options_t;
 
 #endif
diff --git a/src/staticdata.c b/src/staticdata.c
index 4db3a70f7ddee..2a2ccc6924ce2 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -233,6 +233,9 @@ static arraylist_t ccallable_list;
 static htable_t fptr_to_id;
 void *native_functions;
 
+// table of struct field addresses to rewrite during saving
+static htable_t field_replace;
+
 // array of definitions for the predefined function pointers
 // (reverse of fptr_to_id)
 // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
@@ -415,6 +418,13 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
     }
 }
 
+static jl_value_t *get_replaceable_field(jl_value_t **addr)
+{
+    jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr);
+    if (fld == HT_NOTFOUND)
+        return *addr;
+    return fld;
+}
 
 #define NBOX_C 1024
 
@@ -515,7 +525,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recur
             size_t i, np = t->layout->npointers;
             for (i = 0; i < np; i++) {
                 uint32_t ptr = jl_ptr_offset(t, i);
-                jl_value_t *fld = ((jl_value_t* const*)data)[ptr];
+                jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr]);
                 jl_serialize_value(s, fld);
             }
         }
@@ -944,7 +954,7 @@ static void jl_write_values(jl_serializer_state *s)
             size_t np = t->layout->npointers;
             for (i = 0; i < np; i++) {
                 size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
-                jl_value_t *fld = *(jl_value_t**)&data[offset];
+                jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset]);
                 if (fld != NULL) {
                     arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location
                     arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
@@ -1527,7 +1537,7 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
     }
 }
 
-static jl_value_t *strip_codeinfo(jl_method_t *m, jl_value_t *ci_, int isdef)
+static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig)
 {
     jl_code_info_t *ci = NULL;
     JL_GC_PUSH1(&ci);
@@ -1552,7 +1562,7 @@ static jl_value_t *strip_codeinfo(jl_method_t *m, jl_value_t *ci_, int isdef)
         if (s != (jl_value_t*)jl_unused_sym)
             jl_array_ptr_set(ci->slotnames, i, questionsym);
     }
-    if (isdef) {
+    if (orig) {
         m->slot_syms = jl_compress_argnames(ci->slotnames);
         jl_gc_wb(m, m->slot_syms);
     }
@@ -1563,38 +1573,83 @@ static jl_value_t *strip_codeinfo(jl_method_t *m, jl_value_t *ci_, int isdef)
     return ret;
 }
 
+static void record_field_change(jl_value_t **addr, jl_value_t *newval)
+{
+    ptrhash_put(&field_replace, (void*)addr, newval);
+}
+
 static void strip_specializations_(jl_method_instance_t *mi)
 {
     assert(jl_is_method_instance(mi));
     jl_code_instance_t *codeinst = mi->cache;
     while (codeinst) {
         if (codeinst->inferred && codeinst->inferred != jl_nothing) {
-            codeinst->inferred = strip_codeinfo(mi->def.method, codeinst->inferred, 0);
-            jl_gc_wb(codeinst, codeinst->inferred);
+            if (jl_options.strip_ir) {
+                record_field_change(&codeinst->inferred, jl_nothing);
+            }
+            else if (jl_options.strip_metadata) {
+                codeinst->inferred = strip_codeinfo_meta(mi->def.method, codeinst->inferred, 0);
+                jl_gc_wb(codeinst, codeinst->inferred);
+            }
         }
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
+    if (jl_options.strip_ir) {
+        record_field_change(&mi->uninferred, NULL);
+        record_field_change((jl_value_t**)&mi->backedges, NULL);
+        record_field_change((jl_value_t**)&mi->callbacks, NULL);
+        record_field_change(&mi->specTypes, (jl_value_t*)jl_emptytuple_type);
+    }
 }
 
 static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
 {
     jl_method_t *m = def->func.method;
     if (m->source) {
-        m->source = strip_codeinfo(m, m->source, 1);
-        jl_gc_wb(m, m->source);
+        int stripped_ir = 0;
+        if (jl_options.strip_ir) {
+            if (m->unspecialized) {
+                jl_code_instance_t *unspec = jl_atomic_load_relaxed(&m->unspecialized->cache);
+                if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
+                    // we have a generic compiled version, so can remove the IR
+                    record_field_change(&m->source, jl_nothing);
+                    stripped_ir = 1;
+                }
+            }
+            if (!stripped_ir) {
+                int mod_setting = jl_get_module_compile(m->module);
+                // if the method is declared not to be compiled, keep IR for interpreter
+                if (!(mod_setting == JL_OPTIONS_COMPILE_OFF || mod_setting == JL_OPTIONS_COMPILE_MIN)) {
+                    record_field_change(&m->source, jl_nothing);
+                    stripped_ir = 1;
+                }
+            }
+        }
+        if (jl_options.strip_metadata && !stripped_ir) {
+            m->source = strip_codeinfo_meta(m, m->source, 1);
+            jl_gc_wb(m, m->source);
+        }
     }
-    jl_svec_t *specializations = def->func.method->specializations;
+    jl_svec_t *specializations = m->specializations;
     size_t i, l = jl_svec_len(specializations);
     for (i = 0; i < l; i++) {
         jl_value_t *mi = jl_svecref(specializations, i);
         if (mi != jl_nothing)
             strip_specializations_((jl_method_instance_t*)mi);
     }
+    if (jl_options.strip_ir) {
+        record_field_change((jl_value_t**)&m->specializations, (jl_value_t*)jl_emptysvec);
+        record_field_change((jl_value_t**)&m->speckeyset, jl_an_empty_vec_any);
+    }
+    if (m->unspecialized)
+        strip_specializations_(m->unspecialized);
     return 1;
 }
 
 static void strip_all_codeinfos_(jl_methtable_t *mt, void *_env)
 {
+    if (jl_options.strip_ir)
+        record_field_change((jl_value_t**)&mt->backedges, NULL);
     jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL);
 }
 
@@ -1610,11 +1665,15 @@ static void jl_cleanup_serializer2(void);
 
 static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
 {
-    if (jl_options.strip_metadata)
-        jl_strip_all_codeinfos();
     jl_gc_collect(JL_GC_FULL);
     jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers
     JL_TIMING(SYSIMG_DUMP);
+
+    htable_new(&field_replace, 10000);
+    // strip metadata and IR when requested
+    if (jl_options.strip_metadata || jl_options.strip_ir)
+        jl_strip_all_codeinfos();
+
     int en = jl_gc_enable(0);
     jl_init_serializer2(1);
     htable_reset(&backref_table, 250000);
@@ -1759,6 +1818,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
     arraylist_free(&ccallable_list);
     arraylist_free(&s.relocs_list);
     arraylist_free(&s.gctags_list);
+    htable_free(&field_replace);
     jl_cleanup_serializer2();
 
     jl_gc_enable(en);