// Patch summary: adds a `--strip-metadata` option which, per the NEWS.md entry below, removes
// docstrings, source location information, and local variable names when building a system image.
//
// NOTE(review): the line breaks inside this diff appear to have been collapsed into spaces (hunk
// lines run together and `//` comments are followed by code on the same physical line).
// Presumably the original line structure has to be restored before the patch can be applied --
// confirm against the upstream commit.
//
// Per-file changes, as visible in the hunks below:
//   NEWS.md           - announces the option under "Command-line option changes".
//   base/docs/Docs.jl - `meta` and `initmeta` also initialize when the META binding exists but is
//                       `nothing`; `initmeta` now assigns the binding without `const`.
//   base/loading.jl   - `_include_from_serialized` pushes a restored module onto
//                       `Base.Docs.modules` only if its META binding is not `nothing`.
//   base/options.jl, src/jloptions.h
//                     - append a `strip_metadata` field (Int8 / int8_t) after `rr_detach`.
//   src/jloptions.c   - defaults the field to 0, lists `--strip-metadata` in `opts_hidden`, and
//                       registers it as a no-argument long option that sets the field to 1.
//                       NOTE(review): the help text mentions docstrings and source location info
//                       only, while NEWS.md also lists local variable names -- confirm which
//                       wording is intended.
//   src/codegen.cpp   - `coverageVisitStmt` returns early when `dbg >= linetable.size()`, and
//                       `linetable.at(1)` is read only when `linetable.size() > 1`.
//   src/staticdata.c  - `jl_save_system_image_to_stream` calls `jl_strip_all_codeinfos` before the
//                       GC passes when `jl_options.strip_metadata` is set, and sets
//                       `jl_docmeta_sym` from the global `META` of `Base.Docs`.
//                       `jl_serialize_module` and `jl_write_module` then write `jl_nothing` in
//                       place of the value of any binding named `jl_docmeta_sym`.
//                       `strip_codeinfo` zeroes `codelocs`, empties `linetable` when it is an
//                       array, replaces every slot name except `jl_unused_sym` with `?`, updates
//                       `m->slot_syms` when `isdef` is set, and recompresses the IR if it arrived
//                       compressed. `strip_all_codeinfos__` applies it to each method's `source`
//                       and, via `strip_specializations_`, to each cached `inferred` value.
diff --git a/NEWS.md b/NEWS.md index 9fca99a785f3a..7fd694237d31f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -37,6 +37,8 @@ Compiler/Runtime improvements Command-line option changes --------------------------- +* New option `--strip-metadata` to remove docstrings, source location information, and local + variable names when building a system image. Multi-threading changes ----------------------- diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl index 473070114b0a2..b84b3ee8d55f4 100644 --- a/base/docs/Docs.jl +++ b/base/docs/Docs.jl @@ -74,15 +74,15 @@ const META = gensym(:meta) const METAType = IdDict{Any,Any} function meta(m::Module) - if !isdefined(m, META) + if !isdefined(m, META) || getfield(m, META) === nothing initmeta(m) end return getfield(m, META)::METAType end function initmeta(m::Module) - if !isdefined(m, META) - Core.eval(m, :(const $META = $(METAType()))) + if !isdefined(m, META) || getfield(m, META) === nothing + Core.eval(m, :($META = $(METAType()))) push!(modules, m) end nothing diff --git a/base/loading.jl b/base/loading.jl index 30f7bd25a1160..b3e2f913215e8 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -765,7 +765,7 @@ function _include_from_serialized(path::String, depmods::Vector{Any}) restored = sv[1]::Vector{Any} for M in restored M = M::Module - if isdefined(M, Base.Docs.META) + if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing push!(Base.Docs.modules, M) end if parentmodule(M) === M diff --git a/base/options.jl b/base/options.jl index 16dc884e8651e..9c82cb7f6637a 100644 --- a/base/options.jl +++ b/base/options.jl @@ -47,6 +47,7 @@ struct JLOptions warn_scope::Int8 image_codegen::Int8 rr_detach::Int8 + strip_metadata::Int8 end # This runs early in the sysimage != is not defined yet diff --git a/src/codegen.cpp b/src/codegen.cpp index b09eb8db04e07..680cbfe12842a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -7038,7 +7038,7 @@ static std::pair, jl_llvm_functions_t> }; std::vector current_lineinfo, 
new_lineinfo; auto coverageVisitStmt = [&] (size_t dbg) { - if (dbg == 0) + if (dbg == 0 || dbg >= linetable.size()) return; // Compute inlining stack for current line, inner frame first while (dbg) { @@ -7131,8 +7131,10 @@ static std::pair, jl_llvm_functions_t> sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {}); { // coverage for the function definition line number const auto &topinfo = linetable.at(0); - if (topinfo == linetable.at(1)) - current_lineinfo.push_back(1); + if (linetable.size() > 1) { + if (topinfo == linetable.at(1)) + current_lineinfo.push_back(1); + } if (do_coverage(topinfo.is_user_code)) coverageVisitLine(ctx, topinfo.file, topinfo.line); } diff --git a/src/jloptions.c b/src/jloptions.c index 31c4995eaa598..d91852f5a88e5 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -79,6 +79,7 @@ JL_DLLEXPORT void jl_init_options(void) JL_OPTIONS_WARN_SCOPE_ON, // ambiguous scope warning 0, // image-codegen 0, // rr-detach + 0, // strip-metadata }; jl_options_initialized = 1; } @@ -163,6 +164,7 @@ static const char opts_hidden[] = // compiler output options " --output-o name Generate an object file (including system image data)\n" " --output-ji name Generate a system image data file (.ji)\n" + " --strip-metadata Remove docstrings and source location info from system image\n" // compiler debugging (see the devdocs for tips on using these options) " --output-unopt-bc name Generate unoptimized LLVM bitcode (.bc)\n" @@ -212,6 +214,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_bug_report, opt_image_codegen, opt_rr_detach, + opt_strip_metadata, }; static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:"; static const struct option longopts[] = { @@ -265,6 +268,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "lisp", no_argument, 0, 1 }, { "image-codegen", no_argument, 0, opt_image_codegen }, { "rr-detach", no_argument, 0, opt_rr_detach }, + { "strip-metadata", no_argument, 0, 
opt_strip_metadata }, { 0, 0, 0, 0 } }; @@ -689,6 +693,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) case opt_rr_detach: jl_options.rr_detach = 1; break; + case opt_strip_metadata: + jl_options.strip_metadata = 1; + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index 4b2b0504a75b5..1d541834a3a22 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -51,6 +51,7 @@ typedef struct { int8_t warn_scope; int8_t image_codegen; int8_t rr_detach; + int8_t strip_metadata; } jl_options_t; #endif diff --git a/src/staticdata.c b/src/staticdata.c index 341f1a7e8c983..4db3a70f7ddee 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -266,6 +266,8 @@ static jl_typename_t *jl_idtable_typename = NULL; static jl_value_t *jl_bigint_type = NULL; static int gmp_limb_size = 0; +static jl_sym_t *jl_docmeta_sym = NULL; + enum RefTags { DataRef, ConstDataRef, @@ -399,7 +401,10 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m) jl_serialize_value(s, (jl_value_t*)table[i]); jl_binding_t *b = (jl_binding_t*)table[i+1]; jl_serialize_value(s, b->name); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->value)); + if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) + jl_serialize_value(s, jl_nothing); + else + jl_serialize_value(s, jl_atomic_load_relaxed(&b->value)); jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); jl_serialize_value(s, b->owner); } @@ -651,7 +656,10 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b), ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset); write_pointerfield(s, (jl_value_t*)b->name); - write_pointerfield(s, jl_atomic_load_relaxed(&b->value)); + if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) + write_pointerfield(s, jl_nothing); + else + 
write_pointerfield(s, jl_atomic_load_relaxed(&b->value)); write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref)); write_pointerfield(s, (jl_value_t*)b->owner); size_t flag_offset = offsetof(jl_binding_t, owner) + sizeof(b->owner); @@ -1519,6 +1527,81 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache) } } +static jl_value_t *strip_codeinfo(jl_method_t *m, jl_value_t *ci_, int isdef) +{ + jl_code_info_t *ci = NULL; + JL_GC_PUSH1(&ci); + int compressed = 0; + if (!jl_is_code_info(ci_)) { + compressed = 1; + ci = jl_uncompress_ir(m, NULL, (jl_array_t*)ci_); + } + else { + ci = (jl_code_info_t*)ci_; + } + // leave codelocs length the same so the compiler can assume that; just zero it + memset(jl_array_data(ci->codelocs), 0, jl_array_len(ci->codelocs)*sizeof(int32_t)); + // empty linetable + if (jl_is_array(ci->linetable)) + jl_array_del_end((jl_array_t*)ci->linetable, jl_array_len(ci->linetable)); + // replace slot names with `?`, except unused_sym since the compiler looks at it + jl_sym_t *questionsym = jl_symbol("?"); + int i, l = jl_array_len(ci->slotnames); + for (i = 0; i < l; i++) { + jl_value_t *s = jl_array_ptr_ref(ci->slotnames, i); + if (s != (jl_value_t*)jl_unused_sym) + jl_array_ptr_set(ci->slotnames, i, questionsym); + } + if (isdef) { + m->slot_syms = jl_compress_argnames(ci->slotnames); + jl_gc_wb(m, m->slot_syms); + } + jl_value_t *ret = (jl_value_t*)ci; + if (compressed) + ret = (jl_value_t*)jl_compress_ir(m, ci); + JL_GC_POP(); + return ret; +} + +static void strip_specializations_(jl_method_instance_t *mi) +{ + assert(jl_is_method_instance(mi)); + jl_code_instance_t *codeinst = mi->cache; + while (codeinst) { + if (codeinst->inferred && codeinst->inferred != jl_nothing) { + codeinst->inferred = strip_codeinfo(mi->def.method, codeinst->inferred, 0); + jl_gc_wb(codeinst, codeinst->inferred); + } + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } +} + +static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) +{ + 
jl_method_t *m = def->func.method; + if (m->source) { + m->source = strip_codeinfo(m, m->source, 1); + jl_gc_wb(m, m->source); + } + jl_svec_t *specializations = def->func.method->specializations; + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_value_t *mi = jl_svecref(specializations, i); + if (mi != jl_nothing) + strip_specializations_((jl_method_instance_t*)mi); + } + return 1; +} + +static void strip_all_codeinfos_(jl_methtable_t *mt, void *_env) +{ + jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL); +} + +static void jl_strip_all_codeinfos(void) +{ + jl_foreach_reachable_mtable(strip_all_codeinfos_, NULL); +} // --- entry points --- @@ -1527,6 +1610,8 @@ static void jl_cleanup_serializer2(void); static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED { + if (jl_options.strip_metadata) + jl_strip_all_codeinfos(); jl_gc_collect(JL_GC_FULL); jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers JL_TIMING(SYSIMG_DUMP); @@ -1570,6 +1655,12 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), jl_symbol("BITS_PER_LIMB"))) / 8; } + if (jl_base_module) { + jl_value_t *docs = jl_get_global(jl_base_module, jl_symbol("Docs")); + if (docs && jl_is_module(docs)) { + jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META")); + } + } { // step 1: record values (recursively) that need to go in the image size_t i;