From d47cbf65c520142bd6926f28aee8ba2a22bf0140 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 12 Apr 2024 21:22:51 -0400 Subject: [PATCH] Allow for querying of build_id from objects (#53943) For GPUCompiler we would like to support a native on disk cache of LLVM IR. One of the longstanding issues has been the cache invalidation of such an on disk cache. With #52233 we now have an integrated cache for the inference results and we can rely on `CodeInstance` to be stable across sessions. Due to #52119 we can also rely on the `objectid` to be stable. My initial thought was to key the native disk cache in GPUCompiler on the objectid of the corresponding CodeInstance (+ some compilation parameters). While discussing this with @rayegun yesterday we noted that having a CodeInstance with the same objectid might not be enough provenance. E.g., we are not guaranteed that the CodeInstance is from the same build artifact and the same precise source code. For the package images we are tracking this during loading and validate all contents at once, and we explicitly keep track of the provenance chain. This PR adds a lookup table where we map from "external_blobs", e.g. loaded images, to the corresponding top module of each image, and uses this to determine the build_id of the package image. 
--- base/loading.jl | 8 ++++++++ src/init.c | 1 + src/julia.h | 1 + src/julia_internal.h | 4 +++- src/staticdata.c | 25 ++++++++++++++++++++++++- test/precompile.jl | 8 ++++++++ test/precompile_absint1.jl | 8 ++++++++ test/precompile_absint2.jl | 8 ++++++++ 8 files changed, 61 insertions(+), 2 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index ee6c47c7ce155..86cad6f2ed94d 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -3061,6 +3061,14 @@ function module_build_id(m::Module) return (UInt128(hi) << 64) | lo end +function object_build_id(obj) + mod = ccall(:jl_object_top_module, Any, (Any,), obj) + if mod === nothing + return nothing + end + return module_build_id(mod::Module) +end + function isvalid_cache_header(f::IOStream) pkgimage = Ref{UInt8}() checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero diff --git a/src/init.c b/src/init.c index 2c1ad618948f8..41ec4c78b195d 100644 --- a/src/init.c +++ b/src/init.c @@ -826,6 +826,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) arraylist_new(&jl_linkage_blobs, 0); arraylist_new(&jl_image_relocs, 0); + arraylist_new(&jl_top_mods, 0); arraylist_new(&eytzinger_image_tree, 0); arraylist_new(&eytzinger_idxs, 0); arraylist_push(&eytzinger_idxs, (void*)0); diff --git a/src/julia.h b/src/julia.h index 8dc53b56785d3..e90e9653d2c85 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2078,6 +2078,7 @@ JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t e JL_DLLEXPORT void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage); +JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred); 
JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci); diff --git a/src/julia_internal.h b/src/julia_internal.h index 7df996d888909..9840da6b17448 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -333,6 +333,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages +JL_DLLEXPORT extern arraylist_t jl_top_mods; // external linkage: sysimg/pkgimages extern arraylist_t eytzinger_image_tree; extern arraylist_t eytzinger_idxs; @@ -1012,7 +1013,8 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT; -uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT; +// Query if this object is perm-allocated in an image. +JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT; // the first argument to jl_idtable_rehash is used to return a value // make sure it is rooted if it is used after the function returns diff --git a/src/staticdata.c b/src/staticdata.c index 3e2aaf44d8a0d..a41d6b76586eb 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -337,6 +337,8 @@ static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_ // jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i] (0-offset indexing) arraylist_t jl_linkage_blobs; arraylist_t jl_image_relocs; +// Keep track of which image corresponds to which top module. +arraylist_t jl_top_mods; // Eytzinger tree of images. 
Used for very fast jl_object_in_image queries // See https://algorithmica.org/en/eytzinger @@ -451,11 +453,23 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT return idx; } -uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT +JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT { return eyt_obj_in_img(obj); } +// Map an object to its "owning" top module +JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT +{ + size_t idx = external_blob_index(v); + size_t lbids = n_linkage_blobs(); + if (idx < lbids) { + return (jl_value_t*)jl_top_mods.items[idx]; + } + // The object is runtime allocated + return (jl_value_t*)jl_nothing; +} + // hash of definitions for predefined function pointers static htable_t fptr_to_id; void *native_functions; // opaque jl_native_code_desc_t blob used for fetching data from LLVM @@ -3550,6 +3564,15 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl arraylist_push(&jl_linkage_blobs, (void*)image_base); arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg)); arraylist_push(&jl_image_relocs, (void*)relocs_base); + if (restored == NULL) { + arraylist_push(&jl_top_mods, (void*)jl_top_module); + } else { + size_t len = jl_array_nrows(*restored); + assert(len > 0); + jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1); + assert(jl_is_module(topmod)); + arraylist_push(&jl_top_mods, (void*)topmod); + } jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t)); rebuild_image_blob_tree(); diff --git a/test/precompile.jl b/test/precompile.jl index a830a3af3ea7d..ffd6e24789aba 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -15,6 +15,11 @@ FooBase_module = :FooBase4b3a94a1a081a8cb end using .ConflictingBindings +@testset "object_build_id" begin + @test Base.object_build_id([1]) === nothing + @test Base.object_build_id(Base) == Base.module_build_id(Base) +end + # method root provenance 
rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64 @@ -350,6 +355,9 @@ precompile_test_harness(false) do dir @test objectid(Foo.a_vec_int) === Foo.oid_vec_int @test objectid(Foo.a_mat_int) === Foo.oid_mat_int @test Foo.oid_vec_int !== Foo.oid_mat_int + @test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int) + @test Base.object_build_id(Foo) == Base.module_build_id(Foo) + @test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo) end @eval begin function ccallable_test() diff --git a/test/precompile_absint1.jl b/test/precompile_absint1.jl index f47b26bce9d94..7bc0382ffda85 100644 --- a/test/precompile_absint1.jl +++ b/test/precompile_absint1.jl @@ -44,10 +44,14 @@ precompile_test_harness() do load_path @test isdefined(ci, :next) @test ci.owner === nothing @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) ci = ci.next @test !isdefined(ci, :next) @test ci.owner === cache_owner @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) end let m = only(methods(sum, (Vector{Float64},))) found = false @@ -57,10 +61,14 @@ precompile_test_harness() do load_path @test isdefined(ci, :next) @test ci.owner === cache_owner @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) ci = ci.next @test !isdefined(ci, :next) @test ci.owner === nothing @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) found = true break end diff --git a/test/precompile_absint2.jl b/test/precompile_absint2.jl index a7bf01debe861..066dcbaece4c4 100644 --- a/test/precompile_absint2.jl +++ b/test/precompile_absint2.jl @@ -67,10 +67,14 @@ precompile_test_harness() do load_path @test isdefined(ci, :next) @test ci.owner === nothing @test ci.max_world == typemax(UInt) + @test 
Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) ci = ci.next @test !isdefined(ci, :next) @test ci.owner === cache_owner @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) end let m = only(methods(sum, (Vector{Float64},))) found = false @@ -80,10 +84,14 @@ precompile_test_harness() do load_path @test isdefined(ci, :next) @test ci.owner === cache_owner @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) ci = ci.next @test !isdefined(ci, :next) @test ci.owner === nothing @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) found = true break end