Skip to content

Commit

Permalink
Allow for querying of build_id from objects (#53943)
Browse files Browse the repository at this point in the history
For GPUCompiler we would like to support a native on disk cache of LLVM
IR.
One of the longstanding issues has been the cache invalidation of such
an on disk cache.

With #52233 we now have an integrated cache for the inference results
and we can rely
on `CodeInstance` to be stable across sessions. Due to #52119 we can
also rely on the
`objectid` to be stable. 

My initial thought was to key the native disk cache in GPUCompiler on the
objectid of
the corresponding CodeInstance (+ some compilation parameters).

While discussing this with @rayegun yesterday we noted that having a
CodeInstance with
the same objectid might not be enough provenance. E.g. we are not
guaranteed that the
CodeInstance is from the same build artifact and the same precise source
code.

For the package images we are tracking this during loading and validate
all contents
at once, and we explicitly keep track of the provenance chain.

This PR adds a lookup table where we map from "external_blobs", e.g.
loaded images,
to the corresponding top module of each image, and uses this to
determine the
build_id of the package image.
  • Loading branch information
vchuravy committed Apr 13, 2024
1 parent be3bc9a commit d47cbf6
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 2 deletions.
8 changes: 8 additions & 0 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3061,6 +3061,14 @@ function module_build_id(m::Module)
return (UInt128(hi) << 64) | lo
end

"""
    object_build_id(obj)

Return the build id associated with `obj`, or `nothing` when the runtime reports
no owning top module for it.

The owning module is looked up through the runtime function
`jl_object_top_module`; when a module is returned, its id is computed with
`module_build_id`.
"""
function object_build_id(obj)
    topmod = ccall(:jl_object_top_module, Any, (Any,), obj)
    # The runtime hands back `nothing` when it has no top module for `obj`.
    topmod === nothing && return nothing
    return module_build_id(topmod::Module)
end

function isvalid_cache_header(f::IOStream)
pkgimage = Ref{UInt8}()
checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero
Expand Down
1 change: 1 addition & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)

arraylist_new(&jl_linkage_blobs, 0);
arraylist_new(&jl_image_relocs, 0);
arraylist_new(&jl_top_mods, 0);
arraylist_new(&eytzinger_image_tree, 0);
arraylist_new(&eytzinger_idxs, 0);
arraylist_push(&eytzinger_idxs, (void*)0);
Expand Down
1 change: 1 addition & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2078,6 +2078,7 @@ JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t e
JL_DLLEXPORT void jl_restore_system_image(const char *fname);
JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage);
JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT;

JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci);
Expand Down
4 changes: 3 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages
JL_DLLEXPORT extern arraylist_t jl_top_mods; // external linkage: sysimg/pkgimages
extern arraylist_t eytzinger_image_tree;
extern arraylist_t eytzinger_idxs;

Expand Down Expand Up @@ -1012,7 +1013,8 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT

size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;

uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
// Query if this object is perm-allocated in an image.
JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;

// the first argument to jl_idtable_rehash is used to return a value
// make sure it is rooted if it is used after the function returns
Expand Down
25 changes: 24 additions & 1 deletion src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_
// jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i] (0-offset indexing)
arraylist_t jl_linkage_blobs;
arraylist_t jl_image_relocs;
// Keep track of which image corresponds to which top module.
arraylist_t jl_top_mods;

// Eytzinger tree of images. Used for very fast jl_object_in_image queries
// See https://algorithmica.org/en/eytzinger
Expand Down Expand Up @@ -451,11 +453,23 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
return idx;
}

uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
// Query whether `obj` is located inside a loaded image.
// The answer is delegated entirely to `eyt_obj_in_img`.
JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
{
return eyt_obj_in_img(obj);
}

// Map an object to its "owning" top module.
// The object's image index (from `external_blob_index`) selects the matching
// entry of `jl_top_mods`. When the index does not refer to a known linkage
// blob, `jl_nothing` is returned instead.
JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT
{
    size_t blob_idx = external_blob_index(v);
    if (blob_idx >= n_linkage_blobs()) {
        // The object is runtime allocated
        return (jl_value_t*)jl_nothing;
    }
    return (jl_value_t*)jl_top_mods.items[blob_idx];
}

// hash of definitions for predefined function pointers
static htable_t fptr_to_id;
void *native_functions; // opaque jl_native_code_desc_t blob used for fetching data from LLVM
Expand Down Expand Up @@ -3550,6 +3564,15 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
arraylist_push(&jl_linkage_blobs, (void*)image_base);
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
arraylist_push(&jl_image_relocs, (void*)relocs_base);
if (restored == NULL) {
arraylist_push(&jl_top_mods, (void*)jl_top_module);
} else {
size_t len = jl_array_nrows(*restored);
assert(len > 0);
jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1);
assert(jl_is_module(topmod));
arraylist_push(&jl_top_mods, (void*)topmod);
}
jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
rebuild_image_blob_tree();

Expand Down
8 changes: 8 additions & 0 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ FooBase_module = :FooBase4b3a94a1a081a8cb
end
using .ConflictingBindings

@testset "object_build_id" begin
    # An array created here is expected to have no associated build id.
    runtime_obj = [1]
    @test Base.object_build_id(runtime_obj) === nothing
    # Looking up `Base` as an object must agree with its module build id.
    @test Base.object_build_id(Base) == Base.module_build_id(Base)
end

# method root provenance

rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64
Expand Down Expand Up @@ -350,6 +355,9 @@ precompile_test_harness(false) do dir
@test objectid(Foo.a_vec_int) === Foo.oid_vec_int
@test objectid(Foo.a_mat_int) === Foo.oid_mat_int
@test Foo.oid_vec_int !== Foo.oid_mat_int
@test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int)
@test Base.object_build_id(Foo) == Base.module_build_id(Foo)
@test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo)
end

@eval begin function ccallable_test()
Expand Down
8 changes: 8 additions & 0 deletions test/precompile_absint1.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,14 @@ precompile_test_harness() do load_path
@test isdefined(ci, :next)
@test ci.owner === nothing
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile1) ==
Base.object_build_id(ci)
ci = ci.next
@test !isdefined(ci, :next)
@test ci.owner === cache_owner
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile1) ==
Base.object_build_id(ci)
end
let m = only(methods(sum, (Vector{Float64},)))
found = false
Expand All @@ -57,10 +61,14 @@ precompile_test_harness() do load_path
@test isdefined(ci, :next)
@test ci.owner === cache_owner
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile1) ==
Base.object_build_id(ci)
ci = ci.next
@test !isdefined(ci, :next)
@test ci.owner === nothing
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile1) ==
Base.object_build_id(ci)
found = true
break
end
Expand Down
8 changes: 8 additions & 0 deletions test/precompile_absint2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,14 @@ precompile_test_harness() do load_path
@test isdefined(ci, :next)
@test ci.owner === nothing
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile2) ==
Base.object_build_id(ci)
ci = ci.next
@test !isdefined(ci, :next)
@test ci.owner === cache_owner
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile2) ==
Base.object_build_id(ci)
end
let m = only(methods(sum, (Vector{Float64},)))
found = false
Expand All @@ -80,10 +84,14 @@ precompile_test_harness() do load_path
@test isdefined(ci, :next)
@test ci.owner === cache_owner
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile2) ==
Base.object_build_id(ci)
ci = ci.next
@test !isdefined(ci, :next)
@test ci.owner === nothing
@test ci.max_world == typemax(UInt)
@test Base.module_build_id(TestAbsIntPrecompile2) ==
Base.object_build_id(ci)
found = true
break
end
Expand Down

0 comments on commit d47cbf6

Please sign in to comment.