From 64fc754775c4615645b73964a295ac271a0416a2 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 16 May 2023 00:29:33 +0200 Subject: [PATCH 01/21] replace depot path with @depot in .ji files --- base/initdefs.jl | 41 +++++++++++++++++++++++++++++++++++++++++ base/loading.jl | 9 +++++++-- src/precompile.c | 11 ++++++----- src/staticdata_utils.c | 6 +++--- 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index c04a97971eff2..d9fbed8665069 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -115,6 +115,47 @@ function init_depot_path() nothing end +# replace leading dirname with `@depot, @stdlib` if `path` is located withiin any of +# DEPOT_PATH/compiled or Sys.STDLIB +# return normalized path otherwise +function replace_depot_path(_path::AbstractString) + path = normpath(_path) + if startswith(path, Sys.STDLIB) + length(path) == 1+length(Sys.STDLIB) && return joinpath("@stdlib", "") + return joinpath("@stdlib", path[2+length(Sys.STDLIB):end]) + end + for depot in DEPOT_PATH + if startswith(path, joinpath(depot, "compiled")) + return joinpath("@depot", path[2+length(depot):end]) + end + end + return path +end + +# resolve leading `@depot, @stdlib` alias from `_path` to point to a valid path in any +# of DEPOT_PATH/compiled or Sys.STDLIb +# if `_path` has no leading alias, we return a normalized path +function resolve_depot_path(_path::AbstractString) + path = normpath(_path) + if startswith(path, "@stdlib") + length(path) == 1+length("@stdlib") && return joinpath(Sys.STDLIB, "") + fullpath = joinpath(Sys.STDLIB, path[2+length("@stdlib"):end]) + ispath(fullpath) && return fullpath + throw(ErrorException("Failed to resolve `$path` to a stdlib path in `$(Sys.STDLIB)`.")) + elseif startswith(path, joinpath("@depot", "compiled")) + dir = path[2+length("@depot"):end] + for depot in DEPOT_PATH + fullpath = joinpath(depot, dir) + ispath(fullpath) && return fullpath + end + io = IOBuffer() + print(io, "Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH = ", + DEPOT_PATH, "`.") + throw(ErrorException(String(take!(io)))) + end + return path +end + ## LOAD_PATH & ACTIVE_PROJECT ## # JULIA_LOAD_PATH: split on `:` (or `;` on Windows) diff --git a/base/loading.jl b/base/loading.jl index 4eca0ea0e1200..9fa7e71c27011 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1671,7 +1671,8 @@ function _include_dependency(mod::Module, _path::AbstractString) end if _track_dependencies[] @lock require_lock begin - push!(_require_dependencies, (mod, path, mtime(path))) + push!(_require_dependencies, (mod, path, mtime(path), + path[1] == '\0' ? path : replace_depot_path(path))) end end return path, prev @@ -1787,7 +1788,9 @@ function __require(into::Module, mod::Symbol) end uuidkey, env = uuidkey_env if _track_dependencies[] - push!(_require_dependencies, (into, binpack(uuidkey), 0.0)) + path = binpack(uuidkey) + push!(_require_dependencies, (into, path, 0.0, + path[1] == '\0' ? path : replace_depot_path(path))) end return _require_prelocked(uuidkey, env) finally @@ -2632,6 +2635,8 @@ function parse_cache_header(f::IO) end if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) + elseif depname[1] == '@' + push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname), mtime, modpath)) else push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath)) end diff --git a/src/precompile.c b/src/precompile.c index f6266e252f609..5f3cf3738a52a 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -30,8 +30,8 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { write_uint64(f, posfile); ios_seek_end(f); // Each source-text file is written as - // int32: length of abspath - // char*: abspath + // int32: length of depot alias + // char*: depot alias // uint64: length of src text // char*: src text // At the end we write int32(0) as a terminal sentinel. @@ -50,12 +50,13 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); if (!srctp) { jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - jl_string_data(dep)); + depstr); continue; } - size_t slen = jl_string_len(dep); + jl_value_t *depalias = jl_fieldref(deptuple, 3); // file @depot alias + size_t slen = jl_string_len(depalias); write_int32(f, slen); - ios_write(f, depstr, slen); + ios_write(f, jl_string_data(depalias), slen); posfile = ios_pos(f); write_uint64(f, 0); // placeholder for length of this file in bytes uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index a4cbc3fd5ebc4..f28ed9227820e 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -713,10 +713,10 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t size_t i, l = udeps ? jl_array_len(udeps) : 0; for (i = 0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - size_t slen = jl_string_len(dep); + jl_value_t *depalias = jl_fieldref(deptuple, 3); // file @depot alias + size_t slen = jl_string_len(depalias); write_int32(s, slen); - ios_write(s, jl_string_data(dep), slen); + ios_write(s, jl_string_data(depalias), slen); write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module jl_module_t *depmod_top = depmod; From 95dbbf73b7bc6208bf96cd4a6ee3aaf1074a7229 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Sat, 20 May 2023 21:48:44 +0200 Subject: [PATCH 02/21] add unit test --- test/Makefile | 22 ++++++++++++++++++++-- test/choosetests.jl | 2 +- test/relocatedepot.jl | 23 +++++++++++++++++++++++ test/testenv.jl | 2 ++ 4 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 test/relocatedepot.jl diff --git a/test/Makefile b/test/Makefile index 88dbe5b2b4ed6..970ff702a630d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,7 +14,7 @@ unexport JULIA_BINDIR := TESTGROUPS = unicode strings compiler TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ - $(filter-out runtests testdefs, \ + $(filter-out runtests testdefs relocatedepot, \ $(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \ $(foreach group,$(TESTGROUPS), \ $(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/$(group)/*.jl))) @@ -34,6 +34,24 @@ $(addprefix revise-, $(TESTS)): revise-% : @cd $(SRCDIR) && \ $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) +relocatedepot: + @rm -rf $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + @mkdir $(SRCDIR)/relocatedepot + @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + +revise-relocatedepot: revise-% : + @rm -rf $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + @mkdir $(SRCDIR)/relocatedepot + @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + embedding: @$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS) @@ -47,4 +65,4 @@ clean: @$(MAKE) -C embedding $@ $(EMBEDDING_ARGS) @$(MAKE) -C gcext $@ $(GCEXT_ARGS) -.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clangsa clean +.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean diff --git a/test/choosetests.jl b/test/choosetests.jl index 2f77b11767dee..221a49b710d8b 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -24,7 +24,7 @@ const TESTNAMES = [ "some", "meta", "stacktraces", "docs", "gc", "misc", "threads", "stress", "binaryplatforms", "atexit", "enums", "cmdlineargs", "int", "interpreter", - "checked", "bitset", "floatfuncs", "precompile", + "checked", "bitset", "floatfuncs", "precompile", "relocatedepot", "boundscheck", "error", "ambiguous", "cartesian", "osutils", "channels", "iostream", "secretbuffer", "specificity", "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl new file mode 100644 index 0000000000000..f512d17764ccb --- /dev/null +++ b/test/relocatedepot.jl @@ -0,0 +1,23 @@ +using Test + + +include("testenv.jl") + + +@testset "compile and load relocated pkg" begin + pkg = Base.identify_package("DelimitedFiles") + if !test_relocated_depot + # precompile + Base.require(Main, :DelimitedFiles) + @test Base.root_module_exists(pkg) == true + else + path = Base.locate_package(pkg) + iscached = @lock Base.require_lock begin + m = Base._require_search_from_serialized(pkg, path, UInt128(0)) + m isa Module + end + @test iscached == true # can load from cache + Base.require(Main, :DelimitedFiles) + @test Base.root_module_exists(pkg) == true + end +end diff --git a/test/testenv.jl b/test/testenv.jl index 6f99291e01138..3ef1126e0e927 100644 --- a/test/testenv.jl +++ b/test/testenv.jl @@ -35,6 +35,8 @@ if !@isdefined(testenv_defined) const rr_exename = `` end + const test_relocated_depot = haskey(ENV, "RELOCATEDEPOT") + function addprocs_with_testenv(X; rr_allowed=true, kwargs...) exename = rr_allowed ? `$rr_exename $test_exename` : test_exename if X isa Integer From 8ecb71499719b6ca91c47f6c11a8aa6190d00630 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 1 Aug 2023 01:28:28 +0200 Subject: [PATCH 03/21] write src file size and hash to .ji and use that for is_stale check when loading --- base/initdefs.jl | 16 +++++++++++----- base/loading.jl | 37 +++++++++++++++++++++++++++---------- src/precompile.c | 2 +- src/staticdata.c | 2 +- src/staticdata_utils.c | 4 +++- 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index d9fbed8665069..da43f22940d28 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -115,8 +115,8 @@ function init_depot_path() nothing end -# replace leading dirname with `@depot, @stdlib` if `path` is located withiin any of -# DEPOT_PATH/compiled or Sys.STDLIB +# replace leading dirname with `@depot, @stdlib` if `path` is located within any of +# DEPOT_PATH or Sys.STDLIB # return normalized path otherwise function replace_depot_path(_path::AbstractString) path = normpath(_path) @@ -125,8 +125,13 @@ function replace_depot_path(_path::AbstractString) return joinpath("@stdlib", path[2+length(Sys.STDLIB):end]) end for depot in DEPOT_PATH - if startswith(path, joinpath(depot, "compiled")) - return joinpath("@depot", path[2+length(depot):end]) + if startswith(path, depot) # joinpath(depot, "compiled")) + subpath = path[1+length(depot):end] + if startswith(subpath, "/") + subpath = subpath[2:end] + end + return joinpath("@depot", subpath) + # return joinpath("@depot", path[1+length(depot):end]) end end return path @@ -142,12 +147,13 @@ function resolve_depot_path(_path::AbstractString) fullpath = joinpath(Sys.STDLIB, path[2+length("@stdlib"):end]) ispath(fullpath) && return fullpath throw(ErrorException("Failed to resolve `$path` to a stdlib path in `$(Sys.STDLIB)`.")) - elseif startswith(path, joinpath("@depot", "compiled")) + elseif startswith(path, joinpath("@depot")) #, "compiled")) dir = path[2+length("@depot"):end] for depot in DEPOT_PATH fullpath = joinpath(depot, dir) ispath(fullpath) && return fullpath end + # TODO Why IOBuffer here? io = IOBuffer() print(io, "Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH = ", DEPOT_PATH, "`.") diff --git a/base/loading.jl b/base/loading.jl index 9fa7e71c27011..1675909cddf8c 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -371,7 +371,7 @@ its `PkgId`, or `nothing` if it cannot be found. If only the `name` argument is provided, it searches each environment in the stack and its named direct dependencies. -There `where` argument provides the context from where to search for the +The `where` argument provides the context from where to search for the package: in this case it first checks if the name matches the context itself, otherwise it searches all recursive dependencies (from the resolved manifest of each environment) until it locates the context `where`, and from there @@ -1660,7 +1660,7 @@ const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled +const _require_dependencies = Any[] # a list of (mod, path, mtime, fsize, hash, depot_alias) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString) prev = source_path(nothing) @@ -1671,8 +1671,12 @@ function _include_dependency(mod::Module, _path::AbstractString) end if _track_dependencies[] @lock require_lock begin - push!(_require_dependencies, (mod, path, mtime(path), - path[1] == '\0' ? path : replace_depot_path(path))) + fsize, hash = if isfile(path) + UInt64(filesize(path)), open(_crc32c, path, "r") + else + UInt64(0), UInt32(0) + end + push!(_require_dependencies, (mod, path, mtime(path), fsize, hash, replace_depot_path(path))) end end return path, prev @@ -1789,8 +1793,7 @@ function __require(into::Module, mod::Symbol) uuidkey, env = uuidkey_env if _track_dependencies[] path = binpack(uuidkey) - push!(_require_dependencies, (into, path, 0.0, - path[1] == '\0' ? path : replace_depot_path(path))) + push!(_require_dependencies, (into, path, 0.0, UInt64(0), UInt32(0), replace_depot_path(path))) end return _require_prelocked(uuidkey, env) finally @@ -2587,6 +2590,8 @@ struct CacheHeaderIncludes id::PkgId filename::String mtime::Float64 + fsize::UInt64 + hash::UInt32 modpath::Vector{String} # seemingly not needed in Base, but used by Revise end @@ -2616,6 +2621,10 @@ function parse_cache_header(f::IO) totbytes -= n2 mtime = read(f, Float64) totbytes -= 8 + fsize = read(f, UInt64) + totbytes -= 8 + hash = read(f, UInt32) + totbytes -= 4 n1 = read(f, Int32) totbytes -= 4 # map ids to keys @@ -2635,10 +2644,8 @@ function parse_cache_header(f::IO) end if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) - elseif depname[1] == '@' - push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname), mtime, modpath)) else - push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath)) + push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname), mtime, fsize, hash, modpath)) end end prefs = String[] @@ -3212,7 +3219,7 @@ end end end for chi in includes - f, ftime_req = chi.filename, chi.mtime + f, ftime_req, fsize_req, hash_req = chi.filename, chi.mtime, chi.fsize, chi.hash if !ispath(f) _f = fixup_stdlib_path(f) if isfile(_f) && startswith(_f, Sys.STDLIB) @@ -3222,6 +3229,16 @@ end @debug "Rejecting stale cache file $cachefile because file $f does not exist" return true end + fsize = filesize(f) + if fsize != chi.fsize + @debug "Rejecting stale cache file $cachefile (file size $fsize_req) because file $f (file size $fsize) has changed" + return true + end + hash = open(_crc32c, f, "r") + if hash != chi.hash + @debug "Rejecting stale cache file $cachefile (hash $hash_req) because file $f (hash $hash) has changed" + return true + end ftime = mtime(f) is_stale = ( ftime != ftime_req ) && ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes diff --git a/src/precompile.c b/src/precompile.c index 5f3cf3738a52a..2542a4f9f4124 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -53,7 +53,7 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { depstr); continue; } - jl_value_t *depalias = jl_fieldref(deptuple, 3); // file @depot alias + jl_value_t *depalias = jl_fieldref(deptuple, 5); // file @depot alias size_t slen = jl_string_len(depalias); write_int32(f, slen); ios_write(f, jl_string_data(depalias), slen); diff --git a/src/staticdata.c b/src/staticdata.c index c684bee4c485b..5102247dc5b19 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2691,7 +2691,7 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); - // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist + // Determine unique (module, abspath, mtime, hash, fsize) dependencies for the files defining modules in the worklist // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. // Also write Preferences. // last word of the dependency list is the end of the data / start of the srctextpos diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index f28ed9227820e..16a46b5939764 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -713,11 +713,13 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t size_t i, l = udeps ? jl_array_len(udeps) : 0; for (i = 0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depalias = jl_fieldref(deptuple, 3); // file @depot alias + jl_value_t *depalias = jl_fieldref(deptuple, 5); // file @depot alias size_t slen = jl_string_len(depalias); write_int32(s, slen); ios_write(s, jl_string_data(depalias), slen); write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime + write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 3))); // fsize + write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 4))); // hash jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module jl_module_t *depmod_top = depmod; while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) From f65a30558d2fd72e27c01efbcdec17d5b8e945a9 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 1 Aug 2023 22:17:49 +0200 Subject: [PATCH 04/21] refactor depot path helpers --- base/initdefs.jl | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index da43f22940d28..df560588ee3d1 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -115,23 +115,24 @@ function init_depot_path() nothing end -# replace leading dirname with `@depot, @stdlib` if `path` is located within any of -# DEPOT_PATH or Sys.STDLIB +# replace leading dirname with `@depot, @stdlib` if `path` is located within any of DEPOT_PATH or Sys.STDLIB # return normalized path otherwise function replace_depot_path(_path::AbstractString) path = normpath(_path) if startswith(path, Sys.STDLIB) - length(path) == 1+length(Sys.STDLIB) && return joinpath("@stdlib", "") - return joinpath("@stdlib", path[2+length(Sys.STDLIB):end]) + subpath = path[nextind(path,length(Sys.STDLIB)):end] + if isabspath(subpath) + subpath = subpath[nextind(subpath,1):end] + end + return joinpath("@stdlib", subpath) end for depot in DEPOT_PATH - if startswith(path, depot) # joinpath(depot, "compiled")) - subpath = path[1+length(depot):end] - if startswith(subpath, "/") - subpath = subpath[2:end] + if startswith(path, joinpath(depot, "compiled")) || startswith(path, joinpath(depot, "packages")) + subpath = path[nextind(path,length(depot)):end] + if isabspath(subpath) + subpath = subpath[nextind(subpath,1):end] end return joinpath("@depot", subpath) - # return joinpath("@depot", path[1+length(depot):end]) end end return path @@ -143,21 +144,16 @@ end function resolve_depot_path(_path::AbstractString) path = normpath(_path) if startswith(path, "@stdlib") - length(path) == 1+length("@stdlib") && return joinpath(Sys.STDLIB, "") - fullpath = joinpath(Sys.STDLIB, path[2+length("@stdlib"):end]) + fullpath = joinpath(Sys.STDLIB, path[nextind(path,length("@stdlib")+1):end]) ispath(fullpath) && return fullpath - throw(ErrorException("Failed to resolve `$path` to a stdlib path in `$(Sys.STDLIB)`.")) - elseif startswith(path, joinpath("@depot")) #, "compiled")) - dir = path[2+length("@depot"):end] + throw(ErrorException("Failed to resolve `$path` ($fullpath) to a stdlib path in `$(Sys.STDLIB)`.")) + elseif startswith(path, joinpath("@depot")) + dir = path[nextind(path,length("@depot")+1):end] for depot in DEPOT_PATH fullpath = joinpath(depot, dir) ispath(fullpath) && return fullpath end - # TODO Why IOBuffer here? - io = IOBuffer() - print(io, "Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH = ", - DEPOT_PATH, "`.") - throw(ErrorException(String(take!(io)))) + throw(ErrorException("Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH`")) end return path end From 026924403aa584bc1d5d9956ec4203ccf900de25 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Wed, 2 Aug 2023 09:13:28 +0200 Subject: [PATCH 05/21] disable mtime check for stale_cachefile --- base/loading.jl | 22 +++++++++++----------- test/precompile.jl | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 1675909cddf8c..bd717c648234e 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -3239,17 +3239,17 @@ end @debug "Rejecting stale cache file $cachefile (hash $hash_req) because file $f (hash $hash) has changed" return true end - ftime = mtime(f) - is_stale = ( ftime != ftime_req ) && - ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes - ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching - ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds - ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. - !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond - if is_stale - @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed" - return true - end + # ftime = mtime(f) + # is_stale = ( ftime != ftime_req ) && + # ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes + # ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching + # ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds + # ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. + # !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond + # if is_stale + # @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed" + # return true + # end end end diff --git a/test/precompile.jl b/test/precompile.jl index 671535c360625..fe46984c8614c 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -584,12 +584,12 @@ precompile_test_harness(false) do dir fb_uuid = Base.module_build_id(FooBar) sleep(2); touch(FooBar_file) insert!(DEPOT_PATH, 1, dir2) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @eval using FooBar1 @test !isfile(joinpath(cachedir2, "FooBar.ji")) @test !isfile(joinpath(cachedir, "FooBar1.ji")) @test isfile(joinpath(cachedir2, "FooBar1.ji")) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Tsc @test fb_uuid == Base.module_build_id(FooBar) fb_uuid1 = Base.module_build_id(FooBar1) From 27245419d7f3896ad8d27f8c64a89dcc47013a7b Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Mon, 7 Aug 2023 21:30:00 +0200 Subject: [PATCH 06/21] fix depot helpers just use error() --- base/initdefs.jl | 14 +++++--------- base/loading.jl | 12 ++++++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index df560588ee3d1..f6c133e623e55 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -116,9 +116,7 @@ function init_depot_path() end # replace leading dirname with `@depot, @stdlib` if `path` is located within any of DEPOT_PATH or Sys.STDLIB -# return normalized path otherwise -function replace_depot_path(_path::AbstractString) - path = normpath(_path) +function replace_depot_path(path::AbstractString) if startswith(path, Sys.STDLIB) subpath = path[nextind(path,length(Sys.STDLIB)):end] if isabspath(subpath) @@ -138,22 +136,20 @@ function replace_depot_path(_path::AbstractString) return path end -# resolve leading `@depot, @stdlib` alias from `_path` to point to a valid path in any +# resolve leading `@depot, @stdlib` alias from `path` to point to a valid path in any # of DEPOT_PATH/compiled or Sys.STDLIb -# if `_path` has no leading alias, we return a normalized path -function resolve_depot_path(_path::AbstractString) - path = normpath(_path) +function resolve_depot_path(path::AbstractString) if startswith(path, "@stdlib") fullpath = joinpath(Sys.STDLIB, path[nextind(path,length("@stdlib")+1):end]) ispath(fullpath) && return fullpath - throw(ErrorException("Failed to resolve `$path` ($fullpath) to a stdlib path in `$(Sys.STDLIB)`.")) + error("Failed to resolve `$path` ($fullpath) to a stdlib path in `$(Sys.STDLIB)`") elseif startswith(path, joinpath("@depot")) dir = path[nextind(path,length("@depot")+1):end] for depot in DEPOT_PATH fullpath = joinpath(depot, dir) ispath(fullpath) && return fullpath end - throw(ErrorException("Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH`")) + error("Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH`") end return path end diff --git a/base/loading.jl b/base/loading.jl index bd717c648234e..6eda9e5254d80 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1419,6 +1419,7 @@ function isprecompiled(pkg::PkgId; end try # update timestamp of precompilation file so that it is the first to be tried by code loading + # TODO Still relevant? touch(path_to_try) catch ex # file might be read-only and then we fail to update timestamp, which is fine @@ -2606,7 +2607,8 @@ function parse_cache_header(f::IO) build_id = read(f, UInt64) # build UUID (mostly just a timestamp) push!(modules, PkgId(uuid, sym) => build_id) end - totbytes = read(f, Int64) # total bytes for file dependencies + preferences + # write_dependency_list writes UInt64 here + totbytes = Int64(read(f, UInt64)) # total bytes for file dependencies + preferences # read the list of requirements # and split the list into include and requires statements includes = CacheHeaderIncludes[] @@ -2670,6 +2672,7 @@ function parse_cache_header(f::IO) n == 0 && break sym = String(read(f, n)) # module name uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID + # why not build_id = UInt128(read(f, UInt64), read(f, UInt64)) build_id = UInt128(read(f, UInt64)) << 64 build_id |= read(f, UInt64) push!(required_modules, PkgId(uuid, sym) => build_id) @@ -3153,7 +3156,7 @@ end if build_id != UInt128(0) id_build = (UInt128(checksum) << 64) | id.second if id_build != build_id - @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))" + @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it does not provide desired build_id ($((UUID(build_id))))" return true end end @@ -3213,8 +3216,9 @@ end end for (modkey, req_modkey) in requires # verify that `require(modkey, name(req_modkey))` ==> `req_modkey` - if identify_package(modkey, req_modkey.name) != req_modkey - @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed" + pkg = identify_package(modkey, req_modkey.name) + if pkg != req_modkey + @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed, expected $modkey => $pkg" return true end end From 259efcd52b778c3bc9c27ec68bbf36717a52b92b Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 8 Aug 2023 21:10:53 +0200 Subject: [PATCH 07/21] evict mtime from precompile files Update base/loading.jl undo deletion --- base/loading.jl | 36 ++++++++++-------------------------- src/precompile.c | 2 +- src/staticdata.c | 2 +- src/staticdata_utils.c | 12 +++--------- test/precompile.jl | 2 +- 5 files changed, 16 insertions(+), 38 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 6eda9e5254d80..cc438a7e15def 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1419,7 +1419,6 @@ function isprecompiled(pkg::PkgId; end try # update timestamp of precompilation file so that it is the first to be tried by code loading - # TODO Still relevant? touch(path_to_try) catch ex # file might be read-only and then we fail to update timestamp, which is fine @@ -1661,7 +1660,7 @@ const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, path, mtime, fsize, hash, depot_alias) tuples that are the file dependencies of the module currently being precompiled +const _require_dependencies = Any[] # a list of (mod, path, fsize, hash, depot_alias) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString) prev = source_path(nothing) @@ -1677,7 +1676,7 @@ function _include_dependency(mod::Module, _path::AbstractString) else UInt64(0), UInt32(0) end - push!(_require_dependencies, (mod, path, mtime(path), fsize, hash, replace_depot_path(path))) + push!(_require_dependencies, (mod, path, fsize, hash, replace_depot_path(path))) end end return path, prev @@ -1794,7 +1793,7 @@ function __require(into::Module, mod::Symbol) uuidkey, env = uuidkey_env if _track_dependencies[] path = binpack(uuidkey) - push!(_require_dependencies, (into, path, 0.0, UInt64(0), UInt32(0), replace_depot_path(path))) + push!(_require_dependencies, (into, path, UInt64(0), UInt32(0), replace_depot_path(path))) end return _require_prelocked(uuidkey, env) finally @@ -2590,7 +2589,6 @@ end struct CacheHeaderIncludes id::PkgId filename::String - mtime::Float64 fsize::UInt64 hash::UInt32 modpath::Vector{String} # seemingly not needed in Base, but used by Revise @@ -2621,8 +2619,6 @@ function parse_cache_header(f::IO) end depname = String(read(f, n2)) totbytes -= n2 - mtime = read(f, Float64) - totbytes -= 8 fsize = read(f, UInt64) totbytes -= 8 hash = read(f, UInt32) @@ -2647,7 +2643,7 @@ function parse_cache_header(f::IO) if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) else - push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname), mtime, fsize, hash, modpath)) + push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname), fsize, hash, modpath)) end end prefs = String[] @@ -2717,7 +2713,7 @@ end function cache_dependencies(f::IO) _, (includes, _), modules, _... = parse_cache_header(f) - return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime + return modules, map(chi -> chi.filename, includes) # return just filename end function cache_dependencies(cachefile::String) @@ -3194,13 +3190,13 @@ end # check if this file is going to provide one of our concrete dependencies # or if it provides a version that conflicts with our concrete dependencies # or neither - skip_timecheck = false + skip_hashcheck = false for (req_key, req_build_id) in _concrete_dependencies build_id = get(modules, req_key, UInt64(0)) if build_id !== UInt64(0) build_id |= UInt128(checksum) << 64 if build_id === req_build_id - skip_timecheck = true + skip_hashcheck = true break end @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" @@ -3208,8 +3204,8 @@ end end end - # now check if this file is fresh relative to its source files - if !skip_timecheck + # now check if this file's content hash has changed relative to its source files + if !skip_hashcheck if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath) @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath" return true # cache file was compiled from a different path @@ -3223,11 +3219,10 @@ end end end for chi in includes - f, ftime_req, fsize_req, hash_req = chi.filename, chi.mtime, chi.fsize, chi.hash + f, fsize_req, hash_req = chi.filename, chi.fsize, chi.hash if !ispath(f) _f = fixup_stdlib_path(f) if isfile(_f) && startswith(_f, Sys.STDLIB) - # mtime is changed by extraction continue end @debug "Rejecting stale cache file $cachefile because file $f does not exist" @@ -3243,17 +3238,6 @@ end @debug "Rejecting stale cache file $cachefile (hash $hash_req) because file $f (hash $hash) has changed" return true end - # ftime = mtime(f) - # is_stale = ( ftime != ftime_req ) && - # ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes - # ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching - # ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds - # ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. - # !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond - # if is_stale - # @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed" - # return true - # end end end diff --git a/src/precompile.c b/src/precompile.c index 2542a4f9f4124..da28a9be3cf6c 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -53,7 +53,7 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { depstr); continue; } - jl_value_t *depalias = jl_fieldref(deptuple, 5); // file @depot alias + jl_value_t *depalias = jl_fieldref(deptuple, 4); // file @depot alias size_t slen = jl_string_len(depalias); write_int32(f, slen); ios_write(f, jl_string_data(depalias), slen); diff --git a/src/staticdata.c b/src/staticdata.c index 5102247dc5b19..a2bc4b317a2db 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2691,7 +2691,7 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); - // Determine unique (module, abspath, mtime, hash, fsize) dependencies for the files defining modules in the worklist + // Determine unique (module, abspath, hash, fsize, depotalias) dependencies for the files defining modules in the worklist // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. // Also write Preferences. // last word of the dependency list is the end of the data / start of the srctextpos diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 16a46b5939764..602294837f8f7 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -1,11 +1,6 @@ // inverse of backedges graph (caller=>callees hash) jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this -static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT -{ - write_uint64(s, *((uint64_t*)&x)); -} - // Decide if `t` must be new, because it points to something new. // If it is new, the object (in particular, the super field) might not be entirely // valid for the cache, so we want to finish transforming it before attempting @@ -713,13 +708,12 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t size_t i, l = udeps ? jl_array_len(udeps) : 0; for (i = 0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depalias = jl_fieldref(deptuple, 5); // file @depot alias + jl_value_t *depalias = jl_fieldref(deptuple, 4); // file @depot alias size_t slen = jl_string_len(depalias); write_int32(s, slen); ios_write(s, jl_string_data(depalias), slen); - write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime - write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 3))); // fsize - write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 4))); // hash + write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 2))); // fsize + write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 3))); // hash jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module jl_module_t *depmod_top = depmod; while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) diff --git a/test/precompile.jl b/test/precompile.jl index fe46984c8614c..7f892ab5545ae 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -382,7 +382,7 @@ precompile_test_harness(false) do dir @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n" modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) - discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime) + discard_module = mod_fl_mt -> mod_fl_mt.filename @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)), From 081f467d4593dc4b80aa7c778fea337914a90478 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Thu, 10 Aug 2023 23:11:48 +0200 Subject: [PATCH 08/21] only forward depot aliases to precompilers --- base/loading.jl | 6 +++--- base/sysimg.jl | 6 ++++-- src/precompile.c | 25 +++++++++++++++++++++---- src/staticdata.c | 2 +- src/staticdata_utils.c | 2 +- 5 files changed, 30 insertions(+), 11 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index cc438a7e15def..caf405c498594 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1660,7 +1660,7 @@ const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, path, fsize, hash, depot_alias) tuples that are the file dependencies of the module currently being precompiled +const _require_dependencies = Any[] # a list of (mod, depot_alias, fsize, hash) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString) prev = source_path(nothing) @@ -1676,7 +1676,7 @@ function _include_dependency(mod::Module, _path::AbstractString) else UInt64(0), UInt32(0) end - push!(_require_dependencies, (mod, path, fsize, hash, replace_depot_path(path))) + push!(_require_dependencies, (mod, replace_depot_path(path), fsize, hash)) end end return path, prev @@ -1793,7 +1793,7 @@ function __require(into::Module, mod::Symbol) uuidkey, env = uuidkey_env if _track_dependencies[] path = binpack(uuidkey) - push!(_require_dependencies, (into, path, UInt64(0), UInt32(0), replace_depot_path(path))) + push!(_require_dependencies, (into, replace_depot_path(path), UInt64(0), UInt32(0))) end return _require_prelocked(uuidkey, env) finally diff --git a/base/sysimg.jl b/base/sysimg.jl index bf8de0bc3f75e..51dd19390e479 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -59,8 +59,10 @@ let print_time(stdlib, tt) end for dep in Base._require_dependencies - dep[3] == 0.0 && continue - push!(Base._included_files, dep[1:2]) + mod, depotpath, fsize = dep[1], dep[2], dep[3] + fsize == 0 && continue + path = Base.resolve_depot_path(depotpath) + push!(Base._included_files, (mod, path)) end empty!(Base._require_dependencies) Base._track_dependencies[] = false diff --git a/src/precompile.c b/src/precompile.c index da28a9be3cf6c..6147cb464b666 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -36,15 +36,32 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { // char*: src text // At the end we write int32(0) as a terminal sentinel. size_t len = jl_array_len(udeps); + static jl_value_t *resolve_depot_func = NULL; + if (!resolve_depot_func) + resolve_depot_func = jl_get_global(jl_base_module, jl_symbol("resolve_depot_path")); ios_t srctext; + jl_value_t *deptuple = NULL; + JL_GC_PUSH2(&deptuple, &udeps); for (size_t i = 0; i < len; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + deptuple = jl_array_ptr_ref(udeps, i); jl_value_t *depmod = jl_fieldref(deptuple, 0); // module // Dependencies declared with `include_dependency` are excluded // because these may not be Julia code (and could be huge) if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - const char *depstr = jl_string_data(dep); + jl_value_t *depalias = jl_fieldref(deptuple, 1); // file @depot alias + + jl_value_t **resolve_depot_args; + JL_GC_PUSHARGS(resolve_depot_args, 2); + resolve_depot_args[0] = resolve_depot_func; + resolve_depot_args[1] = depalias; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t *abspath = (jl_value_t*)jl_apply(resolve_depot_args, 2); + ct->world_age = last_age; + JL_GC_POP(); + + const char *depstr = jl_string_data(abspath); if (!depstr[0]) continue; ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); @@ -53,7 +70,6 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { depstr); continue; } - jl_value_t *depalias = jl_fieldref(deptuple, 4); // file @depot alias size_t slen = jl_string_len(depalias); write_int32(f, slen); ios_write(f, jl_string_data(depalias), slen); @@ -66,6 +82,7 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { ios_seek_end(f); } } + JL_GC_POP(); } write_int32(f, 0); // mark the end of the source text } diff --git a/src/staticdata.c b/src/staticdata.c index a2bc4b317a2db..805199bc096c0 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2691,7 +2691,7 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); - // Determine unique (module, abspath, hash, fsize, depotalias) dependencies for the files defining modules in the worklist + // Determine unique (module, depotalias, hash, fsize) dependencies for the files defining modules in the worklist // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. // Also write Preferences. // last word of the dependency list is the end of the data / start of the srctextpos diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 602294837f8f7..413711a9ebf69 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -708,7 +708,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t size_t i, l = udeps ? jl_array_len(udeps) : 0; for (i = 0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depalias = jl_fieldref(deptuple, 4); // file @depot alias + jl_value_t *depalias = jl_fieldref(deptuple, 1); // file @depot alias size_t slen = jl_string_len(depalias); write_int32(s, slen); ios_write(s, jl_string_data(depalias), slen); From 80b1204533d81fe7df674a0986ac929de22e805d Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Sat, 12 Aug 2023 22:41:54 +0200 Subject: [PATCH 09/21] rework depot path logic so that relocation only works when caches and pkg sources are located in the same depot fixup comment --- base/initdefs.jl | 54 +++++++++++++++++++----------------------- base/loading.jl | 49 +++++++++++++++++++++----------------- base/sysimg.jl | 3 +-- src/precompile.c | 41 ++++++++++++++++---------------- src/staticdata.c | 5 ++-- src/staticdata_utils.c | 19 ++++++++++++++- test/precompile.jl | 2 +- 7 files changed, 96 insertions(+), 77 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index f6c133e623e55..f541a891a0a18 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -115,45 +115,41 @@ function init_depot_path() nothing end -# replace leading dirname with `@depot, @stdlib` if `path` is located within any of DEPOT_PATH or Sys.STDLIB -function replace_depot_path(path::AbstractString) - if startswith(path, Sys.STDLIB) - subpath = path[nextind(path,length(Sys.STDLIB)):end] - if isabspath(subpath) - subpath = subpath[nextind(subpath,1):end] - end - return joinpath("@stdlib", subpath) - end +# replace leading dirname of `path` with `@depot` if `path` and `cachefile` are located +# on the same depot path; otherwise just return `path` +function replace_depot_path(path::AbstractString, cachefile::AbstractString) for depot in DEPOT_PATH - if startswith(path, joinpath(depot, "compiled")) || startswith(path, joinpath(depot, "packages")) - subpath = path[nextind(path,length(depot)):end] - if isabspath(subpath) - subpath = subpath[nextind(subpath,1):end] - end - return joinpath("@depot", subpath) + if startswith(path, depot) && startswith(cachefile, depot) + depot_w_sep = joinpath(depot, "") # append the path separator + alias = joinpath("@depot", path[1+length(depot_w_sep):end]) + return alias end end + # TODO Emit debug message when cachefile is not in a depot; can this even happen? + # TODO Emit debug message when path is not in the same depot as cachefile the cache file. return path end -# resolve leading `@depot, @stdlib` alias from `path` to point to a valid path in any -# of DEPOT_PATH/compiled or Sys.STDLIb -function resolve_depot_path(path::AbstractString) - if startswith(path, "@stdlib") - fullpath = joinpath(Sys.STDLIB, path[nextind(path,length("@stdlib")+1):end]) - ispath(fullpath) && return fullpath - error("Failed to resolve `$path` ($fullpath) to a stdlib path in `$(Sys.STDLIB)`") - elseif startswith(path, joinpath("@depot")) - dir = path[nextind(path,length("@depot")+1):end] - for depot in DEPOT_PATH - fullpath = joinpath(depot, dir) - ispath(fullpath) && return fullpath +# resolve a leading `@depot` tag from `depotalias` to point to a valid path located +# in the same depot as the `cachefile` it was loaded from +# TODO IDK if returning an unresolved @depot on failure is ok? atm it is need to not break precompile tests +function resolve_depot_path(depotalias::AbstractString, cachefile::AbstractString) + !startswith(depotalias, "@depot") && return depotalias + for depot in DEPOT_PATH + if startswith(cachefile, depot) + tag = joinpath("@depot", "") # append the path separator + path = joinpath(depot, depotalias[1+length(tag):end]) + # if !ispath(path) + # error("Failed to resolve `$depotalias` to a valid path located in the same depot (`$depot`) as the cachefile `$cachefile`") + # end + return path end - error("Failed to resolve `$path` to a valid path for any depot in `DEPOT_PATH`") end - return path + return depotalias + # error("Failed to resolve `$depotalias` to a valid path, because the cachefile `$cachefile` is not on located on any `DEPOT_PATH`") end + ## LOAD_PATH & ACTIVE_PROJECT ## # JULIA_LOAD_PATH: split on `:` (or `;` on Windows) diff --git a/base/loading.jl b/base/loading.jl index caf405c498594..326a0aec3f027 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1503,7 +1503,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union io = open(path, "r") try iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") - _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io) + _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) pkgimage = !isempty(clone_targets) if pkgimage ocachepath !== nothing || return ArgumentError("Expected ocachepath to be provided") @@ -1660,7 +1660,7 @@ const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, depot_alias, fsize, hash) tuples that are the file dependencies of the module currently being precompiled +const _require_dependencies = Any[] # a list of (mod, abspath, fsize, hash) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString) prev = source_path(nothing) @@ -1676,7 +1676,7 @@ function _include_dependency(mod::Module, _path::AbstractString) else UInt64(0), UInt32(0) end - push!(_require_dependencies, (mod, replace_depot_path(path), fsize, hash)) + push!(_require_dependencies, (mod, path, fsize, hash)) end end return path, prev @@ -1688,6 +1688,7 @@ end In a module, declare that the file, directory, or symbolic link specified by `path` (relative or absolute) is a dependency for precompilation; that is, the module will need to be recompiled if the modification time of `path` changes. +TODO: Update docs because we now use file size and content hash This is only needed if your module depends on a path that is not used via [`include`](@ref). It has no effect outside of compilation. @@ -1793,7 +1794,7 @@ function __require(into::Module, mod::Symbol) uuidkey, env = uuidkey_env if _track_dependencies[] path = binpack(uuidkey) - push!(_require_dependencies, (into, replace_depot_path(path), UInt64(0), UInt32(0))) + push!(_require_dependencies, (into, path, UInt64(0), UInt32(0))) end return _require_prelocked(uuidkey, env) finally @@ -2594,7 +2595,7 @@ struct CacheHeaderIncludes modpath::Vector{String} # seemingly not needed in Base, but used by Revise end -function parse_cache_header(f::IO) +function parse_cache_header(f::IO, cachefile::AbstractString) flags = read(f, UInt8) modules = Vector{Pair{PkgId, UInt64}}() while true @@ -2643,7 +2644,8 @@ function parse_cache_header(f::IO) if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) else - push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname), fsize, hash, modpath)) + push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname, cachefile), + fsize, hash, modpath)) end end prefs = String[] @@ -2683,10 +2685,10 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - ret = parse_cache_header(io) + ret = parse_cache_header(io, cachefile) srcfiles_only || return ret _, (includes, _), _, srctextpos, _... = ret - srcfiles = srctext_files(io, srctextpos) + srcfiles = srctext_files(io, cachefile, srctextpos) delidx = Int[] for (i, chi) in enumerate(includes) chi.filename ∈ srcfiles || push!(delidx, i) @@ -2698,21 +2700,21 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) end end -preferences_hash(f::IO) = parse_cache_header(f)[6] +preferences_hash(io::IO, cachefile) = parse_cache_header(io, cachefile)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try if iszero(isvalid_cache_header(io)) throw(ArgumentError("Invalid header in cache file $cachefile.")) end - return preferences_hash(io) + return preferences_hash(io, cachefile) finally close(io) end end -function cache_dependencies(f::IO) - _, (includes, _), modules, _... = parse_cache_header(f) +function cache_dependencies(io::IO, cachefile) + _, (includes, _), modules, _... = parse_cache_header(io, cachefile) return modules, map(chi -> chi.filename, includes) # return just filename end @@ -2720,24 +2722,26 @@ function cache_dependencies(cachefile::String) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return cache_dependencies(io) + return cache_dependencies(io, cachefile) finally close(io) end end -function read_dependency_src(io::IO, filename::AbstractString) - srctextpos = parse_cache_header(io)[4] +function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) + srctextpos = parse_cache_header(io, cachefile)[4] srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) - return _read_dependency_src(io, filename) + return _read_dependency_src(io, cachefile, filename) end -function _read_dependency_src(io::IO, filename::AbstractString) +function _read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) + msg = "" while !eof(io) filenamelen = read(io, Int32) filenamelen == 0 && break - fn = String(read(io, filenamelen)) + depotfn = String(read(io, filenamelen)) + fn = resolve_depot_path(depotfn, cachefile) len = read(io, UInt64) if fn == filename return String(read(io, len)) @@ -2751,20 +2755,21 @@ function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return read_dependency_src(io, filename) + return read_dependency_src(io, cachefile, filename) finally close(io) end end -function srctext_files(f::IO, srctextpos::Int64) +function srctext_files(f::IO, cachefile::AbstractString, srctextpos::Int64) files = Set{String}() srctextpos == 0 && return files seek(f, srctextpos) while !eof(f) filenamelen = read(f, Int32) filenamelen == 0 && break - fn = String(read(f, filenamelen)) + depotfn = String(read(f, filenamelen)) + fn = resolve_depot_path(depotfn, cachefile) len = read(f, UInt64) push!(files, fn) seek(f, position(f) + len) @@ -3109,7 +3114,7 @@ end @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io) + modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io, cachefile) if isempty(modules) return true # ignore empty file end diff --git a/base/sysimg.jl b/base/sysimg.jl index 51dd19390e479..a1309f48a4ec2 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -59,9 +59,8 @@ let print_time(stdlib, tt) end for dep in Base._require_dependencies - mod, depotpath, fsize = dep[1], dep[2], dep[3] + mod, path, fsize = dep[1], dep[2], dep[3] fsize == 0 && continue - path = Base.resolve_depot_path(depotpath) push!(Base._included_files, (mod, path)) end empty!(Base._require_dependencies) diff --git a/src/precompile.c b/src/precompile.c index 6147cb464b666..01b30c497b43b 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -30,15 +30,15 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { write_uint64(f, posfile); ios_seek_end(f); // Each source-text file is written as - // int32: length of depot alias - // char*: depot alias + // int32: length of abspath + // char*: abspath // uint64: length of src text // char*: src text // At the end we write int32(0) as a terminal sentinel. size_t len = jl_array_len(udeps); - static jl_value_t *resolve_depot_func = NULL; - if (!resolve_depot_func) - resolve_depot_func = jl_get_global(jl_base_module, jl_symbol("resolve_depot_path")); + static jl_value_t *replace_depot_func = NULL; + if (!replace_depot_func) + replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); ios_t srctext; jl_value_t *deptuple = NULL; JL_GC_PUSH2(&deptuple, &udeps); @@ -48,28 +48,29 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { // Dependencies declared with `include_dependency` are excluded // because these may not be Julia code (and could be huge) if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *depalias = jl_fieldref(deptuple, 1); // file @depot alias + jl_value_t *abspath = jl_fieldref(deptuple, 1); // file abspath + const char *abspathstr = jl_string_data(abspath); + if (!abspathstr[0]) + continue; + ios_t *srctp = ios_file(&srctext, abspathstr, 1, 0, 0, 0); + if (!srctp) { + jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", + abspathstr); + continue; + } - jl_value_t **resolve_depot_args; - JL_GC_PUSHARGS(resolve_depot_args, 2); - resolve_depot_args[0] = resolve_depot_func; - resolve_depot_args[1] = depalias; + jl_value_t **replace_depot_args; + JL_GC_PUSHARGS(replace_depot_args, 3); + replace_depot_args[0] = replace_depot_func; + replace_depot_args[1] = abspath; + replace_depot_args[2] = jl_cstr_to_string(jl_options.outputji); jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_value_t *abspath = (jl_value_t*)jl_apply(resolve_depot_args, 2); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3); ct->world_age = last_age; JL_GC_POP(); - const char *depstr = jl_string_data(abspath); - if (!depstr[0]) - continue; - ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); - if (!srctp) { - jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - depstr); - continue; - } size_t slen = jl_string_len(depalias); write_int32(f, slen); ios_write(f, jl_string_data(depalias), slen); diff --git a/src/staticdata.c b/src/staticdata.c index 805199bc096c0..a2b8af91e7067 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2691,8 +2691,9 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); - // Determine unique (module, depotalias, hash, fsize) dependencies for the files defining modules in the worklist - // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. + // Determine unique (module, abspath, fsize, hash) dependencies for the files defining modules in the worklist + // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header + // (abspath will be converted to a relocateable @depot path before writing, Base.replace_depot_path). // Also write Preferences. // last word of the dependency list is the end of the data / start of the srctextpos *srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later) diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 413711a9ebf69..a3c6042790a78 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -701,6 +701,10 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); ct->world_age = last_age; + static jl_value_t *replace_depot_func = NULL; + if (!replace_depot_func) + replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + // write a placeholder for total size so that we can quickly seek past all of the // dependencies if we don't need them initial_pos = ios_pos(s); @@ -708,7 +712,20 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t size_t i, l = udeps ? jl_array_len(udeps) : 0; for (i = 0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depalias = jl_fieldref(deptuple, 1); // file @depot alias + jl_value_t *abspath = jl_fieldref(deptuple, 1); + + jl_value_t **replace_depot_args; + JL_GC_PUSHARGS(replace_depot_args, 3); + replace_depot_args[0] = replace_depot_func; + replace_depot_args[1] = abspath; + replace_depot_args[2] = jl_cstr_to_string(jl_options.outputji); + ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3); + ct->world_age = last_age; + JL_GC_POP(); + size_t slen = jl_string_len(depalias); write_int32(s, slen); ios_write(s, jl_string_data(depalias), slen); diff --git a/test/precompile.jl b/test/precompile.jl index 7f892ab5545ae..146ff26e714c2 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1712,7 +1712,7 @@ precompile_test_harness("PkgCacheInspector") do load_path try # isvalid_cache_header returns checksum id or zero Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile.")) - depmodnames = Base.parse_cache_header(io)[3] + depmodnames = Base.parse_cache_header(io, cachefile)[3] Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile.")) finally close(io) From 9bd0c226336caf65933ef196bb0f78805bc564b4 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Sun, 20 Aug 2023 21:58:33 +0200 Subject: [PATCH 10/21] gitignore relocatedepot test artifacts --- test/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/test/.gitignore b/test/.gitignore index a1af9ae3d44bf..6b9b927242ac8 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -2,3 +2,4 @@ /ccalltest /ccalltest.s /libccalltest.* +/relocatedepot From a51dbd6fc334d71a5b69c29c7da2e5110545e702 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Sun, 20 Aug 2023 22:00:18 +0200 Subject: [PATCH 11/21] rework depot path resolution logic, take 2 Only resolve a @depot tag to a depot in DEPOT_PATH if all dependencies can be resolved to files located in one and the same depot. The first depot on DEPOT_PATH which matches wins. --- base/initdefs.jl | 34 ------------------ base/loading.jl | 88 +++++++++++++++++++++++++++++++++++----------- test/precompile.jl | 2 +- 3 files changed, 68 insertions(+), 56 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index f541a891a0a18..595ad6a82ec58 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -115,40 +115,6 @@ function init_depot_path() nothing end -# replace leading dirname of `path` with `@depot` if `path` and `cachefile` are located -# on the same depot path; otherwise just return `path` -function replace_depot_path(path::AbstractString, cachefile::AbstractString) - for depot in DEPOT_PATH - if startswith(path, depot) && startswith(cachefile, depot) - depot_w_sep = joinpath(depot, "") # append the path separator - alias = joinpath("@depot", path[1+length(depot_w_sep):end]) - return alias - end - end - # TODO Emit debug message when cachefile is not in a depot; can this even happen? - # TODO Emit debug message when path is not in the same depot as cachefile the cache file. - return path -end - -# resolve a leading `@depot` tag from `depotalias` to point to a valid path located -# in the same depot as the `cachefile` it was loaded from -# TODO IDK if returning an unresolved @depot on failure is ok? atm it is need to not break precompile tests -function resolve_depot_path(depotalias::AbstractString, cachefile::AbstractString) - !startswith(depotalias, "@depot") && return depotalias - for depot in DEPOT_PATH - if startswith(cachefile, depot) - tag = joinpath("@depot", "") # append the path separator - path = joinpath(depot, depotalias[1+length(tag):end]) - # if !ispath(path) - # error("Failed to resolve `$depotalias` to a valid path located in the same depot (`$depot`) as the cachefile `$cachefile`") - # end - return path - end - end - return depotalias - # error("Failed to resolve `$depotalias` to a valid path, because the cachefile `$cachefile` is not on located on any `DEPOT_PATH`") -end - ## LOAD_PATH & ACTIVE_PROJECT ## diff --git a/base/loading.jl b/base/loading.jl index 326a0aec3f027..873130984408c 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1503,7 +1503,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union io = open(path, "r") try iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") - _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) + _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io) pkgimage = !isempty(clone_targets) if pkgimage ocachepath !== nothing || return ArgumentError("Expected ocachepath to be provided") @@ -2587,7 +2587,7 @@ function isvalid_pkgimage_crc(f::IOStream, ocachefile::String) expected_crc_so == crc_so end -struct CacheHeaderIncludes +mutable struct CacheHeaderIncludes id::PkgId filename::String fsize::UInt64 @@ -2595,7 +2595,43 @@ struct CacheHeaderIncludes modpath::Vector{String} # seemingly not needed in Base, but used by Revise end -function parse_cache_header(f::IO, cachefile::AbstractString) +# replace leading dirname of `path` with `@depot` if `path` and `cachefile` are located +# on the same depot path; otherwise just return `path` +function replace_depot_path(path::AbstractString, cachefile::AbstractString) + for depot in DEPOT_PATH + if startswith(path, depot) && startswith(cachefile, depot) + depot_w_sep = joinpath(depot, "") # append the path separator + alias = joinpath("@depot", path[1+length(depot_w_sep):end]) + return alias + end + end + # TODO Emit debug message when cachefile is not in a depot; can this even happen? + # TODO Emit debug message when path is not in the same depot as cachefile the cache file. + # TODO To be consistent with resolve_depot_paths!, this function should only + # insert a @depot alias when all files are located in one depot + return path +end + +# Attempt to resolve all leading `@depot` tags from the included files if they +# can all be located in one and the same depot (first depot in DEPOT_PATH with match wins). +# Returns true if tags could be resolved. +function resolve_depot_paths!(includes::Vector{CacheHeaderIncludes}) + for depot in DEPOT_PATH + all_in_depot = all(includes) do inc + isfile(replace(inc.filename, "@depot" => depot)) + end + if all_in_depot + for inc in includes + inc.filename = replace(inc.filename, "@depot" => depot) + end + return true + end + end + # error("Failed to resolve `$depotalias` to a valid path, because the cachefile `$cachefile` is not on located on any `DEPOT_PATH`") + return false +end + +function parse_cache_header(f::IO) flags = read(f, UInt8) modules = Vector{Pair{PkgId, UInt64}}() while true @@ -2644,8 +2680,7 @@ function parse_cache_header(f::IO, cachefile::AbstractString) if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) else - push!(includes, CacheHeaderIncludes(modkey, resolve_depot_path(depname, cachefile), - fsize, hash, modpath)) + push!(includes, CacheHeaderIncludes(modkey, depname, fsize, hash, modpath)) end end prefs = String[] @@ -2678,6 +2713,9 @@ function parse_cache_header(f::IO, cachefile::AbstractString) l = read(f, Int32) clone_targets = read(f, l) + # TODO @debug for when we failed resolving @depot + resolve_depot_paths!(includes) + return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags end @@ -2685,10 +2723,10 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - ret = parse_cache_header(io, cachefile) + ret = parse_cache_header(io) srcfiles_only || return ret _, (includes, _), _, srctextpos, _... = ret - srcfiles = srctext_files(io, cachefile, srctextpos) + srcfiles = srctext_files(io, srctextpos, includes) delidx = Int[] for (i, chi) in enumerate(includes) chi.filename ∈ srcfiles || push!(delidx, i) @@ -2700,21 +2738,21 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) end end -preferences_hash(io::IO, cachefile) = parse_cache_header(io, cachefile)[6] +preferences_hash(io::IO) = parse_cache_header(io)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try if iszero(isvalid_cache_header(io)) throw(ArgumentError("Invalid header in cache file $cachefile.")) end - return preferences_hash(io, cachefile) + return preferences_hash(io) finally close(io) end end -function cache_dependencies(io::IO, cachefile) - _, (includes, _), modules, _... = parse_cache_header(io, cachefile) +function cache_dependencies(io::IO) + _, (includes, _), modules, _... = parse_cache_header(io) return modules, map(chi -> chi.filename, includes) # return just filename end @@ -2722,27 +2760,31 @@ function cache_dependencies(cachefile::String) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return cache_dependencies(io, cachefile) + return cache_dependencies(io) finally close(io) end end -function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) - srctextpos = parse_cache_header(io, cachefile)[4] +function read_dependency_src(io::IO, filename::AbstractString) + _, (includes, _), _, srctextpos, _, _, _, _ = parse_cache_header(io)[4] srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) - return _read_dependency_src(io, cachefile, filename) + return _read_dependency_src(io, includes, filename) end -function _read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) +function _read_dependency_src(io::IO, includes::Vector{CacheHeaderIncludes}, filename::AbstractString) msg = "" while !eof(io) filenamelen = read(io, Int32) filenamelen == 0 && break depotfn = String(read(io, filenamelen)) - fn = resolve_depot_path(depotfn, cachefile) len = read(io, UInt64) + basefn = replace(depotfn, "@depot" => "") + idx = findfirst(includes) do inc + endswith(inc.filename, basefn) + end + fn = isnothing(idx) ? depotfn : includes[idx].filename if fn == filename return String(read(io, len)) end @@ -2755,13 +2797,13 @@ function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return read_dependency_src(io, cachefile, filename) + return read_dependency_src(io, filename) finally close(io) end end -function srctext_files(f::IO, cachefile::AbstractString, srctextpos::Int64) +function srctext_files(f::IO, srctextpos::Int64, includes::Vector{CacheHeaderIncludes}) files = Set{String}() srctextpos == 0 && return files seek(f, srctextpos) @@ -2769,8 +2811,12 @@ function srctext_files(f::IO, cachefile::AbstractString, srctextpos::Int64) filenamelen = read(f, Int32) filenamelen == 0 && break depotfn = String(read(f, filenamelen)) - fn = resolve_depot_path(depotfn, cachefile) len = read(f, UInt64) + basefn = replace(depotfn, "@depot" => "") + idx = findfirst(includes) do inc + endswith(inc.filename, basefn) + end + fn = isnothing(idx) ? depotfn : includes[idx].filename push!(files, fn) seek(f, position(f) + len) end @@ -3114,7 +3160,7 @@ end @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io, cachefile) + modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io) if isempty(modules) return true # ignore empty file end diff --git a/test/precompile.jl b/test/precompile.jl index 146ff26e714c2..7f892ab5545ae 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1712,7 +1712,7 @@ precompile_test_harness("PkgCacheInspector") do load_path try # isvalid_cache_header returns checksum id or zero Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile.")) - depmodnames = Base.parse_cache_header(io, cachefile)[3] + depmodnames = Base.parse_cache_header(io)[3] Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile.")) finally close(io) From 0aabc2966ad015c6c1e473b42a5672c61cdbc6f4 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Fri, 25 Aug 2023 20:58:52 +0200 Subject: [PATCH 12/21] address review comments --- base/loading.jl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 873130984408c..f32f0935edd7d 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1687,8 +1687,7 @@ end In a module, declare that the file, directory, or symbolic link specified by `path` (relative or absolute) is a dependency for precompilation; that is, the module will need -to be recompiled if the modification time of `path` changes. -TODO: Update docs because we now use file size and content hash +to be recompiled if its file contents change. This is only needed if your module depends on a path that is not used via [`include`](@ref). It has no effect outside of compilation. @@ -2588,15 +2587,14 @@ function isvalid_pkgimage_crc(f::IOStream, ocachefile::String) end mutable struct CacheHeaderIncludes - id::PkgId + const id::PkgId filename::String - fsize::UInt64 - hash::UInt32 - modpath::Vector{String} # seemingly not needed in Base, but used by Revise + const fsize::UInt64 + const hash::UInt32 + const modpath::Vector{String} # seemingly not needed in Base, but used by Revise end -# replace leading dirname of `path` with `@depot` if `path` and `cachefile` are located -# on the same depot path; otherwise just return `path` +# replace leading dirname of `path` with a `@depot` tag if the file is located inside of a DEPOT_PATH function replace_depot_path(path::AbstractString, cachefile::AbstractString) for depot in DEPOT_PATH if startswith(path, depot) && startswith(cachefile, depot) From 0efd62273f4f4a5de91baa16d3788fe42aabbdf4 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Fri, 25 Aug 2023 21:01:51 +0200 Subject: [PATCH 13/21] clean up - use regex for depot path replacement - add debug logging when @depot resolution failed --- base/initdefs.jl | 1 - base/loading.jl | 89 ++++++++++++++++++++++-------------------- src/precompile.c | 5 +-- src/staticdata.c | 2 +- src/staticdata_utils.c | 5 +-- test/precompile.jl | 2 +- 6 files changed, 53 insertions(+), 51 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index 595ad6a82ec58..c04a97971eff2 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -115,7 +115,6 @@ function init_depot_path() nothing end - ## LOAD_PATH & ACTIVE_PROJECT ## # JULIA_LOAD_PATH: split on `:` (or `;` on Windows) diff --git a/base/loading.jl b/base/loading.jl index f32f0935edd7d..c167f290d0779 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1503,7 +1503,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union io = open(path, "r") try iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") - _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io) + _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) pkgimage = !isempty(clone_targets) if pkgimage ocachepath !== nothing || return ArgumentError("Expected ocachepath to be provided") @@ -2594,42 +2594,40 @@ mutable struct CacheHeaderIncludes const modpath::Vector{String} # seemingly not needed in Base, but used by Revise end -# replace leading dirname of `path` with a `@depot` tag if the file is located inside of a DEPOT_PATH -function replace_depot_path(path::AbstractString, cachefile::AbstractString) +function replace_depot_path(path::AbstractString) for depot in DEPOT_PATH - if startswith(path, depot) && startswith(cachefile, depot) - depot_w_sep = joinpath(depot, "") # append the path separator - alias = joinpath("@depot", path[1+length(depot_w_sep):end]) - return alias + if startswith(path, depot) + path = replace(path, depot => "@depot") + break end end - # TODO Emit debug message when cachefile is not in a depot; can this even happen? - # TODO Emit debug message when path is not in the same depot as cachefile the cache file. - # TODO To be consistent with resolve_depot_paths!, this function should only - # insert a @depot alias when all files are located in one depot return path end # Attempt to resolve all leading `@depot` tags from the included files if they -# can all be located in one and the same depot (first depot in DEPOT_PATH with match wins). +# can all be located in one and the same depot. # Returns true if tags could be resolved. function resolve_depot_paths!(includes::Vector{CacheHeaderIncludes}) + if any(includes) do inc + !startswith(inc.filename, "@depot") + end + return + end for depot in DEPOT_PATH all_in_depot = all(includes) do inc - isfile(replace(inc.filename, "@depot" => depot)) + isfile(replace(inc.filename, r"^@depot" => depot)) end if all_in_depot for inc in includes - inc.filename = replace(inc.filename, "@depot" => depot) + inc.filename = replace(inc.filename, r"^@depot" => depot) end return true end end - # error("Failed to resolve `$depotalias` to a valid path, because the cachefile `$cachefile` is not on located on any `DEPOT_PATH`") return false end -function parse_cache_header(f::IO) +function parse_cache_header(f::IO, cachefile::AbstractString) flags = read(f, UInt8) modules = Vector{Pair{PkgId, UInt64}}() while true @@ -2640,7 +2638,6 @@ function parse_cache_header(f::IO) build_id = read(f, UInt64) # build UUID (mostly just a timestamp) push!(modules, PkgId(uuid, sym) => build_id) end - # write_dependency_list writes UInt64 here totbytes = Int64(read(f, UInt64)) # total bytes for file dependencies + preferences # read the list of requirements # and split the list into include and requires statements @@ -2703,7 +2700,6 @@ function parse_cache_header(f::IO) n == 0 && break sym = String(read(f, n)) # module name uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID - # why not build_id = UInt128(read(f, UInt64), read(f, UInt64)) build_id = UInt128(read(f, UInt64)) << 64 build_id |= read(f, UInt64) push!(required_modules, PkgId(uuid, sym) => build_id) @@ -2711,8 +2707,10 @@ function parse_cache_header(f::IO) l = read(f, Int32) clone_targets = read(f, l) - # TODO @debug for when we failed resolving @depot - resolve_depot_paths!(includes) + resolved = resolve_depot_paths!(includes) + if !isnothing(resolved) && !resolved + @debug "Failed to resolve @depot tags for includes listed in cache file $cachefile." + end return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags end @@ -2721,9 +2719,9 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - ret = parse_cache_header(io) - srcfiles_only || return ret + ret = parse_cache_header(io, cachefile) _, (includes, _), _, srctextpos, _... = ret + srcfiles_only || return ret srcfiles = srctext_files(io, srctextpos, includes) delidx = Int[] for (i, chi) in enumerate(includes) @@ -2736,21 +2734,21 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) end end -preferences_hash(io::IO) = parse_cache_header(io)[6] +preferences_hash(f::IO, cachefile::AbstractString) = parse_cache_header(f, cachefile)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try if iszero(isvalid_cache_header(io)) throw(ArgumentError("Invalid header in cache file $cachefile.")) end - return preferences_hash(io) + return preferences_hash(io, cachefile) finally close(io) end end -function cache_dependencies(io::IO) - _, (includes, _), modules, _... = parse_cache_header(io) +function cache_dependencies(f::IO, cachefile::AbstractString) + _, (includes, _), modules, _... = parse_cache_header(f, cachefile) return modules, map(chi -> chi.filename, includes) # return just filename end @@ -2758,31 +2756,34 @@ function cache_dependencies(cachefile::String) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return cache_dependencies(io) + return cache_dependencies(io, cachefile) finally close(io) end end -function read_dependency_src(io::IO, filename::AbstractString) - _, (includes, _), _, srctextpos, _, _, _, _ = parse_cache_header(io)[4] +function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) + _, (includes, _), _, srctextpos, _, _, _, _ = parse_cache_header(io, cachefile) srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) - return _read_dependency_src(io, includes, filename) + return _read_dependency_src(io, filename, includes) end -function _read_dependency_src(io::IO, includes::Vector{CacheHeaderIncludes}, filename::AbstractString) - msg = "" +function _read_dependency_src(io::IO, filename::AbstractString, includes::Vector{CacheHeaderIncludes}=CacheHeaderIncludes[]) while !eof(io) filenamelen = read(io, Int32) filenamelen == 0 && break depotfn = String(read(io, filenamelen)) len = read(io, UInt64) - basefn = replace(depotfn, "@depot" => "") - idx = findfirst(includes) do inc - endswith(inc.filename, basefn) + fn = if !startswith(depotfn, "@depot") + depotfn + else + basefn = replace(depotfn, r"^@depot" => "") + idx = findfirst(includes) do inc + endswith(inc.filename, basefn) + end + isnothing(idx) ? depotfn : includes[idx].filename end - fn = isnothing(idx) ? depotfn : includes[idx].filename if fn == filename return String(read(io, len)) end @@ -2795,7 +2796,7 @@ function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return read_dependency_src(io, filename) + return read_dependency_src(io, cachefile, filename) finally close(io) end @@ -2810,11 +2811,15 @@ function srctext_files(f::IO, srctextpos::Int64, includes::Vector{CacheHeaderInc filenamelen == 0 && break depotfn = String(read(f, filenamelen)) len = read(f, UInt64) - basefn = replace(depotfn, "@depot" => "") - idx = findfirst(includes) do inc - endswith(inc.filename, basefn) + fn = if !startswith(depotfn, "@depot") + depotfn + else + basefn = replace(depotfn, r"^@depot" => "") + idx = findfirst(includes) do inc + endswith(inc.filename, basefn) + end + isnothing(idx) ? depotfn : includes[idx].filename end - fn = isnothing(idx) ? depotfn : includes[idx].filename push!(files, fn) seek(f, position(f) + len) end @@ -3158,7 +3163,7 @@ end @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io) + modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io, cachefile) if isempty(modules) return true # ignore empty file end diff --git a/src/precompile.c b/src/precompile.c index 01b30c497b43b..ad33dcd9b6d65 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -60,14 +60,13 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { } jl_value_t **replace_depot_args; - JL_GC_PUSHARGS(replace_depot_args, 3); + JL_GC_PUSHARGS(replace_depot_args, 2); replace_depot_args[0] = replace_depot_func; replace_depot_args[1] = abspath; - replace_depot_args[2] = jl_cstr_to_string(jl_options.outputji); jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2); ct->world_age = last_age; JL_GC_POP(); diff --git a/src/staticdata.c b/src/staticdata.c index a2b8af91e7067..6383a3943a7ea 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2693,7 +2693,7 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_worklist_for_header(f, worklist); // Determine unique (module, abspath, fsize, hash) dependencies for the files defining modules in the worklist // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header - // (abspath will be converted to a relocateable @depot path before writing, Base.replace_depot_path). + // (abspath will be converted to a relocateable @depot path before writing, cf. Base.replace_depot_path). // Also write Preferences. // last word of the dependency list is the end of the data / start of the srctextpos *srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later) diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index a3c6042790a78..544d4820906f3 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -715,14 +715,13 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t jl_value_t *abspath = jl_fieldref(deptuple, 1); jl_value_t **replace_depot_args; - JL_GC_PUSHARGS(replace_depot_args, 3); + JL_GC_PUSHARGS(replace_depot_args, 2); replace_depot_args[0] = replace_depot_func; replace_depot_args[1] = abspath; - replace_depot_args[2] = jl_cstr_to_string(jl_options.outputji); ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2); ct->world_age = last_age; JL_GC_POP(); diff --git a/test/precompile.jl b/test/precompile.jl index 7f892ab5545ae..146ff26e714c2 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1712,7 +1712,7 @@ precompile_test_harness("PkgCacheInspector") do load_path try # isvalid_cache_header returns checksum id or zero Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile.")) - depmodnames = Base.parse_cache_header(io)[3] + depmodnames = Base.parse_cache_header(io, cachefile)[3] Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile.")) finally close(io) From 6ab1ef3101bf51bcd1ba59c07048aed5f80fbb8c Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Sat, 30 Sep 2023 18:28:08 +0200 Subject: [PATCH 14/21] only consider srctext files when resolving depot path --- base/loading.jl | 73 ++++++++++++++++++++-------------------------- test/precompile.jl | 9 +++--- 2 files changed, 37 insertions(+), 45 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index c167f290d0779..8e927ab824f13 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2604,29 +2604,26 @@ function replace_depot_path(path::AbstractString) return path end -# Attempt to resolve all leading `@depot` tags from the included files if they -# can all be located in one and the same depot. -# Returns true if tags could be resolved. -function resolve_depot_paths!(includes::Vector{CacheHeaderIncludes}) +# Find depot in DEPOT_PATH for which all @depot tags from the `includes` +# can be replaced so that they point to a file on disk each. +# Return nothing when no depot matched. +function resolve_depot(includes) if any(includes) do inc - !startswith(inc.filename, "@depot") + !startswith(inc, "@depot") end - return + return nothing end for depot in DEPOT_PATH - all_in_depot = all(includes) do inc - isfile(replace(inc.filename, r"^@depot" => depot)) - end - if all_in_depot - for inc in includes - inc.filename = replace(inc.filename, r"^@depot" => depot) + if all(includes) do inc + isfile(replace(inc, r"^@depot" => depot)) end - return true + return depot end end - return false + return nothing end + function parse_cache_header(f::IO, cachefile::AbstractString) flags = read(f, UInt8) modules = Vector{Pair{PkgId, UInt64}}() @@ -2707,27 +2704,30 @@ function parse_cache_header(f::IO, cachefile::AbstractString) l = read(f, Int32) clone_targets = read(f, l) - resolved = resolve_depot_paths!(includes) - if !isnothing(resolved) && !resolved - @debug "Failed to resolve @depot tags for includes listed in cache file $cachefile." + # determine path for @depot replacement from srctext files only, e.g. ignore any include_dependency files + srcfiles = srctext_files(f, srctextpos, includes) + depot = resolve_depot(srcfiles) + keepidx = Int[] + for (i, chi) in enumerate(includes) + chi.filename ∈ srcfiles && push!(keepidx, i) + end + if isnothing(depot) + @debug "Failed to determine depot from srctext files in cache file $cachefile." + else + for inc in includes + inc.filename = replace(inc.filename, r"^@depot" => depot) + end end + includes_srcfiles_only = includes[keepidx] - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags + return modules, (includes, includes_srcfiles_only, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags end -function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) +function parse_cache_header(cachefile::String) io = open(cachefile, "r") try iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) ret = parse_cache_header(io, cachefile) - _, (includes, _), _, srctextpos, _... = ret - srcfiles_only || return ret - srcfiles = srctext_files(io, srctextpos, includes) - delidx = Int[] - for (i, chi) in enumerate(includes) - chi.filename ∈ srcfiles || push!(delidx, i) - end - deleteat!(includes, delidx) return ret finally close(io) @@ -2748,7 +2748,7 @@ function preferences_hash(cachefile::String) end function cache_dependencies(f::IO, cachefile::AbstractString) - _, (includes, _), modules, _... = parse_cache_header(f, cachefile) + _, (includes, _, _), modules, _... = parse_cache_header(f, cachefile) return modules, map(chi -> chi.filename, includes) # return just filename end @@ -2763,7 +2763,7 @@ function cache_dependencies(cachefile::String) end function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) - _, (includes, _), _, srctextpos, _, _, _, _ = parse_cache_header(io, cachefile) + _, (includes, _, _), _, srctextpos, _, _, _, _ = parse_cache_header(io, cachefile) srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) return _read_dependency_src(io, filename, includes) @@ -2809,18 +2809,9 @@ function srctext_files(f::IO, srctextpos::Int64, includes::Vector{CacheHeaderInc while !eof(f) filenamelen = read(f, Int32) filenamelen == 0 && break - depotfn = String(read(f, filenamelen)) + filename = String(read(f, filenamelen)) len = read(f, UInt64) - fn = if !startswith(depotfn, "@depot") - depotfn - else - basefn = replace(depotfn, r"^@depot" => "") - idx = findfirst(includes) do inc - endswith(inc.filename, basefn) - end - isnothing(idx) ? depotfn : includes[idx].filename - end - push!(files, fn) + push!(files, filename) seek(f, position(f) + len) end return files @@ -3163,7 +3154,7 @@ end @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io, cachefile) + modules, (includes, _, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io, cachefile) if isempty(modules) return true # ignore empty file end diff --git a/test/precompile.jl b/test/precompile.jl index 146ff26e714c2..ebc131ab90c50 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -381,7 +381,7 @@ precompile_test_harness(false) do dir @test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n" - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) discard_module = mod_fl_mt -> mod_fl_mt.filename @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] @@ -422,7 +422,8 @@ precompile_test_harness(false) do dir @test Dict(modules) == modules_ok @test discard_module.(deps) == deps1 - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile; srcfiles_only=true) + # modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile; srcfiles_only=true) + modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test map(x -> x.filename, deps) == [Foo_file] @test current_task()(0x01, 0x4000, 0x30031234) == 2 @@ -485,7 +486,7 @@ precompile_test_harness(false) do dir """) Nest = Base.require(Main, Nest_module) cachefile = joinpath(cachedir, "$Nest_module.ji") - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test last(deps).modpath == ["NestInner"] UsesB_module = :UsesB4b3a94a1a081a8cb @@ -507,7 +508,7 @@ precompile_test_harness(false) do dir """) UsesB = Base.require(Main, UsesB_module) cachefile = joinpath(cachedir, "$UsesB_module.ji") - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) id1, id2 = only(requires) @test Base.pkgorigins[id1].cachepath == cachefile @test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji") From 01989ba5d18bc8763b87a5612d37afe42f9c0d4d Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Sun, 1 Oct 2023 22:29:23 +0200 Subject: [PATCH 15/21] add RelocationTestPkg for unit tests fixup unit test fixup unit tests part 2 --- test/.gitignore | 1 + test/Makefile | 10 +-- test/RelocationTestPkg/Project.toml | 3 + .../src/RelocationTestPkg.jl | 8 +++ test/precompile.jl | 1 - test/relocatedepot.jl | 62 ++++++++++++++----- 6 files changed, 66 insertions(+), 19 deletions(-) create mode 100644 test/RelocationTestPkg/Project.toml create mode 100644 test/RelocationTestPkg/src/RelocationTestPkg.jl diff --git a/test/.gitignore b/test/.gitignore index 6b9b927242ac8..666c74c38ab73 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -3,3 +3,4 @@ /ccalltest.s /libccalltest.* /relocatedepot +/RelocationTestPkg/src/foo.txt diff --git a/test/Makefile b/test/Makefile index 970ff702a630d..d575bcf2399c2 100644 --- a/test/Makefile +++ b/test/Makefile @@ -37,20 +37,22 @@ $(addprefix revise-, $(TESTS)): revise-% : relocatedepot: @rm -rf $(SRCDIR)/relocatedepot @cd $(SRCDIR) && \ - $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) @mkdir $(SRCDIR)/relocatedepot @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg $(SRCDIR)/relocatedepot @cd $(SRCDIR) && \ - $(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) revise-relocatedepot: revise-% : @rm -rf $(SRCDIR)/relocatedepot @cd $(SRCDIR) && \ - $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) @mkdir $(SRCDIR)/relocatedepot @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg $(SRCDIR)/relocatedepot @cd $(SRCDIR) && \ - $(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) embedding: @$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS) diff --git a/test/RelocationTestPkg/Project.toml b/test/RelocationTestPkg/Project.toml new file mode 100644 index 0000000000000..fbd6b9695bbdf --- /dev/null +++ b/test/RelocationTestPkg/Project.toml @@ -0,0 +1,3 @@ +name = "RelocationTestPkg" +uuid = "84019e0e-37c6-44aa-be55-38c734c0a527" +version = "0.1.0" diff --git a/test/RelocationTestPkg/src/RelocationTestPkg.jl b/test/RelocationTestPkg/src/RelocationTestPkg.jl new file mode 100644 index 0000000000000..e584688ae6baa --- /dev/null +++ b/test/RelocationTestPkg/src/RelocationTestPkg.jl @@ -0,0 +1,8 @@ +module RelocationTestPkg + + +include_dependency("foo.txt") +greet() = print("Hello World!") + + +end # module RelocationTestPkg diff --git a/test/precompile.jl b/test/precompile.jl index ebc131ab90c50..fc4ab2490c4a8 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -422,7 +422,6 @@ precompile_test_harness(false) do dir @test Dict(modules) == modules_ok @test discard_module.(deps) == deps1 - # modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile; srcfiles_only=true) modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test map(x -> x.filename, deps) == [Foo_file] diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl index f512d17764ccb..76d47b35aaefe 100644 --- a/test/relocatedepot.jl +++ b/test/relocatedepot.jl @@ -1,23 +1,57 @@ using Test +using Logging include("testenv.jl") -@testset "compile and load relocated pkg" begin - pkg = Base.identify_package("DelimitedFiles") - if !test_relocated_depot - # precompile - Base.require(Main, :DelimitedFiles) - @test Base.root_module_exists(pkg) == true - else - path = Base.locate_package(pkg) - iscached = @lock Base.require_lock begin - m = Base._require_search_from_serialized(pkg, path, UInt128(0)) - m isa Module +if !test_relocated_depot + + @testset "precompile RelocationTestPkg" begin + load_path = copy(LOAD_PATH) + depot_path = copy(DEPOT_PATH) + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) + try + pkg = Base.identify_package("RelocationTestPkg") + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + rm(joinpath(@__DIR__, "RelocationTestPkg", "src", "foo.txt"), force=true) + @test Base.isprecompiled(pkg) == false # include_dependency foo.txt is missing + Base.require(pkg) # precompile + @test Base.isprecompiled(pkg, ignore_loaded=true) == false # foo.txt still missing + touch(joinpath(@__DIR__, "RelocationTestPkg", "src", "foo.txt")) + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + finally + copy!(LOAD_PATH, load_path) + copy!(DEPOT_PATH, depot_path) + end + end + +else + + @testset "load stdlib from test/relocatedepot" begin + # stdlib should be already precompiled + pkg = Base.identify_package("DelimitedFiles") + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + end + + @testset "load RelocationTestPkg from test/relocatedepot" begin + load_path = copy(LOAD_PATH) + depot_path = copy(DEPOT_PATH) + # moved source and depot into test/relocatedepot + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + try + pkg = Base.identify_package("RelocationTestPkg") + rm(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg", "src", "foo.txt"), force=true) + @test Base.isprecompiled(pkg, ignore_loaded=true) == false # foo.txt missing + touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg", "src", "foo.txt")) + @test Base.isprecompiled(pkg) == true + finally + copy!(LOAD_PATH, load_path) + copy!(DEPOT_PATH, depot_path) end - @test iscached == true # can load from cache - Base.require(Main, :DelimitedFiles) - @test Base.root_module_exists(pkg) == true end + end From 3047c277b89cee0d8b12d9d677fdda021bc31bfb Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 3 Oct 2023 23:12:07 +0200 Subject: [PATCH 16/21] include_dependency should also track filesize of directories and symlinks --- base/loading.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 8e927ab824f13..5630e1049e9c6 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1671,11 +1671,8 @@ function _include_dependency(mod::Module, _path::AbstractString) end if _track_dependencies[] @lock require_lock begin - fsize, hash = if isfile(path) - UInt64(filesize(path)), open(_crc32c, path, "r") - else - UInt64(0), UInt32(0) - end + fsize = filesize(path) + hash = isfile(path) ? open(_crc32c, path, "r") : UInt32(0) push!(_require_dependencies, (mod, path, fsize, hash)) end end @@ -1687,7 +1684,7 @@ end In a module, declare that the file, directory, or symbolic link specified by `path` (relative or absolute) is a dependency for precompilation; that is, the module will need -to be recompiled if its file contents change. +to be recompiled if the file's size changes. This is only needed if your module depends on a path that is not used via [`include`](@ref). It has no effect outside of compilation. From 5bf58138335bc1dcc3dc0c04c093333b12407631 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 3 Oct 2023 23:12:45 +0200 Subject: [PATCH 17/21] refine debug logs when parsing cache header --- base/loading.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 5630e1049e9c6..61c1f60f09e4b 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2608,7 +2608,7 @@ function resolve_depot(includes) if any(includes) do inc !startswith(inc, "@depot") end - return nothing + return :missing_tag end for depot in DEPOT_PATH if all(includes) do inc @@ -2617,7 +2617,7 @@ function resolve_depot(includes) return depot end end - return nothing + return :no_depot end @@ -2708,8 +2708,10 @@ function parse_cache_header(f::IO, cachefile::AbstractString) for (i, chi) in enumerate(includes) chi.filename ∈ srcfiles && push!(keepidx, i) end - if isnothing(depot) + if depot === :no_depot @debug "Failed to determine depot from srctext files in cache file $cachefile." + elseif depot === :missing_tag + @debug "Missing @depot tag for include dependencies in cache file $cachefile." else for inc in includes inc.filename = replace(inc.filename, r"^@depot" => depot) From 1d451558ee63f0d299141c21efb526e8e608ceaa Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 3 Oct 2023 23:13:14 +0200 Subject: [PATCH 18/21] fixup stale cache file debug log --- base/loading.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 61c1f60f09e4b..5e8e7e93597ff 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -3273,13 +3273,14 @@ end return true end fsize = filesize(f) - if fsize != chi.fsize - @debug "Rejecting stale cache file $cachefile (file size $fsize_req) because file $f (file size $fsize) has changed" + @debug (f,fsize) + if fsize != fsize_req + @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)" return true end hash = open(_crc32c, f, "r") - if hash != chi.hash - @debug "Rejecting stale cache file $cachefile (hash $hash_req) because file $f (hash $hash) has changed" + if hash != hash_req + @debug "Rejecting stale cache file $cachefile because file size of $f has changed (hash $hash, before $hash_req)" return true end end From 39cc0340f1529ca39b8cb1e8bfcf081f79f5651c Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Wed, 4 Oct 2023 00:12:39 +0200 Subject: [PATCH 19/21] throw when we can't find a depot --- base/loading.jl | 12 +++++++----- test/relocatedepot.jl | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 5e8e7e93597ff..a589c49740bfb 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2608,7 +2608,7 @@ function resolve_depot(includes) if any(includes) do inc !startswith(inc, "@depot") end - return :missing_tag + return :missing_depot_tag end for depot in DEPOT_PATH if all(includes) do inc @@ -2617,7 +2617,7 @@ function resolve_depot(includes) return depot end end - return :no_depot + return :no_depot_found end @@ -2708,9 +2708,11 @@ function parse_cache_header(f::IO, cachefile::AbstractString) for (i, chi) in enumerate(includes) chi.filename ∈ srcfiles && push!(keepidx, i) end - if depot === :no_depot - @debug "Failed to determine depot from srctext files in cache file $cachefile." - elseif depot === :missing_tag + if depot === :no_depot_found + throw(ArgumentError(""" + Failed to determine depot from srctext files in cache file $cachefile. + - Make sure you have adjusted DEPOT_PATH in case you relocated depots.""")) + elseif depot === :missing_depot_tag @debug "Missing @depot tag for include dependencies in cache file $cachefile." else for inc in includes diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl index 76d47b35aaefe..01fd7ee8e24a0 100644 --- a/test/relocatedepot.jl +++ b/test/relocatedepot.jl @@ -54,4 +54,20 @@ else end end + + @testset "throw when failed to find a depot for RelocationTestPkg" begin + load_path = copy(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + try + pkg = Base.identify_package("RelocationTestPkg") + touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg", "src", "foo.txt")) + cachefile = only(Base.find_all_in_cache_path(pkg)) + @test_throws ArgumentError(""" + Failed to determine depot from srctext files in cache file $cachefile. + - Make sure you have adjusted DEPOT_PATH in case you relocated depots.""") Base.isprecompiled(pkg) + finally + copy!(LOAD_PATH, load_path) + end + end + end From edcc5d19c7c08413c8e57035aad7ad03a836e330 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 10 Oct 2023 22:09:10 +0200 Subject: [PATCH 20/21] track mtime only for include_dependency add assert msg fixup mtime check --- base/loading.jl | 69 ++++++---- base/sysimg.jl | 4 +- src/staticdata.c | 2 +- src/staticdata_utils.c | 6 + test/.gitignore | 2 +- test/Makefile | 6 +- test/RelocationTestPkg/Project.toml | 3 - test/RelocationTestPkg1/Project.toml | 4 + .../src/RelocationTestPkg1.jl | 5 + test/RelocationTestPkg1/src/foo.txt | 0 test/RelocationTestPkg2/Project.toml | 4 + .../src/RelocationTestPkg2.jl} | 6 +- test/RelocationTestPkg2/src/foo.txt | 0 test/relocatedepot.jl | 119 +++++++++++------- 14 files changed, 150 insertions(+), 80 deletions(-) delete mode 100644 test/RelocationTestPkg/Project.toml create mode 100644 test/RelocationTestPkg1/Project.toml create mode 100644 test/RelocationTestPkg1/src/RelocationTestPkg1.jl create mode 100644 test/RelocationTestPkg1/src/foo.txt create mode 100644 test/RelocationTestPkg2/Project.toml rename test/{RelocationTestPkg/src/RelocationTestPkg.jl => RelocationTestPkg2/src/RelocationTestPkg2.jl} (52%) create mode 100644 test/RelocationTestPkg2/src/foo.txt diff --git a/base/loading.jl b/base/loading.jl index a589c49740bfb..8000c61ce172c 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1660,9 +1660,9 @@ const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, abspath, fsize, hash) tuples that are the file dependencies of the module currently being precompiled +const _require_dependencies = Any[] # a list of (mod, abspath, fsize, hash, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies -function _include_dependency(mod::Module, _path::AbstractString) +function _include_dependency(mod::Module, _path::AbstractString; track_content=true) prev = source_path(nothing) if prev === nothing path = abspath(_path) @@ -1671,9 +1671,15 @@ function _include_dependency(mod::Module, _path::AbstractString) end if _track_dependencies[] @lock require_lock begin - fsize = filesize(path) - hash = isfile(path) ? open(_crc32c, path, "r") : UInt32(0) - push!(_require_dependencies, (mod, path, fsize, hash)) + if track_content + @assert isfile(path) "can only hash files" + # use mtime=-1.0 here so that fsize==0 && mtime==0.0 corresponds to a missing include_dependency + push!(_require_dependencies, + (mod, path, filesize(path), open(_crc32c, path, "r"), -1.0)) + else + push!(_require_dependencies, + (mod, path, UInt64(0), UInt32(0), mtime(path))) + end end end return path, prev @@ -1684,13 +1690,13 @@ end In a module, declare that the file, directory, or symbolic link specified by `path` (relative or absolute) is a dependency for precompilation; that is, the module will need -to be recompiled if the file's size changes. +to be recompiled if the modification time of `path` changes. This is only needed if your module depends on a path that is not used via [`include`](@ref). It has no effect outside of compilation. """ function include_dependency(path::AbstractString) - _include_dependency(Main, path) + _include_dependency(Main, path, track_content=false) return nothing end @@ -1790,7 +1796,7 @@ function __require(into::Module, mod::Symbol) uuidkey, env = uuidkey_env if _track_dependencies[] path = binpack(uuidkey) - push!(_require_dependencies, (into, path, UInt64(0), UInt32(0))) + push!(_require_dependencies, (into, path, UInt64(0), UInt32(0), 0.0)) end return _require_prelocked(uuidkey, env) finally @@ -2588,6 +2594,7 @@ mutable struct CacheHeaderIncludes filename::String const fsize::UInt64 const hash::UInt32 + const mtime::Float64 const modpath::Vector{String} # seemingly not needed in Base, but used by Revise end @@ -2649,6 +2656,8 @@ function parse_cache_header(f::IO, cachefile::AbstractString) totbytes -= 8 hash = read(f, UInt32) totbytes -= 4 + mtime = read(f, Float64) + totbytes -= 8 n1 = read(f, Int32) totbytes -= 4 # map ids to keys @@ -2669,7 +2678,7 @@ function parse_cache_header(f::IO, cachefile::AbstractString) if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) else - push!(includes, CacheHeaderIncludes(modkey, depname, fsize, hash, modpath)) + push!(includes, CacheHeaderIncludes(modkey, depname, fsize, hash, mtime, modpath)) end end prefs = String[] @@ -3236,13 +3245,13 @@ end # check if this file is going to provide one of our concrete dependencies # or if it provides a version that conflicts with our concrete dependencies # or neither - skip_hashcheck = false + skip_check = false for (req_key, req_build_id) in _concrete_dependencies build_id = get(modules, req_key, UInt64(0)) if build_id !== UInt64(0) build_id |= UInt128(checksum) << 64 if build_id === req_build_id - skip_hashcheck = true + skip_check = true break end @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" @@ -3251,7 +3260,7 @@ end end # now check if this file's content hash has changed relative to its source files - if !skip_hashcheck + if !skip_check if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath) @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath" return true # cache file was compiled from a different path @@ -3265,7 +3274,7 @@ end end end for chi in includes - f, fsize_req, hash_req = chi.filename, chi.fsize, chi.hash + f, fsize_req, hash_req, ftime_req = chi.filename, chi.fsize, chi.hash, chi.mtime if !ispath(f) _f = fixup_stdlib_path(f) if isfile(_f) && startswith(_f, Sys.STDLIB) @@ -3274,16 +3283,30 @@ end @debug "Rejecting stale cache file $cachefile because file $f does not exist" return true end - fsize = filesize(f) - @debug (f,fsize) - if fsize != fsize_req - @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)" - return true - end - hash = open(_crc32c, f, "r") - if hash != hash_req - @debug "Rejecting stale cache file $cachefile because file size of $f has changed (hash $hash, before $hash_req)" - return true + if ftime_req >= 0.0 + # this is an include_dependency for which we only recorded the mtime + ftime = mtime(f) + is_stale = ( ftime != ftime_req ) && + ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes + ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching + ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds + ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. + !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond + if is_stale + @debug "Rejecting stale cache file $cachefile because mtime of include_dependency $f has changed (mtime $ftime, before $ftime_req)" + return true + end + else + fsize = filesize(f) + if fsize != fsize_req + @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)" + return true + end + hash = open(_crc32c, f, "r") + if hash != hash_req + @debug "Rejecting stale cache file $cachefile because hash of $f has changed (hash $hash, before $hash_req)" + return true + end end end end diff --git a/base/sysimg.jl b/base/sysimg.jl index a1309f48a4ec2..a4bf21786c633 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -59,8 +59,8 @@ let print_time(stdlib, tt) end for dep in Base._require_dependencies - mod, path, fsize = dep[1], dep[2], dep[3] - fsize == 0 && continue + mod, path, fsize, mtime = dep[1], dep[2], dep[3], dep[5] + (fsize == 0 || mtime == 0.0) && continue push!(Base._included_files, (mod, path)) end empty!(Base._require_dependencies) diff --git a/src/staticdata.c b/src/staticdata.c index 6383a3943a7ea..69226408f711b 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2691,7 +2691,7 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); - // Determine unique (module, abspath, fsize, hash) dependencies for the files defining modules in the worklist + // Determine unique (module, abspath, fsize, hash, mtime) dependencies for the files defining modules in the worklist // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header // (abspath will be converted to a relocateable @depot path before writing, cf. Base.replace_depot_path). // Also write Preferences. diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 544d4820906f3..047042795d128 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -1,6 +1,11 @@ // inverse of backedges graph (caller=>callees hash) jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this +static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT +{ + write_uint64(s, *((uint64_t*)&x)); +} + // Decide if `t` must be new, because it points to something new. // If it is new, the object (in particular, the super field) might not be entirely // valid for the cache, so we want to finish transforming it before attempting @@ -730,6 +735,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t ios_write(s, jl_string_data(depalias), slen); write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 2))); // fsize write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 3))); // hash + write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 4))); // mtime jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module jl_module_t *depmod_top = depmod; while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) diff --git a/test/.gitignore b/test/.gitignore index 666c74c38ab73..fc55a0df3a173 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -3,4 +3,4 @@ /ccalltest.s /libccalltest.* /relocatedepot -/RelocationTestPkg/src/foo.txt +/RelocationTestPkg2/src/foo.txt diff --git a/test/Makefile b/test/Makefile index d575bcf2399c2..d8605341ff9b9 100644 --- a/test/Makefile +++ b/test/Makefile @@ -40,7 +40,8 @@ relocatedepot: $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) @mkdir $(SRCDIR)/relocatedepot @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot - @cp -R $(SRCDIR)/RelocationTestPkg $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot @cd $(SRCDIR) && \ $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) @@ -50,7 +51,8 @@ revise-relocatedepot: revise-% : $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) @mkdir $(SRCDIR)/relocatedepot @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot - @cp -R $(SRCDIR)/RelocationTestPkg $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot @cd $(SRCDIR) && \ $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) diff --git a/test/RelocationTestPkg/Project.toml b/test/RelocationTestPkg/Project.toml deleted file mode 100644 index fbd6b9695bbdf..0000000000000 --- a/test/RelocationTestPkg/Project.toml +++ /dev/null @@ -1,3 +0,0 @@ -name = "RelocationTestPkg" -uuid = "84019e0e-37c6-44aa-be55-38c734c0a527" -version = "0.1.0" diff --git a/test/RelocationTestPkg1/Project.toml b/test/RelocationTestPkg1/Project.toml new file mode 100644 index 0000000000000..826980207d508 --- /dev/null +++ b/test/RelocationTestPkg1/Project.toml @@ -0,0 +1,4 @@ +name = "RelocationTestPkg1" +uuid = "854e1adb-5a97-46bf-a391-1cfe05ac726d" +authors = ["flo "] +version = "0.1.0" diff --git a/test/RelocationTestPkg1/src/RelocationTestPkg1.jl b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl new file mode 100644 index 0000000000000..a86543a61b3f8 --- /dev/null +++ b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl @@ -0,0 +1,5 @@ +module RelocationTestPkg1 + +greet() = print("Hello World!") + +end # module RelocationTestPkg1 diff --git a/test/RelocationTestPkg1/src/foo.txt b/test/RelocationTestPkg1/src/foo.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/RelocationTestPkg2/Project.toml b/test/RelocationTestPkg2/Project.toml new file mode 100644 index 0000000000000..68da889785215 --- /dev/null +++ b/test/RelocationTestPkg2/Project.toml @@ -0,0 +1,4 @@ +name = "RelocationTestPkg2" +uuid = "8d933983-b090-4b0b-a37e-c34793f459d1" +authors = ["flo "] +version = "0.1.0" diff --git a/test/RelocationTestPkg/src/RelocationTestPkg.jl b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl similarity index 52% rename from test/RelocationTestPkg/src/RelocationTestPkg.jl rename to test/RelocationTestPkg2/src/RelocationTestPkg2.jl index e584688ae6baa..0d8b5e15edf06 100644 --- a/test/RelocationTestPkg/src/RelocationTestPkg.jl +++ b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl @@ -1,8 +1,6 @@ -module RelocationTestPkg - +module RelocationTestPkg2 include_dependency("foo.txt") greet() = print("Hello World!") - -end # module RelocationTestPkg +end # module RelocationTestPkg2 diff --git a/test/RelocationTestPkg2/src/foo.txt b/test/RelocationTestPkg2/src/foo.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl index 01fd7ee8e24a0..91f03d59cc168 100644 --- a/test/relocatedepot.jl +++ b/test/relocatedepot.jl @@ -5,68 +5,99 @@ using Logging include("testenv.jl") +function test_harness(@nospecialize(fn)) + load_path = copy(LOAD_PATH) + depot_path = copy(DEPOT_PATH) + try + fn() + finally + copy!(LOAD_PATH, load_path) + copy!(DEPOT_PATH, depot_path) + end +end + + if !test_relocated_depot - @testset "precompile RelocationTestPkg" begin - load_path = copy(LOAD_PATH) - depot_path = copy(DEPOT_PATH) - push!(LOAD_PATH, @__DIR__) - push!(DEPOT_PATH, @__DIR__) - try - pkg = Base.identify_package("RelocationTestPkg") + @testset "precompile RelocationTestPkg1" begin + pkgname = "RelocationTestPkg1" + test_harness() do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) + pkg = Base.identify_package(pkgname) cachefiles = Base.find_all_in_cache_path(pkg) rm.(cachefiles, force=true) - rm(joinpath(@__DIR__, "RelocationTestPkg", "src", "foo.txt"), force=true) - @test Base.isprecompiled(pkg) == false # include_dependency foo.txt is missing + @test Base.isprecompiled(pkg) == false Base.require(pkg) # precompile - @test Base.isprecompiled(pkg, ignore_loaded=true) == false # foo.txt still missing - touch(joinpath(@__DIR__, "RelocationTestPkg", "src", "foo.txt")) @test Base.isprecompiled(pkg, ignore_loaded=true) == true - finally - copy!(LOAD_PATH, load_path) - copy!(DEPOT_PATH, depot_path) + end + end + + @testset "precompile RelocationTestPkg2 (contains include_dependency)" begin + pkgname = "RelocationTestPkg2" + test_harness() do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + @test Base.isprecompiled(pkg) == false + touch(joinpath(@__DIR__, pkgname, "src", "foo.txt")) + Base.require(pkg) # precompile + @info "SERS OIDA" + @test Base.isprecompiled(pkg, ignore_loaded=true) == true end end else - @testset "load stdlib from test/relocatedepot" begin - # stdlib should be already precompiled - pkg = Base.identify_package("DelimitedFiles") - @test Base.isprecompiled(pkg, ignore_loaded=true) == true + # must come before any of the load tests, because the will recompile and generate new cache files + @testset "attempt loading precompiled pkgs when depot is missing" begin + test_harness() do + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + for pkgname in ("RelocationTestPkg1", "RelocationTestPkg2") + pkg = Base.identify_package(pkgname) + cachefile = only(Base.find_all_in_cache_path(pkg)) + @info cachefile + @test_throws ArgumentError(""" + Failed to determine depot from srctext files in cache file $cachefile. + - Make sure you have adjusted DEPOT_PATH in case you relocated depots.""") Base.isprecompiled(pkg) + end + end end - @testset "load RelocationTestPkg from test/relocatedepot" begin - load_path = copy(LOAD_PATH) - depot_path = copy(DEPOT_PATH) - # moved source and depot into test/relocatedepot - push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) - push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) - try - pkg = Base.identify_package("RelocationTestPkg") - rm(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg", "src", "foo.txt"), force=true) - @test Base.isprecompiled(pkg, ignore_loaded=true) == false # foo.txt missing - touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg", "src", "foo.txt")) + @testset "load stdlib from test/relocatedepot" begin + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + # stdlib should be already precompiled + pkg = Base.identify_package("DelimitedFiles") @test Base.isprecompiled(pkg) == true - finally - copy!(LOAD_PATH, load_path) - copy!(DEPOT_PATH, depot_path) end end + @testset "load RelocationTestPkg1 from test/relocatedepot" begin + pkgname = "RelocationTestPkg1" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == true + Base.require(pkg) # re-precompile + @test Base.isprecompiled(pkg) == true + end + end - @testset "throw when failed to find a depot for RelocationTestPkg" begin - load_path = copy(LOAD_PATH) - push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) - try - pkg = Base.identify_package("RelocationTestPkg") - touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg", "src", "foo.txt")) - cachefile = only(Base.find_all_in_cache_path(pkg)) - @test_throws ArgumentError(""" - Failed to determine depot from srctext files in cache file $cachefile. - - Make sure you have adjusted DEPOT_PATH in case you relocated depots.""") Base.isprecompiled(pkg) - finally - copy!(LOAD_PATH, load_path) + @testset "load RelocationTestPkg2 (contains include_dependency) from test/relocatedepot" begin + pkgname = "RelocationTestPkg2" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == false # moving depot changes mtime of include_dependency + Base.require(pkg) # re-precompile + @test Base.isprecompiled(pkg) == true end end From 4e705a7d371fdf9415e7087f24452f08b9e9bee3 Mon Sep 17 00:00:00 2001 From: Florian Atteneder Date: Tue, 17 Oct 2023 23:03:28 +0200 Subject: [PATCH 21/21] update docs --- doc/src/manual/modules.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md index b329dbc91b923..4be08edc56f38 100644 --- a/doc/src/manual/modules.md +++ b/doc/src/manual/modules.md @@ -447,10 +447,12 @@ recompiled upon `using` or `import`. Dependencies are modules it imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref) in the module file(s). -For file dependencies, a change is determined by examining whether the modification time (`mtime`) -of each file loaded by `include` or added explicitly by `include_dependency` is unchanged, or equal -to the modification time truncated to the nearest second (to accommodate systems that can't copy -mtime with sub-second accuracy). It also takes into account whether the path to the file chosen +For file dependencies loaded by `include`, a change is determined by examining whether the +file size (`fsize`) or content (condensed into a hash) is unchanged. +For file dependencies loaded by `include_dependency` a change is determined by examining whether the modification time (`mtime`) +is unchanged, or equal to the modification time truncated to the nearest second +(to accommodate systems that can't copy mtime with sub-second accuracy). +It also takes into account whether the path to the file chosen by the search logic in `require` matches the path that had created the precompile file. It also takes into account the set of dependencies already loaded into the current process and won't recompile those modules, even if their files change or disappear, in order to avoid creating incompatibilities between