Skip to content

Commit

Permalink
Auto merge of #120855 - bjorn3:split_metadata4, r=<try>
Browse files Browse the repository at this point in the history
Introduce -Zsplit-metadata option

This splits the crate metadata out of library files. Instead, only the SVH (strict version hash) and a small amount of extra metadata are preserved in the library, which is enough to locate and load the right rmeta file. This significantly reduces library size. In addition, it makes it cheaper to check whether different library files belong to the same crate.

A fair amount of the complexity in this PR is to work around the fact that cargo doesn't directly support this option yet.

Fixes #23366
Fixes #57076

Revives #93945
  • Loading branch information
bors committed Feb 9, 2024
2 parents e28fae5 + 7770002 commit 8fb65ac
Show file tree
Hide file tree
Showing 22 changed files with 212 additions and 120 deletions.
1 change: 1 addition & 0 deletions compiler/rustc_codegen_cranelift/build_system/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ impl<'a> TestRunner<'a> {
cmd.arg("-Zunstable-options");
cmd.arg("--check-cfg=cfg(no_unstable_features)");
cmd.arg("--check-cfg=cfg(jit)");
cmd.arg("--emit=metadata,link");
cmd.args(args);
cmd
}
Expand Down
7 changes: 5 additions & 2 deletions compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,11 @@ fn link_rlib<'a>(

let trailing_metadata = match flavor {
RlibFlavor::Normal => {
let (metadata, metadata_position) =
create_wrapper_file(sess, b".rmeta".to_vec(), codegen_results.metadata.raw_data());
let (metadata, metadata_position) = create_wrapper_file(
sess,
b".rmeta".to_vec(),
codegen_results.metadata.maybe_reference(),
);
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
match metadata_position {
MetadataPosition::First => {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_ssa/src/back/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,8 @@ pub fn create_compressed_metadata_file(
symbol_name: &str,
) -> Vec<u8> {
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.raw_data());
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.maybe_reference());

let Some(mut file) = create_object_file(sess) else {
return packed_metadata.to_vec();
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_driver_impl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,7 @@ fn list_metadata(early_dcx: &EarlyDiagCtxt, sess: &Session, metadata_loader: &dy
metadata_loader,
&mut v,
&sess.opts.unstable_opts.ls,
sess.cfg_version,
)
.unwrap();
safe_println!("{}", String::from_utf8(v).unwrap());
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ fn test_unstable_options_tracking_hash() {
untracked!(shell_argfiles, true);
untracked!(span_debug, true);
untracked!(span_free_formats, true);
untracked!(split_metadata, true);
untracked!(temps_dir, Some(String::from("abc")));
untracked!(threads, 99);
untracked!(time_llvm_passes, true);
Expand Down
15 changes: 10 additions & 5 deletions compiler/rustc_metadata/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");

// Always create a file at `metadata_filename`, even if we have nothing to write to it.
// This simplifies the creation of the output `out_filename` when requested.
Expand All @@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
}
MetadataKind::Uncompressed | MetadataKind::Compressed => {
encode_metadata(tcx, &metadata_filename);
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
}
};

Expand Down Expand Up @@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {

// Load metadata back to memory: codegen may need to include it in object files.
let metadata =
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
.unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});

let need_metadata_module = metadata_kind == MetadataKind::Compressed;

Expand Down
131 changes: 62 additions & 69 deletions compiler/rustc_metadata/src/locator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ use std::{cmp, fmt};
pub(crate) struct CrateLocator<'a> {
// Immutable per-session configuration.
only_needs_metadata: bool,
sysroot: &'a Path,
metadata_loader: &'a dyn MetadataLoader,
cfg_version: &'static str,

Expand Down Expand Up @@ -319,7 +318,6 @@ impl<'a> CrateLocator<'a> {

CrateLocator {
only_needs_metadata,
sysroot: &sess.sysroot,
metadata_loader,
cfg_version: sess.cfg_version,
crate_name,
Expand Down Expand Up @@ -569,31 +567,47 @@ impl<'a> CrateLocator<'a> {
debug!("skipping empty file");
continue;
}
let (hash, metadata) =
match get_metadata_section(self.target, flavor, &lib, self.metadata_loader) {
Ok(blob) => {
if let Some(h) = self.crate_matches(&blob, &lib) {
(h, blob)
} else {
info!("metadata mismatch");
continue;
}
}
Err(MetadataError::LoadFailure(err)) => {
info!("no metadata found: {}", err);
// The file was present and created by the same compiler version, but we
// couldn't load it for some reason. Give a hard error instead of silently
// ignoring it, but only if we would have given an error anyway.
self.crate_rejections
.via_invalid
.push(CrateMismatch { path: lib, got: err });
continue;
}
Err(err @ MetadataError::NotPresent(_)) => {
info!("no metadata found: {}", err);
let (hash, metadata) = match get_metadata_section(
self.target,
flavor,
&lib,
self.metadata_loader,
self.cfg_version,
) {
Ok(blob) => {
if let Some(h) = self.crate_matches(&blob, &lib) {
(h, blob)
} else {
info!("metadata mismatch");
continue;
}
};
}
Err(MetadataError::VersionMismatch { expected_version, found_version }) => {
// The file was present, but it was created by a different compiler version.
// Record it as a version rejection instead of silently ignoring it; it is
// only reported if we would have given an error anyway.
info!(
"Rejecting via version: expected {} got {}",
expected_version, found_version
);
self.crate_rejections
.via_version
.push(CrateMismatch { path: lib, got: found_version });
continue;
}
Err(MetadataError::LoadFailure(err)) => {
info!("no metadata found: {}", err);
// The file was present and created by the same compiler version, but we
// couldn't load it for some reason. Give a hard error instead of silently
// ignoring it, but only if we would have given an error anyway.
self.crate_rejections.via_invalid.push(CrateMismatch { path: lib, got: err });
continue;
}
Err(err @ MetadataError::NotPresent(_)) => {
info!("no metadata found: {}", err);
continue;
}
};
// If we see multiple hashes, emit an error about duplicate candidates.
if slot.as_ref().is_some_and(|s| s.0 != hash) {
if let Some(candidates) = err_data {
Expand All @@ -610,33 +624,11 @@ impl<'a> CrateLocator<'a> {
continue;
}

// Ok so at this point we've determined that `(lib, kind)` above is
// a candidate crate to load, and that `slot` is either none (this
// is the first crate of its kind) or if some the previous path has
// the exact same hash (e.g., it's the exact same crate).
//
// In principle these two candidate crates are exactly the same so
// we can choose either of them to link. As a stupidly gross hack,
// however, we favor crate in the sysroot.
//
// You can find more info in rust-lang/rust#39518 and various linked
// issues, but the general gist is that during testing libstd the
// compilers has two candidates to choose from: one in the sysroot
// and one in the deps folder. These two crates are the exact same
// crate but if the compiler chooses the one in the deps folder
// it'll cause spurious errors on Windows.
//
// As a result, we favor the sysroot crate here. Note that the
// candidates are all canonicalized, so we canonicalize the sysroot
// as well.
if let Some((prev, _)) = &ret {
let sysroot = self.sysroot;
let sysroot = try_canonicalize(sysroot).unwrap_or_else(|_| sysroot.to_path_buf());
if prev.starts_with(&sysroot) {
continue;
}
if !metadata.get_header().is_reference {
// FIXME nicer error when only an rlib or dylib with is_reference is found
// and no .rmeta?
*slot = Some((hash, metadata, lib.clone()));
}
*slot = Some((hash, metadata, lib.clone()));
ret = Some((lib, kind));
}

Expand All @@ -648,16 +640,6 @@ impl<'a> CrateLocator<'a> {
}

fn crate_matches(&mut self, metadata: &MetadataBlob, libpath: &Path) -> Option<Svh> {
let rustc_version = rustc_version(self.cfg_version);
let found_version = metadata.get_rustc_version();
if found_version != rustc_version {
info!("Rejecting via version: expected {} got {}", rustc_version, found_version);
self.crate_rejections
.via_version
.push(CrateMismatch { path: libpath.to_path_buf(), got: found_version });
return None;
}

let header = metadata.get_header();
if header.is_proc_macro_crate != self.is_proc_macro {
info!(
Expand Down Expand Up @@ -736,10 +718,12 @@ impl<'a> CrateLocator<'a> {
let loc_canon = loc.canonicalized().clone();
let loc = loc.original();
if loc.file_name().unwrap().to_str().unwrap().ends_with(".rlib") {
rmetas.insert(loc_canon.with_extension("rmeta"), PathKind::ExternFlag);
rlibs.insert(loc_canon, PathKind::ExternFlag);
} else if loc.file_name().unwrap().to_str().unwrap().ends_with(".rmeta") {
rmetas.insert(loc_canon, PathKind::ExternFlag);
} else {
rmetas.insert(loc_canon.with_extension("rmeta"), PathKind::ExternFlag);
dylibs.insert(loc_canon, PathKind::ExternFlag);
}
} else {
Expand Down Expand Up @@ -770,6 +754,7 @@ fn get_metadata_section<'p>(
flavor: CrateFlavor,
filename: &'p Path,
loader: &dyn MetadataLoader,
cfg_version: &'static str,
) -> Result<MetadataBlob, MetadataError<'p>> {
if !filename.exists() {
return Err(MetadataError::NotPresent(filename));
Expand Down Expand Up @@ -847,13 +832,12 @@ fn get_metadata_section<'p>(
}
};
let blob = MetadataBlob(raw_bytes);
if blob.is_compatible() {
Ok(blob)
} else {
Err(MetadataError::LoadFailure(format!(
"invalid metadata version found: {}",
filename.display()
)))
match blob.check_compatibility(cfg_version) {
Ok(()) => Ok(blob),
Err(version) => Err(MetadataError::VersionMismatch {
expected_version: cfg_version,
found_version: version,
}),
}
}

Expand All @@ -864,9 +848,10 @@ pub fn list_file_metadata(
metadata_loader: &dyn MetadataLoader,
out: &mut dyn Write,
ls_kinds: &[String],
cfg_version: &'static str,
) -> IoResult<()> {
let flavor = get_flavor_from_path(path);
match get_metadata_section(target, flavor, path, metadata_loader) {
match get_metadata_section(target, flavor, path, metadata_loader, cfg_version) {
Ok(metadata) => metadata.list_crate_metadata(out, ls_kinds),
Err(msg) => write!(out, "{msg}\n"),
}
Expand Down Expand Up @@ -932,6 +917,8 @@ enum MetadataError<'a> {
NotPresent(&'a Path),
/// The file was present and invalid.
LoadFailure(String),
/// The file was present, but compiled with a different rustc version.
VersionMismatch { expected_version: &'static str, found_version: String },
}

impl fmt::Display for MetadataError<'_> {
Expand All @@ -941,6 +928,12 @@ impl fmt::Display for MetadataError<'_> {
f.write_str(&format!("no such file: '{}'", filename.display()))
}
MetadataError::LoadFailure(msg) => f.write_str(msg),
MetadataError::VersionMismatch { expected_version, found_version } => {
f.write_str(&format!(
"rustc version mismatch. expected {}, found {}",
expected_version, found_version,
))
}
}
}
}
Expand Down
22 changes: 16 additions & 6 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -680,13 +680,23 @@ impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T>
implement_ty_decoder!(DecodeContext<'a, 'tcx>);

impl MetadataBlob {
pub(crate) fn is_compatible(&self) -> bool {
self.blob().starts_with(METADATA_HEADER)
}
pub(crate) fn check_compatibility(&self, cfg_version: &'static str) -> Result<(), String> {
if !self.blob().starts_with(METADATA_HEADER) {
if self.blob().starts_with(b"rust") {
return Err("<unknown rustc version>".to_string());
}
return Err("<invalid metadata header>".to_string());
}

pub(crate) fn get_rustc_version(&self) -> String {
LazyValue::<String>::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 8).unwrap())
.decode(self)
let found_version = LazyValue::<String>::from_position(
NonZeroUsize::new(METADATA_HEADER.len() + 8).unwrap(),
)
.decode(self);
if rustc_version(cfg_version) != found_version {
return Err(found_version);
}

Ok(())
}

fn root_pos(&self) -> NonZeroUsize {
Expand Down
Loading

0 comments on commit 8fb65ac

Please sign in to comment.