From a5423448adf1ecd5f7defa53d5e75c97ff76f5aa Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Mon, 9 Aug 2021 14:55:18 -0500 Subject: [PATCH 1/2] Protect crate metadata from corruption via SHA-256 hash We now compute a SHA-256 of the raw (encoded) crate metadata, and append it to the final crate metadata that we store on disk. When we load the metadata, we compute the hash from the metadata blob, and verify that it matches the hash stored at the end of the blob. This allows us to detect on-disk corruption of the metadata file, which might otherwise cause a build failure much later in compilation. If anyone is manually editing crate metadata on-disk, they will need to re-compute and modify the hash at the end of the blob. This will allow us to determine whether or not crate metadata corruption is causing some of the unusual incr-comp failures we've been seeing. The incremental compilation data itself will be hashed in a follow-up PR. --- Cargo.lock | 1 + compiler/rustc_metadata/Cargo.toml | 1 + compiler/rustc_metadata/src/locator.rs | 2 +- compiler/rustc_metadata/src/rmeta/decoder.rs | 24 ++++++++++++++++++-- compiler/rustc_metadata/src/rmeta/encoder.rs | 11 +++++++-- 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 940608975c5ec..1fe6e77e40d01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3994,6 +3994,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", + "sha2", "smallvec", "snap", "tracing", diff --git a/compiler/rustc_metadata/Cargo.toml b/compiler/rustc_metadata/Cargo.toml index 7c79aa5e00bba..14b4b35bbb646 100644 --- a/compiler/rustc_metadata/Cargo.toml +++ b/compiler/rustc_metadata/Cargo.toml @@ -10,6 +10,7 @@ doctest = false libc = "0.2" snap = "1" tracing = "0.1" +sha2 = "0.9" smallvec = { version = "1.6.1", features = ["union", "may_dangle"] } rustc_middle = { path = "../rustc_middle" } rustc_attr = { path = "../rustc_attr" } diff --git a/compiler/rustc_metadata/src/locator.rs 
b/compiler/rustc_metadata/src/locator.rs index 8d1bf6f55df09..b80c2dfbcd519 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -773,7 +773,7 @@ fn get_metadata_section( rustc_erase_owner!(OwningRef::new(mmap).map_owner_box()) } }; - let blob = MetadataBlob::new(raw_bytes); + let blob = MetadataBlob::new(raw_bytes, filename); if blob.is_compatible() { Ok(blob) } else { diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 8bdd4313de4c3..31b0a504ee470 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -36,6 +36,7 @@ use rustc_span::symbol::{sym, Ident, Symbol}; use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP}; use proc_macro::bridge::client::ProcMacro; +use sha2::{Digest, Sha256}; use std::io; use std::mem; use std::num::NonZeroUsize; @@ -583,8 +584,10 @@ where implement_ty_decoder!(DecodeContext<'a, 'tcx>); impl MetadataBlob { - crate fn new(metadata_ref: MetadataRef) -> MetadataBlob { - MetadataBlob(metadata_ref) + crate fn new(metadata_ref: MetadataRef, filename: &Path) -> MetadataBlob { + let blob = MetadataBlob(metadata_ref); + blob.check_hash(filename); + blob } crate fn is_compatible(&self) -> bool { @@ -596,6 +599,23 @@ impl MetadataBlob { .decode(self) } + // Hashes the entire contents of the metadata blob, + // panicking if the computed hash is not equal to + // the original hash stored in the file. 
+ fn check_hash(&self, filename: &Path) { + // We store our 32-byte (256-bit) SHA256 hash at + // the end of the file + let hash_offset = self.raw_bytes().len() - 32; + let stored_hash = &self.raw_bytes()[hash_offset..]; + let recomputed_hash = Sha256::digest(&self.raw_bytes()[..hash_offset]); + if stored_hash != &*recomputed_hash { + panic!( + "Expected metadata hash {:?}, found {:?} for file {:?}", + stored_hash, recomputed_hash, filename + ); + } + } + crate fn get_root(&self) -> CrateRoot<'tcx> { let slice = self.raw_bytes(); let offset = METADATA_HEADER.len(); diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index b7921c403bb7b..76af8c3e71f8d 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -35,6 +35,7 @@ use rustc_span::{ RealFileName, }; use rustc_target::abi::VariantIdx; +use sha2::{Digest, Sha256}; use std::hash::Hash; use std::num::NonZeroUsize; use std::path::Path; @@ -2144,7 +2145,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { // culminating in the `CrateRoot` which points to all of it. let root = ecx.encode_crate_root(); - let mut result = ecx.opaque.into_inner(); + let result = &mut ecx.opaque.data; // Encode the root position. let header = METADATA_HEADER.len(); @@ -2154,5 +2155,11 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { result[header + 2] = (pos >> 8) as u8; result[header + 3] = (pos >> 0) as u8; - EncodedMetadata { raw_data: result } + // Encode a hash of the file contents. We check + // this when decoding the blob, to protect + // against file corruption. 
+ let hash = Sha256::digest(&result); + ecx.opaque.emit_raw_bytes(&hash).unwrap(); + + EncodedMetadata { raw_data: ecx.opaque.into_inner() } } From 7ee897297117c61d335392eb0463cb19ef38b63f Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Thu, 12 Aug 2021 14:06:43 -0500 Subject: [PATCH 2/2] Disable hash checking --- compiler/rustc_metadata/src/rmeta/decoder.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 31b0a504ee470..de7f588b81155 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -603,6 +603,9 @@ impl MetadataBlob { // panicking if the computed hash is not equal to // the original hash stored in the file. fn check_hash(&self, filename: &Path) { + if true { + return; + } // We store our 32-byte (256-bit) SHA256 hash at // the end of the file let hash_offset = self.raw_bytes().len() - 32;