Skip to content

Commit

Permalink
fix: track version in fingerprint dep-info files
Browse files Browse the repository at this point in the history
Encodes the version information into Cargo's fingerprint dep-info files,
so that when the format encoding changes in the future,
Cargo can tell that a dep-info file is outdated and doesn't bother parsing it.

Since there was no version info encoded in the old format (call it v0),
to be compatible with older cargoes,
this PR works around it with a horrible hack.
It is explained in the doc comment of `EncodedDepInfo`.
  • Loading branch information
weihanglo committed Oct 30, 2024
1 parent 6f2b5d9 commit 5c17b23
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 6 deletions.
6 changes: 6 additions & 0 deletions crates/cargo-test-support/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1597,6 +1597,12 @@ pub fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path,
assert!(files.next().is_none(), "expected only 1 dep-info file");
let dep_info = fs::read(&info_path).unwrap();
let dep_info = &mut &dep_info[..];

// Consume the magic marker and version. Here they don't really matter.
read_usize(dep_info);
read_u8(dep_info);
read_u8(dep_info);

let deps = (0..read_usize(dep_info))
.map(|_| {
let ty = read_u8(dep_info);
Expand Down
68 changes: 62 additions & 6 deletions src/cargo/core/compiler/fingerprint/dep_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ use cargo_util::Sha256;
use crate::CargoResult;
use crate::CARGO_ENV;

/// The current format version of [`EncodedDepInfo`].
///
/// [`EncodedDepInfo::serialize`] writes this byte right after the magic marker,
/// and [`EncodedDepInfo::parse`] returns `None` for any other version value.
const CURRENT_ENCODED_DEP_INFO_VERSION: u8 = 1;

/// The representation of the `.d` dep-info file generated by rustc
#[derive(Default)]
pub struct RustcDepInfo {
Expand Down Expand Up @@ -61,20 +64,35 @@ pub enum DepInfoPathType {
/// Currently the format looks like:
///
/// ```text
/// +------------+------------+---------------+---------------+
/// | # of files | file paths | # of env vars | env var pairs |
/// +------------+------------+---------------+---------------+
/// +--------+---------+------------+------------+---------------+---------------+
/// | marker | version | # of files | file paths | # of env vars | env var pairs |
/// +--------+---------+------------+------------+---------------+---------------+
/// ```
///
/// Each field represents
///
/// * _Marker_ --- A magic marker to ensure that older Cargoes, which only
/// recognize format v0 (prior to checksum support in [`f4ca7390`]), do not
/// go on to parse newer formats. Since [`EncodedDepInfo`] is just an
/// optimization, to avoid adding any complexity, Cargo recognizes only one
/// version: [`CURRENT_ENCODED_DEP_INFO_VERSION`].
/// The current layout looks like
/// ```text
/// +----------------------------+
/// | [0x01 0x00 0x00 0x00 0xff] |
/// +----------------------------+
/// ```
/// These bytes will be interpreted by older Cargoes as "one file tracked, with
/// an invalid [`DepInfoPathType`] variant of 255", so they will just stop
/// parsing. This could prevent some bad parsing in rust-lang/cargo#14712.
/// * _Version_ --- The current format version.
/// * _Number of files/envs_ --- A `u32` representing the number of things.
/// * _File paths_ --- Zero or more paths of files the dep-info file depends on.
/// Each path is encoded as the following:
///
/// ```text
/// +-----------+-------------+------------+---------------+-----------+-------+
/// | Path type | len of path | path bytes | cksum exists? | file size | cksum |
/// | path type | len of path | path bytes | cksum exists? | file size | cksum |
/// +-----------+-------------+------------+---------------+-----------+-------+
/// ```
/// * _Env var pairs_ --- Zero or more env vars the dep-info file depends on.
Expand All @@ -84,6 +102,8 @@ pub enum DepInfoPathType {
/// | len of key | key bytes | value exists? | len of value | value bytes |
/// +------------+-----------+---------------+--------------+-------------+
/// ```
///
/// [`f4ca7390`]: https://github.com/rust-lang/cargo/commit/f4ca739073185ea5e1148ff100bb4a06d3bf721d
#[derive(Default, Debug, PartialEq, Eq)]
pub struct EncodedDepInfo {
pub files: Vec<(DepInfoPathType, PathBuf, Option<(u64, String)>)>,
Expand All @@ -93,6 +113,12 @@ pub struct EncodedDepInfo {
impl EncodedDepInfo {
pub fn parse(mut bytes: &[u8]) -> Option<EncodedDepInfo> {
let bytes = &mut bytes;
read_magic_marker(bytes)?;
let version = read_u8(bytes)?;
if version != CURRENT_ENCODED_DEP_INFO_VERSION {
return None;
}

let nfiles = read_usize(bytes)?;
let mut files = Vec::with_capacity(nfiles);
for _ in 0..nfiles {
Expand Down Expand Up @@ -129,6 +155,18 @@ impl EncodedDepInfo {
}
return Some(EncodedDepInfo { files, env });

/// Consumes the magic marker from the front of `bytes`.
///
/// The marker is a count followed by a single byte that must be `u8::MAX`;
/// see [`EncodedDepInfo`] for why a magic marker exists. Returns `None` when
/// either read fails or when the byte after the count is not `u8::MAX`.
fn read_magic_marker(bytes: &mut &[u8]) -> Option<()> {
    // The count is only skipped over; its value is not inspected.
    read_usize(bytes)?;
    match read_u8(bytes)? {
        u8::MAX => Some(()),
        // Old depinfo. Give up parsing it.
        _ => None,
    }
}

fn read_usize(bytes: &mut &[u8]) -> Option<usize> {
let ret = bytes.get(..4)?;
*bytes = &bytes[4..];
Expand Down Expand Up @@ -162,6 +200,10 @@ impl EncodedDepInfo {
pub fn serialize(&self) -> CargoResult<Vec<u8>> {
let mut ret = Vec::new();
let dst = &mut ret;

write_magic_marker(dst);
dst.push(CURRENT_ENCODED_DEP_INFO_VERSION);

write_usize(dst, self.files.len());
for (ty, file, checksum_info) in self.files.iter() {
match ty {
Expand Down Expand Up @@ -189,6 +231,14 @@ impl EncodedDepInfo {
}
return Ok(ret);

/// Appends the magic marker to `dst`: a count of one followed by a `u8::MAX` byte.
///
/// See [`EncodedDepInfo`] for why a magic marker exists.
///
/// There is an assumption that there is always at least a file.
fn write_magic_marker(dst: &mut Vec<u8>) {
    // Occupies the position where older Cargoes read the number of files.
    const MARKER_COUNT: usize = 1;
    // Occupies the position where older Cargoes read a path type; they treat
    // 255 as an invalid variant and stop parsing.
    const MARKER_BYTE: u8 = u8::MAX;

    write_usize(dst, MARKER_COUNT);
    dst.push(MARKER_BYTE);
}

fn write_bytes(dst: &mut Vec<u8>, val: impl AsRef<[u8]>) {
let val = val.as_ref();
write_usize(dst, val.len());
Expand Down Expand Up @@ -673,7 +723,7 @@ mod encoded_dep_info {
#[rustfmt::skip]
let data = [
0x01, 0x00, 0x00, 0x00, 0xff, // magic marker
0x01, // version
CURRENT_ENCODED_DEP_INFO_VERSION, // version
0x01, 0x00, 0x00, 0x00, // # of files
0x00, // path type
0x04, 0x00, 0x00, 0x00, // len of path
Expand All @@ -682,7 +732,13 @@ mod encoded_dep_info {
0x00, 0x00, 0x00, 0x00, // # of env vars
];
// The current cargo doesn't recognize the magic marker.
assert!(EncodedDepInfo::parse(&data).is_none());
assert_eq!(
EncodedDepInfo::parse(&data).unwrap(),
EncodedDepInfo {
files: vec![(DepInfoPathType::PackageRootRelative, "rust".into(), None)],
env: Vec::new(),
}
);
}

#[test]
Expand Down

0 comments on commit 5c17b23

Please sign in to comment.