diff --git a/libs/Cargo.lock b/libs/Cargo.lock index 6e658cf8..f50c914a 100644 --- a/libs/Cargo.lock +++ b/libs/Cargo.lock @@ -175,6 +175,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayref" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" + [[package]] name = "arrayvec" version = "0.7.6" @@ -308,6 +314,19 @@ dependencies = [ "serde", ] +[[package]] +name = "blake3" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -1794,6 +1813,17 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "merkle_hash" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3edd3572d1a7b4e1b7ce5bb3af05405a8aeab2ec04b29d9779e72ad576ce4f38" +dependencies = [ + "blake3", + "camino", + "rayon", +] + [[package]] name = "miette" version = "7.2.0" @@ -2361,6 +2391,7 @@ dependencies = [ "indexmap", "itertools 0.12.1", "la-arena", + "merkle_hash", "miette", "num_cpus", "once_cell", diff --git a/libs/Cargo.toml b/libs/Cargo.toml index ec298f9e..27cefed5 100644 --- a/libs/Cargo.toml +++ b/libs/Cargo.toml @@ -57,6 +57,7 @@ libtest-mimic = "0.7.0" liquid = "0.26.4" liquid-core = "0.26.4" matchit = "0.7" +merkle_hash = "3.7.0" miette = "7.2.0" mime = "0.3" num_cpus = "1.15.0" diff --git a/libs/pavexc/Cargo.toml b/libs/pavexc/Cargo.toml index 03171fe3..4ec76a2d 100644 --- a/libs/pavexc/Cargo.toml +++ b/libs/pavexc/Cargo.toml @@ -52,6 +52,7 @@ semver = { workspace = true } persist_if_changed = { path = "../persist_if_changed", version = 
"0.1.48" } matchit = { version = "0.7", package = "pavex_matchit" } relative-path = { workspace = true } +merkle_hash = { workspace = true } # Sqlite cache xdg-home = { workspace = true } diff --git a/libs/pavexc/src/rustdoc/compute/cache.rs b/libs/pavexc/src/rustdoc/compute/cache.rs index 3a6b6e3c..8e802881 100644 --- a/libs/pavexc/src/rustdoc/compute/cache.rs +++ b/libs/pavexc/src/rustdoc/compute/cache.rs @@ -7,6 +7,7 @@ use guppy::{ PackageId, }; use itertools::Itertools; +use merkle_hash::Encodable; use r2d2_sqlite::SqliteConnectionManager; use rusqlite::params; use tracing::instrument; @@ -376,6 +377,7 @@ impl ThirdPartyCrateCache { WHERE crate_name = ? AND crate_source = ? AND crate_version = ? AND + crate_hash = ? AND cargo_fingerprint = ? AND rustdoc_options = ? AND default_feature_is_enabled = ? AND @@ -387,6 +389,7 @@ impl ThirdPartyCrateCache { cache_key.crate_name, cache_key.crate_source, cache_key.crate_version, + cache_key.crate_hash, cache_key.cargo_fingerprint, cache_key.rustdoc_options, cache_key.default_feature_is_enabled, @@ -450,6 +453,7 @@ impl ThirdPartyCrateCache { crate_name, crate_source, crate_version, + crate_hash, cargo_fingerprint, rustdoc_options, default_feature_is_enabled, @@ -464,12 +468,13 @@ impl ThirdPartyCrateCache { id2private_import_paths, import_path2id, re_exports - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", )?; stmt.execute(params![ cache_key.crate_name, cache_key.crate_source, cache_key.crate_version, + cache_key.crate_hash, cache_key.cargo_fingerprint, cache_key.rustdoc_options, cache_key.default_feature_is_enabled, @@ -494,6 +499,7 @@ impl ThirdPartyCrateCache { crate_name TEXT NOT NULL, crate_source TEXT NOT NULL, crate_version TEXT NOT NULL, + crate_hash TEXT, cargo_fingerprint TEXT NOT NULL, rustdoc_options TEXT NOT NULL, default_feature_is_enabled INTEGER NOT NULL, @@ -508,7 +514,7 @@ impl ThirdPartyCrateCache { 
id2private_import_paths BLOB NOT NULL, import_path2id BLOB NOT NULL, re_exports BLOB NOT NULL, - PRIMARY KEY (crate_name, crate_source, crate_version, cargo_fingerprint, rustdoc_options, default_feature_is_enabled, active_named_features) + PRIMARY KEY (crate_name, crate_source, crate_version, crate_hash, cargo_fingerprint, rustdoc_options, default_feature_is_enabled, active_named_features) )", [] )?; @@ -640,6 +646,9 @@ pub(super) struct ThirdPartyCrateCacheKey<'a> { pub crate_name: &'a str, pub crate_source: &'a str, pub crate_version: String, + /// The hash of the crate's source code, computed via BLAKE3. + /// It is only populated for path dependencies. + pub crate_hash: Option<String>, pub cargo_fingerprint: &'a str, pub rustdoc_options: String, pub default_feature_is_enabled: bool, @@ -652,10 +661,39 @@ impl<'a> ThirdPartyCrateCacheKey<'a> { package_metadata: &'a PackageMetadata<'a>, cargo_fingerprint: &'a str, ) -> Option<ThirdPartyCrateCacheKey<'a>> { - // We don't want to cache the docs for workspace crates and path dependencies. - let Some(source) = package_metadata.source().external_source() else { - return None; + let source = match package_metadata.source() { + guppy::graph::PackageSource::Workspace(_) => { + // We don't want to cache the docs for workspace crates. + return None; + } + guppy::graph::PackageSource::Path(p) => p.as_str(), + guppy::graph::PackageSource::External(e) => e, }; + let crate_hash = + if let guppy::graph::PackageSource::Path(package_path) = package_metadata.source() { + // We need to compute the hash of the package's contents, + // to invalidate the cache when the package changes. + // This is only relevant for path dependencies. + // We don't need to do this for external dependencies, + // since they are assumed to be immutable. + // We visit all files in the package directory, traversing + // subdirectories recursively, and hash the contents of each file. 
+ let Ok(tree) = merkle_hash::MerkleTree::builder(package_path) + .algorithm(merkle_hash::Algorithm::Blake3) + .hash_names(true) + .build() + else { + tracing::warn!( + "Failed to compute the hash of the package at {:?}. + I won't cache its JSON documentation to avoid serving stale data.", + package_metadata.id() + ); + return None; + }; + Some(tree.root.item.hash.to_hex_string()) + } else { + None + }; let features = package_metadata .to_feature_set(StandardFeatures::Default) .features_for(package_metadata.id()) @@ -669,6 +707,7 @@ impl<'a> ThirdPartyCrateCacheKey<'a> { crate_name: package_metadata.name(), crate_source: source, crate_version: package_metadata.version().to_string(), + crate_hash, cargo_fingerprint, default_feature_is_enabled, // SQLite doesn't support arrays, so we have to serialize these two collections as strings.