Skip to content

Commit

Permalink
Start caching path dependencies, but compute the hash of their contents to avoid serving stale data
Browse files Browse the repository at this point in the history
  • Loading branch information
LukeMathWalker committed Sep 7, 2024
1 parent bbc2209 commit 87085a7
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 5 deletions.
31 changes: 31 additions & 0 deletions libs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions libs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ libtest-mimic = "0.7.0"
liquid = "0.26.4"
liquid-core = "0.26.4"
matchit = "0.7"
merkle_hash = "3.7.0"
miette = "7.2.0"
mime = "0.3"
num_cpus = "1.15.0"
Expand Down
1 change: 1 addition & 0 deletions libs/pavexc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ semver = { workspace = true }
persist_if_changed = { path = "../persist_if_changed", version = "0.1.48" }
matchit = { version = "0.7", package = "pavex_matchit" }
relative-path = { workspace = true }
merkle_hash = { workspace = true }

# Sqlite cache
xdg-home = { workspace = true }
Expand Down
49 changes: 44 additions & 5 deletions libs/pavexc/src/rustdoc/compute/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use guppy::{
PackageId,
};
use itertools::Itertools;
use merkle_hash::Encodable;
use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::params;
use tracing::instrument;
Expand Down Expand Up @@ -376,6 +377,7 @@ impl ThirdPartyCrateCache {
WHERE crate_name = ? AND
crate_source = ? AND
crate_version = ? AND
crate_hash = ? AND
cargo_fingerprint = ? AND
rustdoc_options = ? AND
default_feature_is_enabled = ? AND
Expand All @@ -387,6 +389,7 @@ impl ThirdPartyCrateCache {
cache_key.crate_name,
cache_key.crate_source,
cache_key.crate_version,
cache_key.crate_hash,
cache_key.cargo_fingerprint,
cache_key.rustdoc_options,
cache_key.default_feature_is_enabled,
Expand Down Expand Up @@ -450,6 +453,7 @@ impl ThirdPartyCrateCache {
crate_name,
crate_source,
crate_version,
crate_hash,
cargo_fingerprint,
rustdoc_options,
default_feature_is_enabled,
Expand All @@ -464,12 +468,13 @@ impl ThirdPartyCrateCache {
id2private_import_paths,
import_path2id,
re_exports
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
)?;
stmt.execute(params![
cache_key.crate_name,
cache_key.crate_source,
cache_key.crate_version,
cache_key.crate_hash,
cache_key.cargo_fingerprint,
cache_key.rustdoc_options,
cache_key.default_feature_is_enabled,
Expand All @@ -494,6 +499,7 @@ impl ThirdPartyCrateCache {
crate_name TEXT NOT NULL,
crate_source TEXT NOT NULL,
crate_version TEXT NOT NULL,
crate_hash TEXT,
cargo_fingerprint TEXT NOT NULL,
rustdoc_options TEXT NOT NULL,
default_feature_is_enabled INTEGER NOT NULL,
Expand All @@ -508,7 +514,7 @@ impl ThirdPartyCrateCache {
id2private_import_paths BLOB NOT NULL,
import_path2id BLOB NOT NULL,
re_exports BLOB NOT NULL,
PRIMARY KEY (crate_name, crate_source, crate_version, cargo_fingerprint, rustdoc_options, default_feature_is_enabled, active_named_features)
PRIMARY KEY (crate_name, crate_source, crate_version, crate_hash, cargo_fingerprint, rustdoc_options, default_feature_is_enabled, active_named_features)
)",
[]
)?;
Expand Down Expand Up @@ -640,6 +646,9 @@ pub(super) struct ThirdPartyCrateCacheKey<'a> {
pub crate_name: &'a str,
pub crate_source: &'a str,
pub crate_version: String,
/// The hash of the crate's source code, computed via BLAKE3.
/// It is only populated for path dependencies.
pub crate_hash: Option<String>,
pub cargo_fingerprint: &'a str,
pub rustdoc_options: String,
pub default_feature_is_enabled: bool,
Expand All @@ -652,10 +661,39 @@ impl<'a> ThirdPartyCrateCacheKey<'a> {
package_metadata: &'a PackageMetadata<'a>,
cargo_fingerprint: &'a str,
) -> Option<ThirdPartyCrateCacheKey<'a>> {
// We don't want to cache the docs for workspace crates and path dependencies.
let Some(source) = package_metadata.source().external_source() else {
return None;
let source = match package_metadata.source() {
guppy::graph::PackageSource::Workspace(_) => {
// We don't want to cache the docs for workspace crates.
return None;
}
guppy::graph::PackageSource::Path(p) => p.as_str(),
guppy::graph::PackageSource::External(e) => e,
};
let crate_hash =
if let guppy::graph::PackageSource::Path(package_path) = package_metadata.source() {
// We need to compute the hash of the package's contents,
// to invalidate the cache when the package changes.
// This is only relevant for path dependencies.
// We don't need to do this for external dependencies,
// since they are assumed to be immutable.
// We visit all files in the package directory, traversing
// subdirectories recursively, and hash the contents of each file.
let Ok(tree) = merkle_hash::MerkleTree::builder(package_path)
.algorithm(merkle_hash::Algorithm::Blake3)
.hash_names(true)
.build()
else {
tracing::warn!(
"Failed to compute the hash of the package at {:?}.
I won't cache its JSON documentation to avoid serving stale data.",
package_metadata.id()
);
return None;
};
Some(tree.root.item.hash.to_hex_string())
} else {
None
};
let features = package_metadata
.to_feature_set(StandardFeatures::Default)
.features_for(package_metadata.id())
Expand All @@ -669,6 +707,7 @@ impl<'a> ThirdPartyCrateCacheKey<'a> {
crate_name: package_metadata.name(),
crate_source: source,
crate_version: package_metadata.version().to_string(),
crate_hash,
cargo_fingerprint,
default_feature_is_enabled,
// SQLite doesn't support arrays, so we have to serialize these two collections as strings.
Expand Down

0 comments on commit 87085a7

Please sign in to comment.