Skip to content

Commit

Permalink
Make cache tracking resilient to unexpected files
Browse files Browse the repository at this point in the history
This makes the cache tracking synchronization code resilient to
unexpected files in the cache directory. Previously the code was
assuming that all entries in paths like `registry/index/*` are
directories. However, actions such as opening those directories in
macOS's Finder can create files called `.DS_Store`. This caused the
scan to fail on such a path, since the entry isn't a directory.

This could in theory be made stricter, such as expecting directories to
have a particular pattern for their names. However, it seems like we never
enacted restrictions on what names are used for the git directories, so
it wouldn't work very well for that. Though that is something we could
consider in the future.
  • Loading branch information
ehuss committed Feb 5, 2025
1 parent dffa757 commit 39cc418
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 25 deletions.
46 changes: 32 additions & 14 deletions src/cargo/core/global_cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,8 +657,19 @@ impl GlobalCacheTracker {
Ok(())
}

/// Returns a list of directory entries in the given path.
fn names_from(path: &Path) -> CargoResult<Vec<String>> {
/// Returns a list of directory entries in the given path that are
/// themselves directories.
fn list_dir_names(path: &Path) -> CargoResult<Vec<String>> {
Self::read_dir_with_filter(path, &|entry| {
entry.file_type().map_or(false, |ty| ty.is_dir())
})
}

/// Returns a list of names in a directory, filtered by the given callback.
fn read_dir_with_filter(
path: &Path,
filter: &dyn Fn(&std::fs::DirEntry) -> bool,
) -> CargoResult<Vec<String>> {
let entries = match path.read_dir() {
Ok(e) => e,
Err(e) => {
Expand All @@ -672,7 +683,9 @@ impl GlobalCacheTracker {
}
};
let names = entries
.filter_map(|entry| entry.ok()?.file_name().into_string().ok())
.filter_map(|entry| entry.ok())
.filter(|entry| filter(entry))
.filter_map(|entry| entry.file_name().into_string().ok())
.collect();
Ok(names)
}
Expand Down Expand Up @@ -803,7 +816,7 @@ impl GlobalCacheTracker {
base_path: &Path,
) -> CargoResult<()> {
trace!(target: "gc", "checking for untracked parent to add to {parent_table_name}");
let names = Self::names_from(base_path)?;
let names = Self::list_dir_names(base_path)?;

let mut stmt = conn.prepare_cached(&format!(
"INSERT INTO {parent_table_name} (name, timestamp)
Expand Down Expand Up @@ -896,21 +909,26 @@ impl GlobalCacheTracker {
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT DO NOTHING",
)?;
let index_names = Self::names_from(&base_path)?;
let index_names = Self::list_dir_names(&base_path)?;
for index_name in index_names {
let Some(id) = Self::id_from_name(conn, REGISTRY_INDEX_TABLE, &index_name)? else {
// The id is missing from the database. This should be resolved
// via update_db_parent_for_removed_from_disk.
continue;
};
let index_path = base_path.join(index_name);
for crate_name in Self::names_from(&index_path)? {
if crate_name.ends_with(".crate") {
// Missing files should have already been taken care of by
// update_db_for_removed.
let size = paths::metadata(index_path.join(&crate_name))?.len();
insert_stmt.execute(params![id, crate_name, size, now])?;
}
let crates = Self::read_dir_with_filter(&index_path, &|entry| {
entry.file_type().map_or(false, |ty| ty.is_file())
&& entry
.file_name()
.to_str()
.map_or(false, |name| name.ends_with(".crate"))
})?;
for crate_name in crates {
// Missing files should have already been taken care of by
// update_db_for_removed.
let size = paths::metadata(index_path.join(&crate_name))?.len();
insert_stmt.execute(params![id, crate_name, size, now])?;
}
}
Ok(())
Expand All @@ -931,7 +949,7 @@ impl GlobalCacheTracker {
) -> CargoResult<()> {
trace!(target: "gc", "populating untracked files for {table_name}");
// Gather names (and make sure they are in the database).
let id_names = Self::names_from(&base_path)?;
let id_names = Self::list_dir_names(&base_path)?;

// This SELECT is used to determine if the directory is already
// tracked. We don't want to do the expensive size computation unless
Expand All @@ -954,7 +972,7 @@ impl GlobalCacheTracker {
continue;
};
let index_path = base_path.join(id_name);
let names = Self::names_from(&index_path)?;
let names = Self::list_dir_names(&index_path)?;
let max = names.len();
for (i, name) in names.iter().enumerate() {
if select_stmt.exists(params![id, name])? {
Expand Down
12 changes: 1 addition & 11 deletions tests/testsuite/global_cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2145,18 +2145,8 @@ fn resilient_to_unexpected_files() {

p.cargo("clean gc -Zgc")
.masquerade_as_nightly_cargo(&["gc"])
.with_status(101)
.with_stderr_data(str![[r#"
[ERROR] failed to clean entries from the global cache
Caused by:
failed to sync tracking database
Caused by:
failed to read path `"[ROOT]/home/.cargo/registry/cache/foo"`
Caused by:
Not a directory (os error 20)
[REMOVED] [FILE_NUM] files, [FILE_SIZE]B total
"#]])
.run();
Expand Down

0 comments on commit 39cc418

Please sign in to comment.