Skip to content

Commit

Permalink
storage: add garbage collection in CasMgr
Browse files Browse the repository at this point in the history
- Changed `delete_blobs` method in `CasDb` to take an immutable reference (`&self`) instead of a mutable reference (`&mut self`).
- Updated `dedup_chunk` method in `CasMgr` to correctly handle the deletion of non-existent blob files from both the file descriptor cache and the database.
- Implemented the `gc` (garbage collection) method in `CasMgr` to identify and remove blobs that no longer exist on the filesystem, ensuring the database and cache remain consistent.

Signed-off-by: Yadong Ding <ding_yadong@foxmail.com>
  • Loading branch information
Desiki-high committed Sep 22, 2024
1 parent aa54630 commit 0d78c8b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
2 changes: 1 addition & 1 deletion storage/src/cache/dedup/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ impl CasDb {
Ok(conn.last_insert_rowid() as u64)
}

pub fn delete_blobs(&mut self, blobs: &[String]) -> Result<()> {
pub fn delete_blobs(&self, blobs: &[String]) -> Result<()> {
let delete_blobs_sql = "DELETE FROM Blobs WHERE BlobId = (?1)";
let delete_chunks_sql = "DELETE FROM Chunks WHERE BlobId = (?1)";
let mut conn = self.get_connection()?;
Expand Down
36 changes: 36 additions & 0 deletions storage/src/cache/dedup/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,15 @@ impl CasMgr {
}
}
}
} else if d_file.as_ref().unwrap().metadata().is_err() {
// If the blob file no longer exists, delete it from fds and db.
let mut guard = self.fds.write().unwrap();
guard.remove(&path);
let blob_ids: &[String] = &[path];
if let Err(e) = self.db.delete_blobs(&blob_ids) {
warn!("failed to delete blobs: {}", e);
}
return false;
}

if let Some(f) = d_file {
Expand Down Expand Up @@ -176,6 +185,33 @@ impl CasMgr {
blob.digester().to_string() + ":" + &chunk.chunk_id().to_string()
}
}

/// Garbage-collect records of blobs whose backing files have disappeared.
///
/// Every `(_, file_path)` pair returned by `self.db.get_all_blobs()` is checked
/// against the filesystem. Paths that no longer exist are deleted from the
/// database via `delete_blobs` and then removed from the `fds` cache, so that
/// both stay consistent with what is actually on disk.
///
/// When no stale path is found the method returns immediately, without
/// touching the database or taking the `fds` write lock.
///
/// # Errors
///
/// Returns the error from `get_all_blobs` or `delete_blobs`. A failed
/// deletion is also logged with `warn!`, and in that case the `fds` cache is
/// left untouched.
///
/// # Panics
///
/// Panics if acquiring the `fds` write lock fails (presumably a poisoned
/// `RwLock` — confirm against the field's type).
pub fn gc(&self) -> Result<()> {
    // Keep only the paths that are missing on disk. Note that `exists()` also
    // reports `false` when the path cannot be inspected (e.g. permission denied).
    let blobs_not_exist: Vec<String> = self
        .db
        .get_all_blobs()?
        .into_iter()
        .filter(|(_, file_path)| !std::path::Path::new(file_path).exists())
        .map(|(_, file_path)| file_path)
        .collect();

    // Nothing is stale: avoid a needless exclusive lock on the fds cache.
    if blobs_not_exist.is_empty() {
        return Ok(());
    }

    // Delete from the database first; on failure bail out before the cache is modified.
    self.db.delete_blobs(&blobs_not_exist).map_err(|e| {
        warn!("failed to delete blobs: {}", e);
        e
    })?;

    // Remove the non-existent blob paths from the cache.
    let mut guard = self.fds.write().unwrap();
    for path in &blobs_not_exist {
        guard.remove(path);
    }

    Ok(())
}
}

#[cfg(test)]
Expand Down

0 comments on commit 0d78c8b

Please sign in to comment.