Skip to content

Commit

Permalink
feat: Add a way to remove files from the storage reflecting the chang…
Browse files Browse the repository at this point in the history
…es in the original files

Also add a command to clean up orphans.
  • Loading branch information
mirkobrombin committed Feb 25, 2024
1 parent e70255c commit 0dfc1dd
Show file tree
Hide file tree
Showing 5 changed files with 406 additions and 20 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ was listening to when I started the project. So... I'm blue, da ba dee da ba daa
## What is left to do?

- [ ] Add tests
- [ ] Add a logger
- [ ] Add a progress bar
- [ ] Provide better logs and ask user input when needed
- [ ] Make access to storage more robust, with a lock file
- [ ] Add a way to remove files from the storage reflecting the changes in the
- [x] Add a way to remove files from the storage reflecting the changes in the
original files
- [x] Provide an option to respect metadata (uid, gid, permissions)
- [x] Split cmd and lib in two different packages
95 changes: 92 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ func main() {
var rootCmd = &cobra.Command{Use: "dabadee"}

var withMetadata bool
var additionalPaths []string

var cpCmd = &cobra.Command{
Use: "cp [source] [dest] [storage]",
Expand All @@ -34,7 +35,29 @@ func main() {
}
dedupCmd.Flags().BoolVarP(&withMetadata, "with-metadata", "m", false, "Include file metadata in hash calculation")

rootCmd.AddCommand(cpCmd, dedupCmd)
var findLinksCmd = &cobra.Command{
Use: "find-links [source] [storage]",
Short: "Find all hard links to the specified file",
Args: cobra.ExactArgs(2),
Run: findLinksCommand,
}
findLinksCmd.Flags().StringSliceVarP(&additionalPaths, "additional-paths", "p", []string{}, "Additional paths to search for links")

var removeOrphansCmd = &cobra.Command{
Use: "remove-orphans [storage]",
Short: "Remove all orphaned files from the storage",
Args: cobra.ExactArgs(1),
Run: removeOrphansCommand,
}

var rmCmd = &cobra.Command{
Use: "rm [source] [storage]",
Short: "Remove a file and its link from storage",
Args: cobra.ExactArgs(2),
Run: removeCommand,
}

rootCmd.AddCommand(cpCmd, dedupCmd, findLinksCmd, removeOrphansCmd, rmCmd)
if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
Expand All @@ -45,7 +68,16 @@ func cpCommand(cmd *cobra.Command, args []string) {
source, dest, storagePath := args[0], args[1], args[2]
withMetadata, _ := cmd.Flags().GetBool("with-metadata")

s := storage.NewStorage(storagePath)
storageOpts := storage.StorageOptions{
Root: storagePath,
WithMetadata: withMetadata,
}

s, err := storage.NewStorage(storageOpts)
if err != nil {
log.Fatalf("Error creating storage: %v", err)
}

h := hash.NewSHA256Generator()
processor := processor.NewCpProcessor(source, dest, s, h, withMetadata)
d := dabadee.NewDaBaDee(processor)
Expand All @@ -63,11 +95,68 @@ func dedupCommand(cmd *cobra.Command, args []string) {

withMetadata, _ := cmd.Flags().GetBool("with-metadata")

s := storage.NewStorage(storagePath)
storageOpts := storage.StorageOptions{
Root: storagePath,
WithMetadata: withMetadata,
}

s, err := storage.NewStorage(storageOpts)
if err != nil {
log.Fatalf("Error creating storage: %v", err)
}

h := hash.NewSHA256Generator()
processor := processor.NewDedupProcessor(source, s, h, workers, withMetadata)
d := dabadee.NewDaBaDee(processor)
if err := d.Run(); err != nil {
log.Fatalf("Error during deduplication: %v", err)
}
}

// findLinksCommand is the cobra handler for the "find-links" subcommand.
//
// args[0] is the path of the file to look up and args[1] is the storage
// root. The optional "additional-paths" flag supplies extra paths that are
// forwarded to Storage.FindLinks. Every link returned is printed on its own
// line to standard output.
//
// Any failure (reading the flag, opening the storage, or searching for
// links) is reported through log.Fatalf, which terminates the process.
func findLinksCommand(cmd *cobra.Command, args []string) {
	path, storagePath := args[0], args[1]

	// Surface flag lookup failures instead of silently continuing with a nil
	// slice; this guards against the flag name drifting from its registration.
	additionalPaths, err := cmd.Flags().GetStringSlice("additional-paths")
	if err != nil {
		log.Fatalf("Error reading additional paths: %v", err)
	}

	s, err := storage.NewStorage(storage.StorageOptions{Root: storagePath})
	if err != nil {
		log.Fatalf("Error creating storage: %v", err)
	}

	links, err := s.FindLinks(path, additionalPaths)
	if err != nil {
		log.Fatalf("Error finding links: %v", err)
	}

	for _, link := range links {
		fmt.Println(link)
	}
}

// removeOrphansCommand is the cobra handler for the "remove-orphans"
// subcommand. It opens the storage rooted at args[0] and calls
// RemoveOrphans on it. A failure in either step is reported through
// log.Fatalf, which terminates the process.
func removeOrphansCommand(cmd *cobra.Command, args []string) {
	opts := storage.StorageOptions{Root: args[0]}

	store, openErr := storage.NewStorage(opts)
	if openErr != nil {
		log.Fatalf("Error creating storage: %v", openErr)
	}

	if cleanErr := store.RemoveOrphans(); cleanErr != nil {
		log.Fatalf("Error removing orphans: %v", cleanErr)
	}
}

// removeCommand is the cobra handler for the "rm" subcommand. args[0] is
// the source file to remove and args[1] is the storage root. It opens the
// storage and passes the source path to RemoveFile. A failure in either
// step is reported through log.Fatalf, which terminates the process.
func removeCommand(cmd *cobra.Command, args []string) {
	target := args[0]
	opts := storage.StorageOptions{Root: args[1]}

	store, openErr := storage.NewStorage(opts)
	if openErr != nil {
		log.Fatalf("Error creating storage: %v", openErr)
	}

	if rmErr := store.RemoveFile(target); rmErr != nil {
		log.Fatalf("Error removing file: %v", rmErr)
	}
}
2 changes: 1 addition & 1 deletion pkg/processor/cp.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func (p *CpProcessor) Process() (err error) {
}
}

dedupPath := filepath.Join(p.Storage.Path, finalHash)
dedupPath := filepath.Join(p.Storage.Opts.Root, finalHash)

// Check if the deduplicated file already exists in storage
exists, err := p.Storage.FileExists(dedupPath)
Expand Down
4 changes: 2 additions & 2 deletions pkg/processor/dedup.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func (p *DedupProcessor) Process() error {
if err != nil {
return err
}
if !info.IsDir() && path != p.Storage.Path {
if !info.IsDir() && path != p.Storage.Opts.Root {
jobs <- path
}
return nil
Expand Down Expand Up @@ -143,7 +143,7 @@ func (p *DedupProcessor) processFile(path string) (err error) {
}

// Check if a file with the same hash already exists in storage
dedupPath := filepath.Join(p.Storage.Path, finalHash)
dedupPath := filepath.Join(p.Storage.Opts.Root, finalHash)
exists, err := p.Storage.FileExists(dedupPath)
if err != nil {
finishProcessing(finalHash)
Expand Down
Loading

0 comments on commit 0dfc1dd

Please sign in to comment.