Skip to content

Commit

Permalink
feat: Add manifest output option to dedup command
Browse files Browse the repository at this point in the history
The manifest contains the pairing of original file paths and their hashes in the store.
  • Loading branch information
mirkobrombin committed May 7, 2024
1 parent 37a182f commit 290afa6
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
18 changes: 18 additions & 0 deletions cmd/dedup.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package cmd

import (
"encoding/json"
"log"
"os"
"strconv"

"github.com/mirkobrombin/dabadee/pkg/dabadee"
Expand All @@ -21,6 +23,7 @@ func NewDedupCommand() *cobra.Command {

cmd.Flags().BoolP("with-metadata", "m", false, "Include file metadata in hash calculation")
cmd.Flags().BoolP("verbose", "v", false, "Verbose output")
cmd.Flags().String("manifest-output", "", "Output manifest file to the given path")

return cmd
}
Expand All @@ -29,6 +32,7 @@ func dedupCommand(cmd *cobra.Command, args []string) {
source, storagePath, workersStr := args[0], args[1], args[2]
withMetadata, _ := cmd.Flags().GetBool("with-metadata")
verbose, _ := cmd.Flags().GetBool("verbose")
outputManifest, _ := cmd.Flags().GetString("manifest-output")
workers, err := strconv.Atoi(workersStr)
if err != nil {
log.Fatalf("Invalid number of workers: %v", err)
Expand Down Expand Up @@ -57,5 +61,19 @@ func dedupCommand(cmd *cobra.Command, args []string) {
log.Fatalf("Error during deduplication: %v", err)
}

// Output manifest
if outputManifest != "" {
log.Printf("Writing manifest to %s..", outputManifest)

manifest, err := json.Marshal(processor.FileMap)
if err != nil {
log.Fatalf("Error marshalling manifest: %v\n\nPrinting to stdout instead:\n\n%v", err, processor.FileMap)
}

if err := os.WriteFile(outputManifest, manifest, 0644); err != nil {
log.Fatalf("Error writing manifest: %v\n\nPrinting to stdout instead:\n\n%v", err, processor.FileMap)
}
}

log.Print("Done")
}
12 changes: 12 additions & 0 deletions pkg/processor/dedup.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ type DedupProcessor struct {

// Workers is the number of workers to use
Workers int

// FileMap is a map of original file paths to their hash in storage
FileMap map[string]string

// mapMutex is a mutex to protect the FileMap from concurrent access
mapMutex sync.Mutex
}

// NewDedupProcessor creates a new DedupProcessor
Expand All @@ -40,6 +46,7 @@ func NewDedupProcessor(source string, storage *storage.Storage, hashGen hash.Gen
Storage: storage,
HashGen: hashGen,
Workers: workers,
FileMap: make(map[string]string),
}
}

Expand Down Expand Up @@ -164,6 +171,11 @@ func (p *DedupProcessor) processFile(path string) (err error) {
}
}

// Record the mapping from the file's original path to its final hash
p.mapMutex.Lock()
p.FileMap[path] = finalHash
p.mapMutex.Unlock()

if _, err := os.Lstat(path); os.IsNotExist(err) {
err = os.Link(dedupPath, path)
if err != nil {
Expand Down

0 comments on commit 290afa6

Please sign in to comment.