Refactor tar utilities to use common implementation (#269)
* [wip] with zst cache archives

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* ignore *.tar.zst

Now that grype-db sometimes writes a *.tar.zst file, ignore those files
to prevent accidentally committing them.

Signed-off-by: Will Murphy <will.murphy@anchore.com>

* refactor to use common archive facilities

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix log parameter to be stringer

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* Test populate tar methods

Previously close was not called, resulting in malformed gz and zst
files.

Signed-off-by: Will Murphy <will.murphy@anchore.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Signed-off-by: Will Murphy <will.murphy@anchore.com>
Co-authored-by: Will Murphy <will.murphy@anchore.com>
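
The "fix log parameter to be stringer" bullet refers to passing the logged value as something that satisfies fmt.Stringer. The concrete type changed by the commit is not part of the excerpt below, so the following is only a minimal sketch of the pattern, with a hypothetical providerList type standing in for the real one.

    package main

    import (
        "fmt"
        "strings"
    )

    // providerList is a hypothetical stand-in; the actual type adjusted by
    // this commit is not shown in the diff excerpt below.
    type providerList []string

    // String satisfies fmt.Stringer, so a structured logger (such as the
    // log.WithFields calls in the diff) can render the value consistently
    // instead of falling back to default formatting.
    func (p providerList) String() string {
        return strings.Join(p, ", ")
    }

    func main() {
        providers := providerList{"nvd", "github"}
        fmt.Printf("backing up provider state providers=%s\n", providers)
    }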
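
The last bullet notes that skipping Close previously produced malformed gz and zst files. That behavior is inherent to the standard library gzip writer (and likewise a zstd encoder): buffered compressed data and the stream footer are only written on Close. The sketch below is not grype-db code; it only demonstrates why an unclosed writer yields an archive that cannot be decompressed.

    package main

    import (
        "bytes"
        "compress/gzip"
        "fmt"
        "io"
    )

    // readBack decompresses a gzip stream and returns its contents.
    func readBack(r io.Reader) (string, error) {
        zr, err := gzip.NewReader(r)
        if err != nil {
            return "", err
        }
        defer zr.Close()
        data, err := io.ReadAll(zr)
        return string(data), err
    }

    func main() {
        // "forget" to close the writer: the compressed body and the gzip
        // footer (CRC32 + length) stay buffered and never reach the output
        var broken bytes.Buffer
        bw := gzip.NewWriter(&broken)
        bw.Write([]byte("hello"))

        if _, err := readBack(&broken); err != nil {
            fmt.Println("unclosed writer:", err) // typically an unexpected EOF
        }

        // closing flushes the remaining data and writes the footer
        var good bytes.Buffer
        gw := gzip.NewWriter(&good)
        gw.Write([]byte("hello"))
        gw.Close()

        out, err := readBack(&good)
        fmt.Printf("closed writer: %q err=%v\n", out, err)
    }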
wagoodman and willmurphyscode authored Mar 27, 2024
1 parent 80af696 commit ff89657
Showing 15 changed files with 828 additions and 232 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -11,6 +11,7 @@
/bin
.yardstick
.grype-db-manager
/tmp

/grype-db
/build
@@ -24,6 +25,7 @@ listing.json
!**/test-fixtures/**/*.db
*.tar
*tar.gz
*tar.zst
.idea/
*.log
.images
184 changes: 79 additions & 105 deletions cmd/grype-db/cli/commands/cache_backup.go
@@ -1,13 +1,10 @@
package commands

import (
"archive/tar"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"strings"

@@ -19,6 +16,7 @@ import (
"github.com/anchore/grype-db/cmd/grype-db/application"
"github.com/anchore/grype-db/cmd/grype-db/cli/options"
"github.com/anchore/grype-db/internal/log"
"github.com/anchore/grype-db/internal/tarutil"
"github.com/anchore/grype-db/pkg/provider"
)

@@ -76,23 +74,11 @@ func cacheBackup(cfg cacheBackupConfig) error {
}
log.WithFields("providers", providers).Info("backing up provider state")

archive, err := os.Create(cfg.CacheArchive.Path)
writer, err := tarutil.NewWriter(cfg.CacheArchive.Path)
if err != nil {
return err
return fmt.Errorf("unable to create archive writer: %w", err)
}

gw := gzip.NewWriter(archive)
defer func(gw *gzip.Writer) {
if err := gw.Close(); err != nil {
log.Errorf("unable to close gzip writer: %w", err)
}
}(gw)
tw := tar.NewWriter(gw)
defer func(tw *tar.Writer) {
if err := tw.Close(); err != nil {
log.Errorf("unable to close tar writer: %w", err)
}
}(tw)
defer writer.Close()

allowableProviders := strset.New(cfg.Provider.IncludeFilter...)

@@ -119,7 +105,7 @@ func cacheBackup(cfg cacheBackupConfig) error {
}

log.WithFields("provider", name).Debug("archiving data")
if err := archiveProvider(cfg, cfg.Provider.Root, name, tw); err != nil {
if err := archiveProvider(cfg, name, writer); err != nil {
return err
}
}
@@ -129,131 +115,119 @@ func cacheBackup(cfg cacheBackupConfig) error {
return nil
}

func archiveProvider(cfg cacheBackupConfig, root string, name string, writer *tar.Writer) error {
func archiveProvider(cfg cacheBackupConfig, name string, writer tarutil.Writer) error {
wd, err := os.Getwd()
if err != nil {
return err
}
err = os.Chdir(root)

err = os.Chdir(cfg.Provider.Root)
if err != nil {
return err
}

defer func(dir string) {
if err := os.Chdir(dir); err != nil {
log.Errorf("unable to restore directory: %w", err)
}
}(wd)

return filepath.Walk(name,
func(path string, info os.FileInfo, err error) error {
return pathWalker(path, info, err, cfg, name, writer)
},
)
var visitor pathVisitor
if cfg.Results.ResultsOnly {
log.WithFields("provider", name).Debug("archiving results only")

visitor = newCacheResultsOnlyWorkspaceVisitStrategy(writer, name)
} else {
log.WithFields("provider", name).Debug("archiving full workspace")

visitor = cacheFullWorkspaceVisitStrategy{
writer: writer,
}
}

return filepath.Walk(name, visitor.visitPath)
}

type pathVisitor interface {
visitPath(path string, info fs.FileInfo, err error) error
}

func pathWalker(p string, info os.FileInfo, err error, cfg cacheBackupConfig, name string, writer *tar.Writer) error {
var (
_ pathVisitor = (*cacheFullWorkspaceVisitStrategy)(nil)
_ pathVisitor = (*cacheResultsOnlyWorkspaceVisitStrategy)(nil)
)

type cacheFullWorkspaceVisitStrategy struct {
writer tarutil.Writer
}

func (t cacheFullWorkspaceVisitStrategy) visitPath(p string, info fs.FileInfo, err error) error {
if err != nil {
return err
}

if info.IsDir() {
return nil
}
if cfg.Results.ResultsOnly {
if strings.Compare(p, path.Join(name, "metadata.json")) == 0 {
log.WithFields("file", path.Join(name, "metadata.json")).Debug("Marking metadata stale")

// Mark metadata stale
var state provider.State
f, err := os.Open(p)
if err != nil {
return err
}
defer f.Close()

err = json.NewDecoder(f).Decode(&state)
if err != nil {
return err
}

state.Stale = true
// Stream this to the archive
stateJSON, err := json.MarshalIndent(state, "", " ")
if err != nil {
return err
}

return addBytesToArchive(writer, p, stateJSON, info)
}
if strings.HasPrefix(p, path.Join(name, "input")) {
log.WithFields("path", p).Debug("Skipping input directory")
return nil
}
}

return addToArchive(writer, p)
return t.writer.WriteEntry(tarutil.NewEntryFromFilePath(p))
}

func addToArchive(writer *tar.Writer, filename string) error {
log.WithFields("path", filename).Trace("adding to archive")
type cacheResultsOnlyWorkspaceVisitStrategy struct {
writer tarutil.Writer
providerName string
metadataPath string
inputPath string
}

file, err := os.Open(filename)
if err != nil {
return err
func newCacheResultsOnlyWorkspaceVisitStrategy(writer tarutil.Writer, providerName string) cacheResultsOnlyWorkspaceVisitStrategy {
return cacheResultsOnlyWorkspaceVisitStrategy{
writer: writer,
providerName: providerName,
metadataPath: filepath.Join(providerName, "metadata.json"),
inputPath: filepath.Join(providerName, "input"),
}
defer file.Close()
}

info, err := file.Stat()
func (t cacheResultsOnlyWorkspaceVisitStrategy) visitPath(p string, info fs.FileInfo, err error) error {
if err != nil {
return err
}

header, err := tar.FileInfoHeader(info, info.Name())
if err != nil {
return err
if info.IsDir() {
return nil
}

// use full path as name (FileInfoHeader only takes the basename)
// If we don't do this the directory structure would
// not be preserved
// https://golang.org/src/archive/tar/common.go?#L626
header.Name = filename
switch {
case strings.HasPrefix(p, t.inputPath):
// skip input data
return nil

err = writer.WriteHeader(header)
if err != nil {
return err
}
case p == t.metadataPath:
// mark metadata stale

_, err = io.Copy(writer, file)
if err != nil {
return err
}
var state provider.State
f, err := os.Open(p)
if err != nil {
return err
}
defer f.Close()

return nil
}
err = json.NewDecoder(f).Decode(&state)
if err != nil {
return err
}

func addBytesToArchive(writer *tar.Writer, filename string, bytes []byte, info os.FileInfo) error {
log.WithFields("path", filename).Trace("adding stream to archive")
state.Stale = true

header, err := tar.FileInfoHeader(info, info.Name())
if err != nil {
return err
}
header.Name = filename
header.Size = int64(len(bytes))
err = writer.WriteHeader(header)
if err != nil {
return err
}
// stream this to the archive
stateJSON, err := json.MarshalIndent(state, "", " ")
if err != nil {
return err
}

_, err = writer.Write(bytes)
if err != nil {
return err
}
err = writer.Flush()
if err != nil {
return err
return t.writer.WriteEntry(tarutil.NewEntryFromBytes(stateJSON, p, info))
}

return nil
return t.writer.WriteEntry(tarutil.NewEntryFromFilePath(p))
}
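
For reference, the call sites above (tarutil.NewWriter, WriteEntry, Close, NewEntryFromFilePath, NewEntryFromBytes) imply roughly the following surface for the new internal/tarutil package. The real definitions live in the tarutil files of this commit, which are not part of this excerpt; the sketch below only records the shape inferred from the diff, and the Entry name and stub bodies are assumptions.

    // Package tarutil, sketched from the call sites in cache_backup.go above;
    // this is an inferred outline, not the implementation shipped in the commit.
    package tarutil

    import (
        "errors"
        "io/fs"
    )

    // Writer is what archiveProvider consumes: entries are added one at a time
    // and the archive is finalized with a single Close (see `defer writer.Close()`).
    type Writer interface {
        WriteEntry(entry Entry) error
        Close() error
    }

    // Entry represents one file destined for the archive. Its method set is
    // not visible in the diff, so it is left opaque here.
    type Entry interface{}

    // NewWriter would pick the compression framing (e.g. *.tar.gz vs *.tar.zst)
    // from the archive path; the actual selection logic is in the real package.
    func NewWriter(archivePath string) (Writer, error) {
        return nil, errors.New("sketch only; see internal/tarutil in this commit")
    }

    // NewEntryFromFilePath wraps an on-disk file so its contents and header
    // metadata are read when the entry is written.
    func NewEntryFromFilePath(path string) Entry { return nil }

    // NewEntryFromBytes wraps an in-memory payload (used above for the
    // stale-marked metadata.json), reusing the original file's FileInfo for
    // the tar header.
    func NewEntryFromBytes(b []byte, path string, info fs.FileInfo) Entry { return nil }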