From 8fbedb394e94bf9cc47e5756878965ff92835c93 Mon Sep 17 00:00:00 2001 From: Tianon Gravi Date: Mon, 8 Jan 2024 12:02:09 -0800 Subject: [PATCH] Add `ArchGitChecksum` template command in `bashbrew cat` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also finally adds `bashbrew context` as an explicit subcommand so that issues with this code are easier to test/debug (so we can generate the actual tarball and compare it to previous versions of it, versions generated by `git archive`, etc). As-is, this currently generates verbatim identical checksums to https://github.com/docker-library/meta-scripts/blob/0cde8de57dfe411ed5578feffe1b10f811e11dc2/sources.sh#L90-L96 (by design). We'll wait to do any cache bust there until we implement `Dockerfile`/context filtering: ```console $ bashbrew cat varnish:stable --format '{{ .TagEntry.GitCommit }} {{ .TagEntry.Directory }}' 0c295b528f28a98650fb2580eab6d34b30b165c4 stable/debian $ git -C "$BASHBREW_CACHE/git" archive 0c295b528f28a98650fb2580eab6d34b30b165c4:stable/debian/ | ./tar-scrubber | sha256sum 3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6 $ bashbrew cat --format '{{ .ArchGitChecksum arch .TagEntry }}' varnish:stable 3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6 ``` (Choosing `varnish:stable` there because it currently has [some 100% valid dangling symlinks](https://github.com/varnish/docker-varnish/tree/6b1c6ffedcfececac71e46a85122c1adaef25868/stable/debian/scripts) that tripped up my code beautifully 💕) From a performance perspective (which was the original reason for looking into / implementing this), running the `meta-scripts/sources.sh` script against `--all` vs this, my local system gets ~18.5m vs ~4.5m (faster being this new pure-Go implementation). 
--- cmd/bashbrew/git.go | 14 ++ cmd/bashbrew/main.go | 57 ++++++ cmd/bashbrew/oci-builder.go | 1 + cmd/bashbrew/sort.go | 10 +- cmd/bashbrew/tar.go | 28 +++ go.mod | 1 + go.sum | 1 + pkg/gitfs/fs.go | 342 ++++++++++++++++++++++++++++-------- pkg/gitfs/fs_test.go | 81 +++++++-- pkg/gitfs/tarscrub_test.go | 44 +++++ pkg/tarscrub/git_test.go | 73 ++++++++ pkg/tarscrub/tarscrub.go | 88 ++++++++++ 12 files changed, 641 insertions(+), 99 deletions(-) create mode 100644 cmd/bashbrew/tar.go create mode 100644 pkg/gitfs/tarscrub_test.go create mode 100644 pkg/tarscrub/git_test.go create mode 100644 pkg/tarscrub/tarscrub.go diff --git a/cmd/bashbrew/git.go b/cmd/bashbrew/git.go index 2158228d..986486ac 100644 --- a/cmd/bashbrew/git.go +++ b/cmd/bashbrew/git.go @@ -96,6 +96,20 @@ func getGitCommit(commit string) (string, error) { return h.String(), nil } +func (r Repo) archGitFS(arch string, entry *manifest.Manifest2822Entry) (fs.FS, error) { + commit, err := r.fetchGitRepo(arch, entry) + if err != nil { + return nil, fmt.Errorf("failed fetching %q: %w", r.EntryIdentifier(entry), err) + } + + gitFS, err := gitCommitFS(commit) + if err != nil { + return nil, err + } + + return fs.Sub(gitFS, entry.ArchDirectory(arch)) +} + func gitCommitFS(commit string) (fs.FS, error) { if err := ensureGitInit(); err != nil { return nil, err diff --git a/cmd/bashbrew/main.go b/cmd/bashbrew/main.go index f4e06a0c..4c235c20 100644 --- a/cmd/bashbrew/main.go +++ b/cmd/bashbrew/main.go @@ -3,11 +3,13 @@ package main import ( "fmt" "os" + "path" "path/filepath" "strings" "github.com/sirupsen/logrus" // this is used by containerd libraries, so we need to set the default log level for it "github.com/urfave/cli" + xTerm "golang.org/x/term" "github.com/docker-library/bashbrew/architecture" "github.com/docker-library/bashbrew/manifest" @@ -421,6 +423,61 @@ func main() { Category: "plumbing", }, + { + Name: "context", + Usage: "(eventually Dockerfile-filtered) git archive", + Flags: []cli.Flag{ + 
cli.BoolFlag{ + Name: "sha256", + Usage: `print sha256 instead of raw tar`, + }, + // TODO "unfiltered" or something for not applying Dockerfile filtering (once that's implemented) + }, + Before: subcommandBeforeFactory("context"), + Action: func(c *cli.Context) error { + repos, err := repos(false, c.Args()...) + if err != nil { + return err + } + if len(repos) != 1 { + return fmt.Errorf("'context' expects to act on exactly one architecture of one entry of one repo (got %d repos)", len(repos)) + } + + r, err := fetch(repos[0]) + if err != nil { + return err + } + + // TODO technically something like "hello-world:latest" *could* be relaxed a little if it resolves via architecture to one and only one entry 🤔 (but that's a little hard to implement with the existing internal data structures -- see TODO at the top of "sort.go") + + if r.TagEntry == nil { + return fmt.Errorf("'context' expects to act on exactly one architecture of one entry of one repo (no specific entry of %q selected)", r.RepoName) + } + if len(r.TagEntries) != 1 { + return fmt.Errorf("'context' expects to act on exactly one architecture of one entry of one repo (got %d entires)", len(r.TagEntries)) + } + + if !r.TagEntry.HasArchitecture(arch) { + return fmt.Errorf("%q does not include architecture %q", path.Join(namespace, r.RepoName)+":"+r.TagEntry.Tags[0], arch) + } + + if c.Bool("sha256") { + sum, err := r.ArchGitChecksum(arch, r.TagEntry) + if err != nil { + return err + } + fmt.Println(sum) + return nil + } else { + if xTerm.IsTerminal(int(os.Stdout.Fd())) { + return fmt.Errorf("cowardly refusing to output a tar to a terminal") + } + return r.archContextTar(arch, r.TagEntry, os.Stdout) + } + }, + + Category: "plumbing", + }, { Name: "remote", Usage: "query registries for bashbrew-related data", diff --git a/cmd/bashbrew/oci-builder.go b/cmd/bashbrew/oci-builder.go index 4b9c3a06..ea8d35ad 100644 --- a/cmd/bashbrew/oci-builder.go +++ b/cmd/bashbrew/oci-builder.go @@ -89,6 +89,7 @@ func 
importOCIBlob(ctx context.Context, cs content.Store, fs iofs.FS, descriptor // this is "docker build" but for "Builder: oci-import" func ociImportBuild(tags []string, commit, dir, file string) (*imagespec.Descriptor, error) { + // TODO use r.archGitFS (we have no r or arch or entry here 😅) fs, err := gitCommitFS(commit) if err != nil { return nil, err diff --git a/cmd/bashbrew/sort.go b/cmd/bashbrew/sort.go index f1bffe9e..1a88d8b1 100644 --- a/cmd/bashbrew/sort.go +++ b/cmd/bashbrew/sort.go @@ -5,6 +5,8 @@ import ( "pault.ag/go/topsort" ) +// TODO unify archFilter and applyConstraints handling by pre-filtering the full list of Repo objects such that all that remains are things we should process (thus removing all "if" statements throughout the various loops); re-doing the Architectures and Entries lists to only include ones we should process, etc + func sortRepos(repos []string, applyConstraints bool) ([]string, error) { rs := []*Repo{} rsMap := map[*Repo]string{} @@ -103,10 +105,10 @@ func sortRepoObjects(rs []*Repo, applyConstraints bool) ([]*Repo, error) { continue } /* - // TODO need archFilter here :( - if archFilter && !entry.HasArchitecture(arch) { - continue - } + // TODO need archFilter here :( + if archFilter && !entry.HasArchitecture(arch) { + continue + } */ entryArches := []string{arch} diff --git a/cmd/bashbrew/tar.go b/cmd/bashbrew/tar.go new file mode 100644 index 00000000..10f67411 --- /dev/null +++ b/cmd/bashbrew/tar.go @@ -0,0 +1,28 @@ +package main + +import ( + "crypto/sha256" + "fmt" + "io" + + "github.com/docker-library/bashbrew/manifest" + "github.com/docker-library/bashbrew/pkg/tarscrub" +) + +func (r Repo) archContextTar(arch string, entry *manifest.Manifest2822Entry, w io.Writer) error { + f, err := r.archGitFS(arch, entry) + if err != nil { + return err + } + + return tarscrub.WriteTar(f, w) +} + +func (r Repo) ArchGitChecksum(arch string, entry *manifest.Manifest2822Entry) (string, error) { + h := sha256.New() + err := 
r.archContextTar(arch, entry, h) + if err != nil { + return "", err + } + return fmt.Sprintf("%x", h.Sum(nil)), nil +} diff --git a/go.mod b/go.mod index 5058eb8b..c9a993be 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/sirupsen/logrus v1.9.0 github.com/urfave/cli v1.22.10 go.etcd.io/bbolt v1.3.7 + golang.org/x/term v0.5.0 pault.ag/go/debian v0.12.0 pault.ag/go/topsort v0.1.1 ) diff --git a/go.sum b/go.sum index 5cfdbb3e..ba015590 100644 --- a/go.sum +++ b/go.sum @@ -940,6 +940,7 @@ golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuX golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/gitfs/fs.go b/pkg/gitfs/fs.go index a7870511..d9e3f36a 100644 --- a/pkg/gitfs/fs.go +++ b/pkg/gitfs/fs.go @@ -12,168 +12,356 @@ import ( goGitPlumbing "github.com/go-git/go-git/v5/plumbing" goGitPlumbingFileMode "github.com/go-git/go-git/v5/plumbing/filemode" goGitPlumbingObject "github.com/go-git/go-git/v5/plumbing/object" + goGitPlumbingStorer "github.com/go-git/go-git/v5/plumbing/storer" ) // https://github.com/go-git/go-git/issues/296 -// TODO something more clever for directories - func CommitHash(repo *goGit.Repository, commit string) (fs.FS, error) { gitCommit, err := repo.CommitObject(goGitPlumbing.NewHash(commit)) if err != nil { return nil, err } - return gitFS{ - commit: gitCommit, + tree, err := gitCommit.Tree() + if err != nil 
{ + return nil, err + } + return gitFSFS{ + gitFS: &gitFS{ + storer: repo.Storer, + tree: tree, + name: ".", + }, }, nil } // https://pkg.go.dev/io/fs#FS +// This exists *only* because we cannot create a single object that concurrently implements *both* fs.FS *and* fs.File (Stat(string) vs Stat()). +type gitFSFS struct { + *gitFS +} + +// https://pkg.go.dev/io/fs#File +// https://pkg.go.dev/io/fs#FileInfo +// https://pkg.go.dev/io/fs#DirEntry type gitFS struct { - commit *goGitPlumbingObject.Commit + storer goGitPlumbingStorer.EncodedObjectStorer + tree *goGitPlumbingObject.Tree + entry *goGitPlumbingObject.TreeEntry // might be nil ("." at the top-level of the repo) + + // cached values + name string // full path from the repository root + size int64 // Tree.Size value for non-directories (more efficient than opening/reading the blob) + + // state for "Open" objects + reader io.ReadCloser // only set for an "Open" file + walker *goGitPlumbingObject.TreeWalker // only set for an "Open" directory } -// apparently symlinks in "io/fs" are still a big TODO (https://github.com/golang/go/issues/49580, https://github.com/golang/go/issues/45470, etc related issues); all the existing interfaces assume symlinks don't exist -// -// if the File object passed to this function represents a symlink, this returns the (resolved) path that should be looked up instead; only relative symlinks are supported (and attempts to escape the repository with too many "../" *should* result in an error -- this is a convenience/sanity check, not a security boundary; subset of https://pkg.go.dev/io/fs#ValidPath) -// -// otherwise, it will return the empty string and nil -func resolveSymlink(f *goGitPlumbingObject.File) (target string, err error) { - if f.Mode != goGitPlumbingFileMode.Symlink { - return "", nil +// clones just the load-bearing bits (basically clearing anything that's "state") +func (f gitFS) clone() *gitFS { + f.reader = nil + f.walker = nil + return &f +} + +// if our entry is a 
symlink, this returns the target of it +func (f gitFS) readLink() (bool, string, error) { + if f.entry == nil || f.entry.Mode != goGitPlumbingFileMode.Symlink { + return false, "", nil } - target, err = f.Contents() + file, err := f.tree.TreeEntryFile(f.entry) if err != nil { + return true, "", fmt.Errorf("TreeEntryFile(%q): %w", f.name, err) + } + + target, err := file.Contents() + return true, target, err +} + +// symlinks in "io/fs" are still a big TODO (https://github.com/golang/go/issues/49580, https://github.com/golang/go/issues/45470, etc related issues); all the existing interfaces mostly assume symlinks don't exist (fs.DirEntry.Info() and fs.WalkDir(...) as notable exceptions 🤷) +// +// if the object we're pointing at represents a symlink, this returns the (resolved) path that should be looked up instead; only relative symlinks are supported (and attempts to escape the repository with too many "../" *should* result in an error -- this is a convenience/sanity check, not a security boundary; subset of https://pkg.go.dev/io/fs#ValidPath) +// +// otherwise, it will return the empty string and nil +func (f gitFS) resolveLink() (string, error) { + isLink, target, err := f.readLink() + if !isLink || err != nil { return "", err } if target == "" { - return "", fmt.Errorf("unexpected: empty symlink %q", f.Name) + return "", fmt.Errorf("unexpected: empty symlink %q", f.name) } // we *could* implement this as absolute symlinks being relative to the root of the Git repository, but that wouldn't match the behavior of a normal repository that's been "git clone"'d on disk, so I think that would be a mistake and erroring out is saner here if path.IsAbs(target) { - return "", fmt.Errorf("unsupported: %q is an absolute symlink (%q)", f.Name, target) + return "", fmt.Errorf("unsupported: %q is an absolute symlink (%q)", f.name, target) } // symlinks are relative to the path they're in, so we need to prepend that - target = path.Join(path.Dir(f.Name), target) + target = 
path.Join(path.Dir(f.name), target) // now let's use path.Clean to get rid of any excess ".." or "." entries in our end result target = path.Clean(target) // once we're cleaned, we should have a full path that's relative to the root of the Git repository, so if it still starts with "../", that's a problem that will error later when we try to read it, so let's error out now to bail earlier if strings.HasPrefix(target, "../") { - return "", fmt.Errorf("unsupported: %q is a relative symlink outside the tree (%q)", f.Name, target) + return "", fmt.Errorf("unsupported: %q is a relative symlink outside the tree (%q)", f.name, target) } return target, nil } -// https://pkg.go.dev/io/fs#FS -func (fs gitFS) Open(name string) (fs.File, error) { - f, err := fs.commit.File(name) +// a helper shared between FS.Stat(...) and FS.Open(...); also the primary entrypoint to creating new gitFS objects besides gitfs.CommitHash(...) +func (f gitFS) stat(name string, followSymlinks bool) (*gitFS, error) { + if !f.IsDir() { + return nil, fmt.Errorf("cannot stat a child (%q) of non-directory %q", name, f.name) + } + if path.Join(f.name, name) == f.name { // path.Join implies path.Clean too + // (this is to defensively special-case handling of ".", which FindEntry doesn't like) + return &f, nil + } + entry, err := f.tree.FindEntry(name) if err != nil { - // TODO if it's file-not-found, we need to check whether it's a directory - return nil, err + return nil, fmt.Errorf("Tree(%q).FindEntry(%q): %w", f.name, name, err) } + return f.statEntry(name, entry, followSymlinks) +} - if target, err := resolveSymlink(f); err != nil { - return nil, err - } else if target != "" { - return fs.Open(target) +// dual-use by gitFS.stat and ReadDir (hence "followSymlinks" -- ReadDir needs to not resolve symlinks when creating sub-FS objects) +func (f gitFS) statEntry(name string, entry *goGitPlumbingObject.TreeEntry, followSymlinks bool) (*gitFS, error) { + if entry == nil { + return nil, 
fmt.Errorf("(%q).statEntry cannot accept a nil entry; perhaps you intended .stat(%q) instead?", f.name, name) + } + + var ( + fi = f.clone() + err error + ) + fi.entry = entry + fi.name = path.Join(fi.name, name) + + if fi.IsDir() { + fi.tree, err = goGitPlumbingObject.GetTree(f.storer, entry.Hash) // see https://github.com/go-git/go-git/blob/v5.11.0/plumbing/object/tree.go#L103 + if err != nil { + return nil, fmt.Errorf("Tree(%q): %w", fi.name, err) + return nil, err + } + return fi, nil } - reader, err := f.Reader() + fi.size, err = f.storer.EncodedObjectSize(entry.Hash) // https://github.com/go-git/go-git/blob/v5.11.0/plumbing/object/tree.go#L92 if err != nil { - return nil, err + return nil, fmt.Errorf("Size(%q): %w", fi.name, err) } - return gitFSFile{ - stat: gitFSFileInfo{ - file: f, - }, - reader: reader, - }, nil + if followSymlinks { + // TODO this should probably be an explicit loop (instead of implicit recursion) with some upper nesting limit? (symlink to symlink to symlink to ...; possibly even in an infinite cycle because symlinks) + if target, err := fi.resolveLink(); err != nil { + return nil, err + } else if target != "" { + return f.stat(target, followSymlinks) + } + } + + return fi, nil +} + +// https://pkg.go.dev/io/fs#FS +func (f gitFSFS) Open(name string) (fs.File, error) { + pathErr := &fs.PathError{ + Op: "open", + Path: name, + } + if !fs.ValidPath(name) { + pathErr.Err = fs.ErrInvalid + return nil, pathErr + } + + var fi *gitFS + fi, pathErr.Err = f.stat(name, true) + if pathErr.Err != nil { + return nil, pathErr + } + + if fi.IsDir() { + fi.walker = goGitPlumbingObject.NewTreeWalker(fi.tree, false, nil) + return fi, nil + } + + var file *goGitPlumbingObject.File + file, err := fi.tree.TreeEntryFile(fi.entry) + if err != nil { + pathErr.Err = fmt.Errorf("Tree(%q).TreeEntryFile(%q): %w", f.name, fi.name, err) + return nil, pathErr + } + + fi.reader, err = file.Reader() + if err != nil { + pathErr.Err = fmt.Errorf("File(%q).Reader(): %w", 
fi.name, err) + return nil, pathErr + } + + return fi, nil } // https://pkg.go.dev/io/fs#StatFS -func (fs gitFS) Stat(name string) (fs.FileInfo, error) { - f, err := fs.commit.File(name) +func (f gitFSFS) Stat(name string) (fs.FileInfo, error) { + fi, err := f.stat(name, true) if err != nil { - return nil, err + return nil, &fs.PathError{ + Op: "stat", + Path: name, + Err: err, + } } + return fi, nil +} - if target, err := resolveSymlink(f); err != nil { - return nil, err - } else if target != "" { - return fs.Stat(target) +// https://github.com/golang/go/issues/49580 ("type ReadLinkFS interface") +func (f gitFSFS) ReadLink(name string) (string, error) { + fi, err := f.stat(name, false) + if err != nil { + return "", &fs.PathError{ + Op: "readlink", + Path: name, + Err: err, + } + } + isLink, target, err := fi.readLink() + if err != nil { + return "", &fs.PathError{ + Op: "readlink", + Path: name, + Err: err, + } } + if !isLink { + return "", &fs.PathError{ + Op: "readlink", + Path: name, + Err: fmt.Errorf("not a symlink"), + } + } + return target, nil +} - return gitFSFileInfo{ - file: f, - }, nil +// https://pkg.go.dev/io/fs#SubFS +func (f gitFS) Sub(dir string) (fs.FS, error) { + fi, err := f.stat(dir, true) + if err != nil { + return nil, err + } + if !fi.IsDir() { + return nil, fmt.Errorf("%q is not a directory", fi.name) + } + return gitFSFS{gitFS: fi}, nil } // https://pkg.go.dev/io/fs#File -type gitFSFile struct { - stat fs.FileInfo - reader io.ReadCloser +func (f gitFS) Stat() (fs.FileInfo, error) { + return f, nil } -func (f gitFSFile) Stat() (fs.FileInfo, error) { - return f.stat, nil -} -func (f gitFSFile) Read(b []byte) (int, error) { +// https://pkg.go.dev/io/fs#File +func (f gitFS) Read(b []byte) (int, error) { + if f.reader == nil { + return 0, fmt.Errorf("%q not open (or not a file)", f.name) + } return f.reader.Read(b) } -func (f gitFSFile) Close() error { - return f.reader.Close() + +// https://pkg.go.dev/io/fs#File +func (f gitFS) Close() error 
{ + if f.reader != nil { + if err := f.reader.Close(); err != nil { + return err + } + } + if f.walker != nil { + f.walker.Close() // returns no error, nothing 🤔 + } + return nil } -type gitFSFileInfo struct { - file *goGitPlumbingObject.File +// https://pkg.go.dev/io/fs#ReadDirFile +func (f gitFS) ReadDir(n int) ([]fs.DirEntry, error) { + if f.walker == nil { + return nil, fmt.Errorf("%q not open (or not a directory)", f.name) + } + ret := []fs.DirEntry{} + for i := 0; n <= 0 || i < n; i++ { + name, entry, err := f.walker.Next() + if err != nil { + if err == io.EOF && n <= 0 { + // "In this case, if ReadDir succeeds (reads all the way to the end of the directory), it returns the slice and a nil error." + break + } + return ret, err + } + fi, err := f.statEntry(name, &entry, false) + if err != nil { + return ret, err + } + ret = append(ret, fi) + } + return ret, nil } -// base name of the file -func (fi gitFSFileInfo) Name() string { - return path.Base(fi.file.Name) +// https://pkg.go.dev/io/fs#FileInfo: base name of the file +func (f gitFS) Name() string { + return path.Base(f.name) // this should be the same as f.entry.Name (except in the case of the top-level / root) } -// length in bytes for regular files; system-dependent for others -func (fi gitFSFileInfo) Size() int64 { - return fi.file.Size +// https://pkg.go.dev/io/fs#FileInfo: length in bytes for regular files; system-dependent for others +func (f gitFS) Size() int64 { + return f.size } -// file mode bits -func (fi gitFSFileInfo) Mode() fs.FileMode { +// https://pkg.go.dev/io/fs#FileInfo: file mode bits +func (f gitFS) Mode() fs.FileMode { // https://pkg.go.dev/github.com/go-git/go-git/v5@v5.4.2/plumbing/filemode#FileMode // https://pkg.go.dev/io/fs#FileMode - switch fi.file.Mode { + if f.entry == nil { + // "." 
at the top-level of the repository is a directory + return 0775 | fs.ModeDir + } + switch f.entry.Mode { case goGitPlumbingFileMode.Regular: - return 0644 + return 0664 case goGitPlumbingFileMode.Symlink: - return 0644 | fs.ModeSymlink + return 0777 | fs.ModeSymlink case goGitPlumbingFileMode.Executable: - return 0755 + return 0775 case goGitPlumbingFileMode.Dir: - return 0755 | fs.ModeDir + return 0775 | fs.ModeDir } return 0 | fs.ModeIrregular // TODO what to do for files whose types we don't support? 😬 } -// modification time -func (fi gitFSFileInfo) ModTime() time.Time { +// https://pkg.go.dev/io/fs#FileInfo: modification time +func (f gitFS) ModTime() time.Time { return time.Time{} // TODO maybe pass down whichever is more recent of commit.Author.When vs commit.Committer.When ? } -// abbreviation for Mode().IsDir() -func (fi gitFSFileInfo) IsDir() bool { - return fi.file.Mode == goGitPlumbingFileMode.Dir +// https://pkg.go.dev/io/fs#FileInfo: abbreviation for Mode().IsDir() +func (f gitFS) IsDir() bool { + return f.Mode().IsDir() +} + +// https://pkg.go.dev/io/fs#FileInfo: underlying data source (can return nil) +func (f gitFS) Sys() interface{} { + return nil +} + +// https://pkg.go.dev/io/fs#DirEntry +func (f gitFS) Type() fs.FileMode { + return f.Mode().Type() } -// underlying data source (can return nil) -func (fi gitFSFileInfo) Sys() interface{} { - return fi.file +// https://pkg.go.dev/io/fs#DirEntry +func (f gitFS) Info() (fs.FileInfo, error) { + return f, nil } diff --git a/pkg/gitfs/fs_test.go b/pkg/gitfs/fs_test.go index 57f48b9c..e1bb1836 100644 --- a/pkg/gitfs/fs_test.go +++ b/pkg/gitfs/fs_test.go @@ -3,7 +3,7 @@ package gitfs_test import ( "io" "testing" - // TODO "testing/fstest" + "testing/fstest" "github.com/docker-library/bashbrew/pkg/gitfs" @@ -12,7 +12,7 @@ import ( ) func TestCommitFS(t *testing.T) { - // TODO instead of cloning a remote repository, synthesize a very simple Git repository right in the test here + // TODO instead of cloning 
a remote repository, synthesize a very simple Git repository right in the test here (benefit of the remote repository is that it's much larger, so fstest.TestFS has a lot more data to test against) repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{ URL: "https://github.com/docker-library/hello-world.git", SingleBranch: true, @@ -20,31 +20,76 @@ func TestCommitFS(t *testing.T) { if err != nil { t.Fatal(err) } - fs, err := gitfs.CommitHash(repo, "480c62c690c0af4427372cf7f0de11da4e00e6c5") + f, err := gitfs.CommitHash(repo, "480c62c690c0af4427372cf7f0de11da4e00e6c5") if err != nil { t.Fatal(err) } - r, err := fs.Open("greetings/hello-world.txt") - if err != nil { - t.Fatal(err) - } - defer func() { - if err := r.Close(); err != nil { + + t.Run("Open+ReadAll", func(t *testing.T) { + r, err := f.Open("greetings/hello-world.txt") + if err != nil { + t.Fatal(err) + } + defer func() { + if err := r.Close(); err != nil { + t.Fatal(err) + } + }() + b, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + expected := "Hello from Docker!\n" + if string(b) != expected { + t.Fatalf("expected %q, got %q", expected, string(b)) + } + }) + + t.Run("fstest.TestFS", func(t *testing.T) { + if err := fstest.TestFS(f, "greetings/hello-world.txt"); err != nil { t.Fatal(err) } - }() - b, err := io.ReadAll(r) + }) +} + +func TestSymlinkFS(t *testing.T) { + // TODO instead of cloning a remote repository, synthesize a very simple Git repository right in the test here (benefit of the remote repository is that it's much larger, so fstest.TestFS has a lot more data to test against) + repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{ + URL: "https://github.com/tianon/gosu.git", // just a repository with a known symlink (`.dockerignore` -> `.gitignore`) + SingleBranch: true, + }) if err != nil { t.Fatal(err) } - expected := "Hello from Docker!\n" - if string(b) != expected { - t.Fatalf("expected %q, got %q", expected, string(b)) + f, err := 
gitfs.CommitHash(repo, "b73cc93b6f5b5a045c397ff0f75190e33d853946") + if err != nil { + t.Fatal(err) } - /* - TODO (we have to implement fake directory handling for this to work; it gets ".: Open: file not found" immediately) - if err := fstest.TestFS(fs, "greetings/hello-world.txt"); err != nil { + + t.Run("Open+ReadAll", func(t *testing.T) { + r, err := f.Open(".dockerignore") + if err != nil { + t.Fatal(err) + } + defer func() { + if err := r.Close(); err != nil { + t.Fatal(err) + } + }() + b, err := io.ReadAll(r) + if err != nil { t.Fatal(err) } - */ + expected := ".git\nSHA256SUMS*\ngosu*\n" + if string(b) != expected { + t.Fatalf("expected %q, got %q", expected, string(b)) + } + }) + + // might as well run fstest again, now that we have a new filesystem tree 😅 + t.Run("fstest.TestFS", func(t *testing.T) { + if err := fstest.TestFS(f, ".dockerignore", "hub/Dockerfile.debian"); err != nil { + t.Fatal(err) + } + }) } diff --git a/pkg/gitfs/tarscrub_test.go b/pkg/gitfs/tarscrub_test.go new file mode 100644 index 00000000..7de592e1 --- /dev/null +++ b/pkg/gitfs/tarscrub_test.go @@ -0,0 +1,44 @@ +package gitfs_test + +import ( + "crypto/sha256" + "fmt" + "io/fs" + + "github.com/docker-library/bashbrew/pkg/gitfs" + "github.com/docker-library/bashbrew/pkg/tarscrub" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/storage/memory" +) + +// this example is nice because it has some intentionally dangling symlinks in it that trip things up if they aren't implemented correctly! 
+// (see also pkg/tarscrub/git_test.go) +func ExampleGitVarnish() { + repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{ + URL: "https://github.com/varnish/docker-varnish.git", + SingleBranch: true, + }) + if err != nil { + panic(err) + } + + commit, err := gitfs.CommitHash(repo, "0c295b528f28a98650fb2580eab6d34b30b165c4") + if err != nil { + panic(err) + } + + f, err := fs.Sub(commit, "stable/debian") + if err != nil { + panic(err) + } + + h := sha256.New() + + if err := tarscrub.WriteTar(f, h); err != nil { + panic(err) + } + + fmt.Printf("%x\n", h.Sum(nil)) + // Output: 3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6 +} diff --git a/pkg/tarscrub/git_test.go b/pkg/tarscrub/git_test.go new file mode 100644 index 00000000..b84bf2fb --- /dev/null +++ b/pkg/tarscrub/git_test.go @@ -0,0 +1,73 @@ +package tarscrub_test + +import ( + "crypto/sha256" + "fmt" + "io/fs" + + "github.com/docker-library/bashbrew/pkg/gitfs" + "github.com/docker-library/bashbrew/pkg/tarscrub" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/storage/memory" +) + +func ExampleGitHello() { + repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{ + URL: "https://github.com/docker-library/hello-world.git", + SingleBranch: true, + }) + if err != nil { + panic(err) + } + + commit, err := gitfs.CommitHash(repo, "3fb6ebca4163bf5b9cc496ac3e8f11cb1e754aee") + if err != nil { + panic(err) + } + + f, err := fs.Sub(commit, "amd64/hello-world") + if err != nil { + panic(err) + } + + h := sha256.New() + + if err := tarscrub.WriteTar(f, h); err != nil { + panic(err) + } + + fmt.Printf("%x\n", h.Sum(nil)) + // Output: 22266b0a36deee72428cffd00859ce991f1db101260999c40904ace7d634b788 +} + +// this example is nice because it has some intentionally dangling symlinks in it that trip things up if they aren't implemented correctly! 
+// (see also pkg/gitfs/tarscrub_test.go) +func ExampleGitVarnish() { + repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{ + URL: "https://github.com/varnish/docker-varnish.git", + SingleBranch: true, + }) + if err != nil { + panic(err) + } + + commit, err := gitfs.CommitHash(repo, "0c295b528f28a98650fb2580eab6d34b30b165c4") + if err != nil { + panic(err) + } + + f, err := fs.Sub(commit, "stable/debian") + if err != nil { + panic(err) + } + + h := sha256.New() + + if err := tarscrub.WriteTar(f, h); err != nil { + panic(err) + } + + fmt.Printf("%x\n", h.Sum(nil)) + // Output: 3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6 +} diff --git a/pkg/tarscrub/tarscrub.go b/pkg/tarscrub/tarscrub.go new file mode 100644 index 00000000..17b4bf6b --- /dev/null +++ b/pkg/tarscrub/tarscrub.go @@ -0,0 +1,88 @@ +package tarscrub + +import ( + "archive/tar" + "fmt" + "io" + "io/fs" +) + +// TODO create an io/fs that parses a Dockerfile in an io/fs and effectively "filters" the io/fs to only return/include files that are used by that Dockerfile 👀 + +// takes a tar header object and "scrubs" it (uid/gid zeroed, timestamps zeroed) +func ScrubHeader(hdr *tar.Header) *tar.Header { + return &tar.Header{ + Typeflag: hdr.Typeflag, + Name: hdr.Name, + Linkname: hdr.Linkname, + Size: hdr.Size, + Mode: hdr.Mode, + Devmajor: hdr.Devmajor, + Devminor: hdr.Devminor, + } +} + +// this writes a "scrubbed" tarball to the given io.Writer (uid/gid zeroed, timestamps zeroed) +func WriteTar(f fs.FS, w io.Writer) error { + tw := tar.NewWriter(w) + defer tw.Flush() // note: flush instead of close to avoid the empty block at EOF + + // https://github.com/golang/go/blob/go1.22rc1/src/archive/tar/writer.go#L408-L443 + // https://cs.opensource.google/go/go/+/go1.22rc1:src/archive/tar/writer.go;l=411 + return fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return fmt.Errorf("%q: %w", path, err) + } + // TODO add more context to more 
errors + + if path == "." { + // skip "." to match "git archive" behavior -- TODO this should be optional somehow + return nil + } + + info, err := d.Info() + if err != nil { + return err + } + + hdr, err := tar.FileInfoHeader(info, "") + if err != nil { + return err + } + hdr.Name = path + if info.IsDir() { + hdr.Name += "/" + } + + if info.Mode()&fs.ModeSymlink != 0 { + // https://github.com/golang/go/issues/49580 ("type ReadLinkFS interface") + if readlinkFS, ok := f.(interface { + ReadLink(name string) (string, error) + }); ok { + hdr.Linkname, err = readlinkFS.ReadLink(path) + if err != nil { + return err + } + } else { + return fmt.Errorf("filesystem contains symlinks but does not implement ReadLinkFS (needed for symlink %q)", path) + } + } + + newHdr := ScrubHeader(hdr) + if err := tw.WriteHeader(newHdr); err != nil { + return err + } + + if info.IsDir() || hdr.Linkname != "" { + return nil + } + + file, err := f.Open(path) + if err != nil { + return err + } + defer file.Close() + _, err = io.Copy(tw, file) + return err + }) +}