diff --git a/integration/dockerfiles/Dockerfile_hardlink_base b/integration/dockerfiles/Dockerfile_hardlink_base new file mode 100644 index 0000000000..deaec7d600 --- /dev/null +++ b/integration/dockerfiles/Dockerfile_hardlink_base @@ -0,0 +1,3 @@ +FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a AS stage1 +RUN apk --no-cache add git +RUN rm /usr/bin/git && ln -s /usr/libexec/git-core/git /usr/bin/git diff --git a/integration/dockerfiles/Dockerfile_test_hardlink b/integration/dockerfiles/Dockerfile_test_hardlink new file mode 100644 index 0000000000..7692bde45d --- /dev/null +++ b/integration/dockerfiles/Dockerfile_test_hardlink @@ -0,0 +1,10 @@ +FROM gcr.io/kaniko-test/hardlink-base:latest +RUN ls -al /usr/libexec/git-core/git /usr/bin/git /usr/libexec/git-core/git-diff +RUN stat /usr/bin/git +RUN stat /usr/libexec/git-core/git +RUN git --version +WORKDIR /temp/dir +RUN git init +ADD context/foo foo +RUN git add foo +RUN rm /temp/dir/.git/index diff --git a/integration/dockerfiles/Dockerfile_test_mv_add b/integration/dockerfiles/Dockerfile_test_mv_add index a81e2318a9..d01d8c55db 100644 --- a/integration/dockerfiles/Dockerfile_test_mv_add +++ b/integration/dockerfiles/Dockerfile_test_mv_add @@ -1,4 +1,5 @@ FROM busybox@sha256:1bd6df27274fef1dd36eb529d0f4c8033f61c675d6b04213dd913f902f7cafb5 ADD context/tars /tmp/tars +RUN stat /bin/sh RUN mv /tmp/tars /foo RUN echo "hi" diff --git a/integration/integration_test.go b/integration/integration_test.go index 372922a434..bc0d40d8c4 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -37,9 +37,10 @@ var config = initGCPConfig() var imageBuilder *DockerFileBuilder type gcpConfig struct { - gcsBucket string - imageRepo string - onbuildBaseImage string + gcsBucket string + imageRepo string + onbuildBaseImage string + hardlinkBaseImage string } type imageDetails struct { @@ -65,6 +66,7 @@ func initGCPConfig() *gcpConfig { c.imageRepo = c.imageRepo + "/" } c.onbuildBaseImage = c.imageRepo + "onbuild-base:latest" + c.hardlinkBaseImage = c.imageRepo + "hardlink-base:latest" return &c } @@ -141,6 +143,30 @@ func TestMain(m *testing.M) { os.Exit(1) } + fmt.Println("Building onbuild base image") + buildOnbuildBase := exec.Command("docker", "build", "-t", config.onbuildBaseImage, "-f", "dockerfiles/Dockerfile_onbuild_base", ".") + if err := buildOnbuildBase.Run(); err != nil { + fmt.Printf("error building onbuild base: %v", err) + os.Exit(1) + } + pushOnbuildBase := exec.Command("docker", "push", config.onbuildBaseImage) + if err := pushOnbuildBase.Run(); err != nil { + fmt.Printf("error pushing onbuild base %s: %v", config.onbuildBaseImage, err) + os.Exit(1) + } + + fmt.Println("Building hardlink base image") + buildHardlinkBase := exec.Command("docker", "build", "-t", config.hardlinkBaseImage, "-f", "dockerfiles/Dockerfile_hardlink_base", ".") + if err := buildHardlinkBase.Run(); err != nil { + fmt.Printf("error building hardlink base: %v", err) + os.Exit(1) + } + pushHardlinkBase := exec.Command("docker", "push", config.hardlinkBaseImage) + if err := pushHardlinkBase.Run(); err != nil { + fmt.Printf("error pushing hardlink base %s: %v", config.hardlinkBaseImage, err) + os.Exit(1) + } + dockerfiles, err := FindDockerFiles(dockerfilesPath) if err != nil { fmt.Printf("Coudn't create map of dockerfiles: %s", err) diff --git a/pkg/dockerfile/dockerfile_test.go b/pkg/dockerfile/dockerfile_test.go index d285f25a75..e51eb9e485 100644 --- a/pkg/dockerfile/dockerfile_test.go +++ b/pkg/dockerfile/dockerfile_test.go @@ -32,10 +32,10 @@ func Test_ResolveStages(t *testing.T) { FROM scratch RUN echo hi > /hi - FROM scratch AS second + FROM gcr.io/distroless/base AS second COPY --from=0 /hi /hi2 - FROM scratch + FROM another/image COPY --from=second /hi2 /hi3 ` stages, err := Parse([]byte(dockerfile)) diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index 1a4f958bb9..b1d88ac937 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -19,7 +19,9 @@ package util import ( "archive/tar" "bufio" + "bytes" "io" + "io/ioutil" "net/http" "os" "path/filepath" @@ -42,6 +44,11 @@ var whitelist = []string{ } var volumeWhitelist = []string{} +type hardlink struct { + links []*tar.Header + contents []byte +} + func GetFSFromImage(root string, img v1.Image) error { whitelist, err := fileSystemWhitelist(constants.WhitelistPath) if err != nil { @@ -55,6 +62,7 @@ func GetFSFromImage(root string, img v1.Image) error { fs := map[string]struct{}{} whiteouts := map[string]struct{}{} + hardlinks := map[string]*hardlink{} for i := len(layers) - 1; i >= 0; i-- { logrus.Infof("Unpacking layer: %d", i) @@ -63,7 +71,14 @@ func GetFSFromImage(root string, img v1.Image) error { if err != nil { return err } - tr := tar.NewReader(r) + layerContents, err := ioutil.ReadAll(r) + if err != nil { + return err + } + if err := retrieveHardlinks(layerContents, hardlinks); err != nil { + return err + } + tr := tar.NewReader(bytes.NewReader(layerContents)) for { hdr, err := tr.Next() if err == io.EOF { @@ -72,6 +87,10 @@ func GetFSFromImage(root string, img v1.Image) error { if err != nil { return err } + contents, err := ioutil.ReadAll(tr) + if err != nil { + return err + } path := filepath.Join(root, filepath.Clean(hdr.Name)) base := filepath.Base(path) dir := filepath.Dir(path) @@ -81,6 +100,9 @@ func GetFSFromImage(root string, img v1.Image) error { whiteouts[filepath.Join(dir, name)] = struct{}{} continue } + if err := resolveHardlink(hdr, contents, hardlinks); err != nil { + return err + } if checkWhiteouts(path, whiteouts) { logrus.Infof("Not adding %s because it is whited out", path) @@ -108,11 +130,93 @@ func GetFSFromImage(root string, img v1.Image) error { continue } } + if hdr.Typeflag == tar.TypeLink { + continue + } + fs[path] = struct{}{} - if err := extractFile(root, hdr, tr); err != nil { + if err := extractFile(root, hdr, bytes.NewReader(contents)); err != nil { + return err + } + } + } + // Process hardlinks + for k, h := range hardlinks { + if regularFile(k) { + for _, link := range h.links { + if err := extractFile(root, link, nil); err != nil { + return err + } + } + continue + } + original := h.links[0] + original.Typeflag = tar.TypeReg + if err := extractFile(root, original, bytes.NewReader(h.contents)); err != nil { + return err + } + for _, link := range h.links[1:] { + link.Linkname = original.Name + if err := extractFile(root, link, nil); err != nil { + return err + } + } + } + return nil +} + +func regularFile(fp string) bool { + fi, err := os.Stat(fp) + if err != nil { + return false + } + return fi.Mode().IsRegular() +} + +func retrieveHardlinks(layerContents []byte, hardlinks map[string]*hardlink) error { + tr := tar.NewReader(bytes.NewReader(layerContents)) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + if hdr.Typeflag == tar.TypeLink { + // If linkname no longer exists, extract hardlink as regular file + linkname := filepath.Clean(filepath.Join("/", hdr.Linkname)) + whitelisted, err := CheckWhitelist(linkname) + if err != nil { return err } + if whitelisted { + continue + } + if h, ok := hardlinks[linkname]; ok { + h.links = append(h.links, hdr) + continue + } + hardlinks[linkname] = &hardlink{ + links: []*tar.Header{hdr}, + } + continue + } + } + return nil +} + +func resolveHardlink(hdr *tar.Header, contents []byte, hardlinks map[string]*hardlink) error { + for k, h := range hardlinks { + if h.contents != nil { + return nil + } + if hdr.Typeflag != tar.TypeReg { + continue + } + if k == filepath.Clean(filepath.Join("/", hdr.Name)) { + h.contents = contents } } return nil @@ -219,13 +323,12 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error { } case tar.TypeLink: - logrus.Debugf("link from %s to %s", hdr.Linkname, path) // The base directory for a link may not exist before it is created. dir := filepath.Dir(path) if err := os.MkdirAll(dir, 0755); err != nil { return err } - if err := os.Symlink(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil { + if err := os.Link(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil { return err } diff --git a/pkg/util/fs_util_test.go b/pkg/util/fs_util_test.go index 2ffd0f61df..3810f5d550 100644 --- a/pkg/util/fs_util_test.go +++ b/pkg/util/fs_util_test.go @@ -343,7 +343,7 @@ func fileExists(p string) checker { return func(root string, t *testing.T) { _, err := os.Stat(filepath.Join(root, p)) if err != nil { - t.Fatalf("File does not exist") + t.Fatalf("File %s does not exist", filepath.Join(root, p)) } } } @@ -385,6 +385,24 @@ func linkPointsTo(src, dst string) checker { } } +func filesAreHardlinks(first, second string) checker { + return func(root string, t *testing.T) { + fi1, err := os.Stat(filepath.Join(root, first)) + if err != nil { + t.Fatalf("error getting file %s", first) + } + fi2, err := os.Stat(filepath.Join(second)) + if err != nil { + t.Fatalf("error getting file %s", second) + } + stat1 := getSyscallStat_t(fi1) + stat2 := getSyscallStat_t(fi2) + if stat1.Ino != stat2.Ino { + t.Errorf("%s and %s aren't hardlinks as they dont' have the same inode", first, second) + } + } +} + func fileHeader(name string, contents string, mode int64) *tar.Header { return &tar.Header{ Name: name, @@ -429,6 +447,7 @@ func TestExtractFile(t *testing.T) { type tc struct { name string hdrs []*tar.Header + tmpdir string contents []byte checkers []checker } @@ -500,13 +519,15 @@ func TestExtractFile(t *testing.T) { }, }, { - name: "hardlink", + name: "hardlink", + tmpdir: "/tmp/hardlink", hdrs: []*tar.Header{ fileHeader("/bin/gzip", "gzip-binary", 0751), - hardlinkHeader("/bin/uncompress", "/bin/gzip"), + hardlinkHeader("/bin/uncompress", "/tmp/hardlink/bin/gzip"), }, checkers: []checker{ - linkPointsTo("/bin/uncompress", "/bin/gzip"), + fileExists("/bin/gzip"), + filesAreHardlinks("/bin/uncompress", "/tmp/hardlink/bin/gzip"), }, }, } @@ -515,11 +536,19 @@ func TestExtractFile(t *testing.T) { t.Run(tc.name, func(t *testing.T) { tc := tc t.Parallel() - r, err := ioutil.TempDir("", "") - if err != nil { - t.Fatal(err) + r := "" + var err error + + if tc.tmpdir != "" { + r = tc.tmpdir + } else { + r, err = ioutil.TempDir("", "") + if err != nil { + t.Fatal(err) + } } defer os.RemoveAll(r) + for _, hdr := range tc.hdrs { if err := extractFile(r, hdr, bytes.NewReader(tc.contents)); err != nil { t.Fatal(err) diff --git a/pkg/util/tar_util.go b/pkg/util/tar_util.go index f95574deb5..64879790fa 100644 --- a/pkg/util/tar_util.go +++ b/pkg/util/tar_util.go @@ -118,24 +118,32 @@ func (t *Tar) Whiteout(p string) error { func (t *Tar) checkHardlink(p string, i os.FileInfo) (bool, string) { hardlink := false linkDst := "" - if sys := i.Sys(); sys != nil { - if stat, ok := sys.(*syscall.Stat_t); ok { - nlinks := stat.Nlink - if nlinks > 1 { - inode := stat.Ino - if original, exists := t.hardlinks[inode]; exists && original != p { - hardlink = true - logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) - linkDst = original - } else { - t.hardlinks[inode] = p - } + stat := getSyscallStat_t(i) + if stat != nil { + nlinks := stat.Nlink + if nlinks > 1 { + inode := stat.Ino + if original, exists := t.hardlinks[inode]; exists && original != p { + hardlink = true + logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) + linkDst = original + } else { + t.hardlinks[inode] = p } } } return hardlink, linkDst } +func getSyscallStat_t(i os.FileInfo) *syscall.Stat_t { + if sys := i.Sys(); sys != nil { + if stat, ok := sys.(*syscall.Stat_t); ok { + return stat + } + } + return nil +} + // UnpackLocalTarArchive unpacks the tar archive at path to the directory dest // Returns true if the path was actually unpacked func UnpackLocalTarArchive(path, dest string) error {