Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug in extracting hardlinks #284

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions integration/dockerfiles/Dockerfile_hardlink_base
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Base image for the hardlink integration test; built and pushed by
# TestMain as <imageRepo>/hardlink-base:latest.
FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a AS stage1
# Install git — on alpine this presumably places the real binary under
# /usr/libexec/git-core/ with /usr/bin/git linked to it (TODO confirm).
RUN apk --no-cache add git
# Replace /usr/bin/git with a symlink into git-core so the derived test
# image exercises kaniko's link-extraction path.
RUN rm /usr/bin/git && ln -s /usr/libexec/git-core/git /usr/bin/git
10 changes: 10 additions & 0 deletions integration/dockerfiles/Dockerfile_test_hardlink
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Integration test for extracting hardlinked files (see issue fixed by this PR).
FROM gcr.io/kaniko-test/hardlink-base:latest
# Sanity-check that the git binaries survived base-image extraction.
RUN ls -al /usr/libexec/git-core/git /usr/bin/git /usr/libexec/git-core/git-diff
RUN stat /usr/bin/git
RUN stat /usr/libexec/git-core/git
# git must still be runnable through the link.
RUN git --version
WORKDIR /temp/dir
# Exercise git operations that read/write through the linked binary.
RUN git init
ADD context/foo foo
RUN git add foo
RUN rm /temp/dir/.git/index
1 change: 1 addition & 0 deletions integration/dockerfiles/Dockerfile_test_mv_add
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM busybox@sha256:1bd6df27274fef1dd36eb529d0f4c8033f61c675d6b04213dd913f902f7cafb5
ADD context/tars /tmp/tars
# Added in this PR: verify /bin/sh (a link in busybox) is intact after extraction.
RUN stat /bin/sh
RUN mv /tmp/tars /foo
RUN echo "hi"
32 changes: 29 additions & 3 deletions integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ var config = initGCPConfig()
var imageBuilder *DockerFileBuilder

// gcpConfig holds the GCP resources used by the integration tests and the
// fully-qualified tags of the prebuilt base images they depend on.
type gcpConfig struct {
	gcsBucket         string // GCS bucket for build contexts
	imageRepo         string // image repository prefix; normalized to end with "/" in initGCPConfig
	onbuildBaseImage  string // tag of the ONBUILD-test base image, built and pushed by TestMain
	hardlinkBaseImage string // tag of the hardlink-test base image, built and pushed by TestMain
}

type imageDetails struct {
Expand All @@ -65,6 +66,7 @@ func initGCPConfig() *gcpConfig {
c.imageRepo = c.imageRepo + "/"
}
c.onbuildBaseImage = c.imageRepo + "onbuild-base:latest"
c.hardlinkBaseImage = c.imageRepo + "hardlink-base:latest"
return &c
}

Expand Down Expand Up @@ -143,6 +145,30 @@ func TestMain(m *testing.M) {
os.Exit(1)
}

fmt.Println("Building onbuild base image")
buildOnbuildBase := exec.Command("docker", "build", "-t", config.onbuildBaseImage, "-f", "dockerfiles/Dockerfile_onbuild_base", ".")
if err := buildOnbuildBase.Run(); err != nil {
fmt.Printf("error building onbuild base: %v", err)
os.Exit(1)
}
pushOnbuildBase := exec.Command("docker", "push", config.onbuildBaseImage)
if err := pushOnbuildBase.Run(); err != nil {
fmt.Printf("error pushing onbuild base %s: %v", config.onbuildBaseImage, err)
os.Exit(1)
}

fmt.Println("Building hardlink base image")
buildHardlinkBase := exec.Command("docker", "build", "-t", config.hardlinkBaseImage, "-f", "dockerfiles/Dockerfile_hardlink_base", ".")
if err := buildHardlinkBase.Run(); err != nil {
fmt.Printf("error building hardlink base: %v", err)
os.Exit(1)
}
pushHardlinkBase := exec.Command("docker", "push", config.hardlinkBaseImage)
if err := pushHardlinkBase.Run(); err != nil {
fmt.Printf("error pushing hardlink base %s: %v", config.hardlinkBaseImage, err)
os.Exit(1)
}

dockerfiles, err := FindDockerFiles(dockerfilesPath)
if err != nil {
fmt.Printf("Coudn't create map of dockerfiles: %s", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/dockerfile/dockerfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func SaveStage(index int, stages []instructions.Stage) bool {
if stageIndex <= index {
continue
}
if stage.Name == stages[index].BaseName {
if stage.BaseName == stages[index].Name {
return true
}
for _, cmd := range stage.Commands {
Expand Down
4 changes: 2 additions & 2 deletions pkg/dockerfile/dockerfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ func Test_ResolveStages(t *testing.T) {
FROM scratch
RUN echo hi > /hi

FROM scratch AS second
FROM gcr.io/distroless/base AS second
COPY --from=0 /hi /hi2

FROM scratch
FROM another/image
COPY --from=second /hi2 /hi3
`
stages, err := Parse([]byte(dockerfile))
Expand Down
110 changes: 106 additions & 4 deletions pkg/util/fs_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package util
import (
"archive/tar"
"bufio"
"bytes"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
Expand All @@ -42,6 +44,11 @@ var whitelist = []string{
}
var volumeWhitelist = []string{}

// hardlink groups all tar hardlink headers that point at the same target
// path, together with that target's file contents once its regular-file
// entry is seen while unpacking layers.
type hardlink struct {
	// links are the tar headers (Typeflag == tar.TypeLink) whose cleaned
	// Linkname resolves to the same target path.
	links []*tar.Header
	// contents is the raw data of the link target; nil until the target's
	// regular-file entry is encountered.
	contents []byte
}

func GetFSFromImage(root string, img v1.Image) error {
whitelist, err := fileSystemWhitelist(constants.WhitelistPath)
if err != nil {
Expand All @@ -55,6 +62,10 @@ func GetFSFromImage(root string, img v1.Image) error {

fs := map[string]struct{}{}
whiteouts := map[string]struct{}{}
hardlinks, err := retrieveHardlinks(layers)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it would make sense to process each layer separately instead of doing them all at once? I think we already disallow hardlinks to cross layer boundaries.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think that would be better — I changed it to retrieve hardlinks as we process the layers.

if err != nil {
return err
}

for i := len(layers) - 1; i >= 0; i-- {
logrus.Infof("Unpacking layer: %d", i)
Expand All @@ -72,6 +83,10 @@ func GetFSFromImage(root string, img v1.Image) error {
if err != nil {
return err
}
contents, err := ioutil.ReadAll(tr)
if err != nil {
return err
}
path := filepath.Join(root, filepath.Clean(hdr.Name))
base := filepath.Base(path)
dir := filepath.Dir(path)
Expand All @@ -81,6 +96,9 @@ func GetFSFromImage(root string, img v1.Image) error {
whiteouts[filepath.Join(dir, name)] = struct{}{}
continue
}
if err := resolveHardlink(hdr, contents, hardlinks); err != nil {
return err
}

if checkWhiteouts(path, whiteouts) {
logrus.Infof("Not adding %s because it is whited out", path)
Expand All @@ -90,7 +108,6 @@ func GetFSFromImage(root string, img v1.Image) error {
logrus.Infof("Not adding %s because it was added by a prior layer", path)
continue
}

if CheckWhitelist(path) && !checkWhitelistRoot(root) {
logrus.Infof("Not adding %s because it is whitelisted", path)
continue
Expand All @@ -101,16 +118,102 @@ func GetFSFromImage(root string, img v1.Image) error {
continue
}
}
if hdr.Typeflag == tar.TypeLink {
continue
}

fs[path] = struct{}{}

if err := extractFile(root, hdr, tr); err != nil {
if err := extractFile(root, hdr, bytes.NewReader(contents)); err != nil {
return err
}
}
}
// Process hardlinks
for k, h := range hardlinks {
if regularFile(k) {
for _, link := range h.links {
if err := extractFile(root, link, bytes.NewReader(h.contents)); err != nil {
return err
}
}
continue
}
original := h.links[0]
original.Typeflag = tar.TypeReg
if err := extractFile(root, original, bytes.NewReader(h.contents)); err != nil {
return err
}
for _, link := range h.links[1:] {
link.Linkname = original.Name
if err := extractFile(root, link, nil); err != nil {
return err
}
}
}
return nil
}

func regularFile(fp string) bool {
fi, err := os.Stat(fp)
if err != nil {
return false
}
return fi.Mode().IsRegular()
}

// retrieveHardlinks scans all layers (topmost first) and indexes every tar
// hardlink header by the cleaned, rooted path of its link target. The
// contents field of each entry is left nil; it is filled in later by
// resolveHardlink when the target's regular-file entry is extracted.
// Whitelisted link targets are skipped entirely.
func retrieveHardlinks(layers []v1.Layer) (map[string]*hardlink, error) {
	hardlinks := map[string]*hardlink{}
	for i := len(layers) - 1; i >= 0; i-- {
		if err := retrieveLayerHardlinks(layers[i], hardlinks); err != nil {
			return nil, err
		}
	}
	return hardlinks, nil
}

// retrieveLayerHardlinks indexes the hardlink headers of a single layer into
// hardlinks, grouping headers that share the same cleaned link target.
func retrieveLayerHardlinks(l v1.Layer, hardlinks map[string]*hardlink) error {
	r, err := l.Uncompressed()
	if err != nil {
		return err
	}
	// Close the uncompressed layer stream; the original leaked one reader
	// per layer.
	defer r.Close()
	tr := tar.NewReader(r)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if hdr.Typeflag != tar.TypeLink {
			continue
		}
		// Root and clean the link target so lookups from resolveHardlink
		// (which cleans hdr.Name the same way) match exactly.
		linkname := filepath.Clean(filepath.Join("/", hdr.Linkname))
		if CheckWhitelist(linkname) {
			// Whitelisted targets won't be extracted; ignore their links.
			continue
		}
		if h, ok := hardlinks[linkname]; ok {
			h.links = append(h.links, hdr)
			continue
		}
		hardlinks[linkname] = &hardlink{
			links: []*tar.Header{hdr},
		}
	}
}

// resolveHardlink records the contents of a regular-file tar entry if that
// entry is the target of one or more hardlinks collected by
// retrieveHardlinks. Layers are processed topmost-first, so contents already
// recorded by an upper layer are never overwritten.
//
// The error return is kept for interface stability; this implementation
// always returns nil.
func resolveHardlink(hdr *tar.Header, contents []byte, hardlinks map[string]*hardlink) error {
	if hdr.Typeflag != tar.TypeReg {
		return nil
	}
	// Keys in hardlinks are cleaned, rooted paths — clean hdr.Name the same
	// way and look it up directly instead of scanning the whole map.
	//
	// BUG FIX: the original returned early as soon as the first map entry it
	// happened to iterate had non-nil contents; since Go map iteration order
	// is random, that nondeterministically skipped resolving other targets.
	name := filepath.Clean(filepath.Join("/", hdr.Name))
	if h, ok := hardlinks[name]; ok && h.contents == nil {
		h.contents = contents
	}
	return nil
}

// DeleteFilesystem deletes the extracted image file system
func DeleteFilesystem() error {
logrus.Info("Deleting filesystem...")
Expand Down Expand Up @@ -208,13 +311,12 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error {
}

case tar.TypeLink:
logrus.Debugf("link from %s to %s", hdr.Linkname, path)
// The base directory for a link may not exist before it is created.
dir := filepath.Dir(path)
if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
if err := os.Symlink(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil {
if err := os.Link(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil {
return err
}

Expand Down
43 changes: 36 additions & 7 deletions pkg/util/fs_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ func fileExists(p string) checker {
return func(root string, t *testing.T) {
_, err := os.Stat(filepath.Join(root, p))
if err != nil {
t.Fatalf("File does not exist")
t.Fatalf("File %s does not exist", filepath.Join(root, p))
}
}
}
Expand Down Expand Up @@ -381,6 +381,24 @@ func linkPointsTo(src, dst string) checker {
}
}

// filesAreHardlinks returns a checker verifying that first (resolved
// relative to the test root) and second (an absolute path that already
// includes the test directory — see the "hardlink" case in TestExtractFile)
// are hardlinks of each other, i.e. share an inode.
func filesAreHardlinks(first, second string) checker {
	return func(root string, t *testing.T) {
		fi1, err := os.Stat(filepath.Join(root, first))
		if err != nil {
			// Include the underlying error; the original discarded it.
			t.Fatalf("error getting file %s: %v", first, err)
		}
		// NOTE(review): second is deliberately NOT joined with root; the
		// original's filepath.Join(second) was a no-op clean.
		fi2, err := os.Stat(second)
		if err != nil {
			t.Fatalf("error getting file %s: %v", second, err)
		}
		stat1 := getSyscallStat_t(fi1)
		stat2 := getSyscallStat_t(fi2)
		if stat1.Ino != stat2.Ino {
			// Typo fix: "dont'" -> "don't".
			t.Errorf("%s and %s aren't hardlinks as they don't have the same inode", first, second)
		}
	}
}

func fileHeader(name string, contents string, mode int64) *tar.Header {
return &tar.Header{
Name: name,
Expand Down Expand Up @@ -425,6 +443,7 @@ func TestExtractFile(t *testing.T) {
type tc struct {
name string
hdrs []*tar.Header
tmpdir string
contents []byte
checkers []checker
}
Expand Down Expand Up @@ -496,13 +515,15 @@ func TestExtractFile(t *testing.T) {
},
},
{
name: "hardlink",
name: "hardlink",
tmpdir: "/tmp/hardlink",
hdrs: []*tar.Header{
fileHeader("/bin/gzip", "gzip-binary", 0751),
hardlinkHeader("/bin/uncompress", "/bin/gzip"),
hardlinkHeader("/bin/uncompress", "/tmp/hardlink/bin/gzip"),
},
checkers: []checker{
linkPointsTo("/bin/uncompress", "/bin/gzip"),
fileExists("/bin/gzip"),
filesAreHardlinks("/bin/uncompress", "/tmp/hardlink/bin/gzip"),
},
},
}
Expand All @@ -511,11 +532,19 @@ func TestExtractFile(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
tc := tc
t.Parallel()
r, err := ioutil.TempDir("", "")
if err != nil {
t.Fatal(err)
r := ""
var err error

if tc.tmpdir != "" {
r = tc.tmpdir
} else {
r, err = ioutil.TempDir("", "")
if err != nil {
t.Fatal(err)
}
}
defer os.RemoveAll(r)

for _, hdr := range tc.hdrs {
if err := extractFile(r, hdr, bytes.NewReader(tc.contents)); err != nil {
t.Fatal(err)
Expand Down
Loading