-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix bug in extracting hardlinks #284
Changes from 1 commit
3d128d3
2808197
881708b
84787f4
69b9e92
ff09a44
d013c30
5ac7822
28766cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a AS stage1 | ||
RUN apk --no-cache add git | ||
# Test removing a file and symlinking it | ||
RUN rm /usr/bin/git && ln -s /usr/libexec/git-core/git /usr/bin/git | ||
# Test removing a file which will be symlinked | ||
RUN rm /usr/libexec/git-core/git-clone | ||
# Test changing a file | ||
RUN rm /usr/libexec/git-core/git-diff && echo "something" > /usr/libexec/git-core/git-diff |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
FROM gcr.io/kaniko-test/hardlink-base:latest | ||
RUN ln -s /usr/libexec/git-core/git /usr/bin/git-clone | ||
RUN ls -al /usr/libexec/git-core/git /usr/bin/git /usr/bin/git-clone /usr/libexec/git-core/git-diff | ||
RUN git --version |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,11 @@ var whitelist = []string{ | |
} | ||
var volumeWhitelist = []string{} | ||
|
||
type hardlink struct { | ||
links []*tar.Header | ||
reader io.Reader | ||
} | ||
|
||
func GetFSFromImage(root string, img v1.Image) error { | ||
whitelist, err := fileSystemWhitelist(constants.WhitelistPath) | ||
if err != nil { | ||
|
@@ -55,6 +60,8 @@ func GetFSFromImage(root string, img v1.Image) error { | |
|
||
fs := map[string]struct{}{} | ||
whiteouts := map[string]struct{}{} | ||
symlinks := map[string]struct{}{} | ||
hardlinks := map[string]*hardlink{} | ||
|
||
for i := len(layers) - 1; i >= 0; i-- { | ||
logrus.Infof("Unpacking layer: %d", i) | ||
|
@@ -90,7 +97,6 @@ func GetFSFromImage(root string, img v1.Image) error { | |
logrus.Infof("Not adding %s because it was added by a prior layer", path) | ||
continue | ||
} | ||
|
||
if CheckWhitelist(path) && !checkWhitelistRoot(root) { | ||
logrus.Infof("Not adding %s because it is whitelisted", path) | ||
continue | ||
|
@@ -100,6 +106,27 @@ func GetFSFromImage(root string, img v1.Image) error { | |
logrus.Debugf("skipping symlink from %s to %s because %s is whitelisted", hdr.Linkname, path, hdr.Linkname) | ||
continue | ||
} | ||
symlinks[filepath.Clean(filepath.Join("/", hdr.Name))] = struct{}{} | ||
} | ||
if hdr.Typeflag == tar.TypeLink { | ||
// If linkname no longer exists, extract hardlink as regular file | ||
linkname := filepath.Clean(filepath.Join("/", hdr.Linkname)) | ||
if CheckWhitelist(linkname) { | ||
logrus.Debugf("skipping hardlink from %s to %s because %s is whitelisted", linkname, path, linkname) | ||
continue | ||
} | ||
_, previouslyAdded := fs[linkname] | ||
if previouslyAdded || checkSymlinks(linkname, symlinks) || checkWhiteouts(linkname, whiteouts) { | ||
if h, ok := hardlinks[linkname]; ok { | ||
h.links = append(h.links, hdr) | ||
continue | ||
} | ||
hardlinks[linkname] = &hardlink{ | ||
links: []*tar.Header{hdr}, | ||
reader: tr, | ||
} | ||
continue | ||
} | ||
} | ||
fs[path] = struct{}{} | ||
|
||
|
@@ -108,6 +135,20 @@ func GetFSFromImage(root string, img v1.Image) error { | |
} | ||
} | ||
} | ||
// Process hardlinks | ||
for _, h := range hardlinks { | ||
original := h.links[0] | ||
original.Typeflag = tar.TypeReg | ||
if err := extractFile(root, original, h.reader); err != nil { | ||
return err | ||
} | ||
for _, link := range h.links[1:] { | ||
link.Linkname = original.Name | ||
if err := extractFile(root, link, nil); err != nil { | ||
return err | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
|
@@ -208,13 +249,12 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error { | |
} | ||
|
||
case tar.TypeLink: | ||
logrus.Debugf("link from %s to %s", hdr.Linkname, path) | ||
// The base directory for a link may not exist before it is created. | ||
dir := filepath.Dir(path) | ||
if err := os.MkdirAll(dir, 0755); err != nil { | ||
return err | ||
} | ||
if err := os.Symlink(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil { | ||
if err := os.Link(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil { | ||
return err | ||
} | ||
|
||
|
@@ -247,6 +287,15 @@ func checkWhiteouts(path string, whiteouts map[string]struct{}) bool { | |
return false | ||
} | ||
|
||
func checkSymlinks(path string, symlinks map[string]struct{}) bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure why we need this one if we're checking the other stuff. Are you sure we need it? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're right, thanks for pointing that out! I'll remove it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we might actually need the symlink check, since we only want to specially hardlink at the end of extraction if the linked file exists but can't be hardlinked (which I think only happens if it's a symlink). If a file was normally extracted in a previous layer, then we can extract the file and create the hardlink normally. |
||
for sym := range symlinks { | ||
if path == sym { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
func CheckWhitelist(path string) bool { | ||
for _, wl := range whitelist { | ||
if HasFilepathPrefix(path, wl) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
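Hmm, I'd be kinda surprised if saving off the tar reader here works later. Per the docs for `tr.Next` (emphasis mine): "Next advances to the next entry in the tar archive. [...] *Any remaining data in the current file is automatically discarded.*"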
One problem is that it's basically streaming through the archive sequentially as it's downloading. It never buffers to disk or RAM, so I don't think you can go "back" to earlier entries in the archive. Tricky.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing this out, fixed.