Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug in extracting hardlinks #284

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions integration/dockerfiles/Dockerfile_hardlink_base
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a AS stage1
RUN apk --no-cache add git
# Test removing a file and symlinking it
RUN rm /usr/bin/git && ln -s /usr/libexec/git-core/git /usr/bin/git
# Test removing a file which will be symlinked
RUN rm /usr/libexec/git-core/git-clone
# Test changing a file
RUN rm /usr/libexec/git-core/git-diff && echo "something" > /usr/libexec/git-core/git-diff
4 changes: 4 additions & 0 deletions integration/dockerfiles/Dockerfile_test_hardlink
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FROM gcr.io/kaniko-test/hardlink-base:latest
RUN ln -s /usr/libexec/git-core/git /usr/bin/git-clone
RUN ls -al /usr/libexec/git-core/git /usr/bin/git /usr/bin/git-clone /usr/libexec/git-core/git-diff
RUN git --version
32 changes: 29 additions & 3 deletions integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ var config = initGCPConfig()
var imageBuilder *DockerFileBuilder

type gcpConfig struct {
gcsBucket string
imageRepo string
onbuildBaseImage string
gcsBucket string
imageRepo string
onbuildBaseImage string
hardlinkBaseImage string
}

type imageDetails struct {
Expand All @@ -65,6 +66,7 @@ func initGCPConfig() *gcpConfig {
c.imageRepo = c.imageRepo + "/"
}
c.onbuildBaseImage = c.imageRepo + "onbuild-base:latest"
c.hardlinkBaseImage = c.imageRepo + "hardlink-base:latest"
return &c
}

Expand Down Expand Up @@ -143,6 +145,30 @@ func TestMain(m *testing.M) {
os.Exit(1)
}

fmt.Println("Building onbuild base image")
buildOnbuildBase := exec.Command("docker", "build", "-t", config.onbuildBaseImage, "-f", "dockerfiles/Dockerfile_onbuild_base", ".")
if err := buildOnbuildBase.Run(); err != nil {
fmt.Printf("error building onbuild base: %v", err)
os.Exit(1)
}
pushOnbuildBase := exec.Command("docker", "push", config.onbuildBaseImage)
if err := pushOnbuildBase.Run(); err != nil {
fmt.Printf("error pushing onbuild base %s: %v", config.onbuildBaseImage, err)
os.Exit(1)
}

// Build and push the hardlink base image. It must be built from
// Dockerfile_hardlink_base (not the onbuild Dockerfile), otherwise the image
// tagged hardlink-base would not contain the git hardlink/symlink fixture.
fmt.Println("Building hardlink base image")
buildHardlinkBase := exec.Command("docker", "build", "-t", config.hardlinkBaseImage, "-f", "dockerfiles/Dockerfile_hardlink_base", ".")
if err := buildHardlinkBase.Run(); err != nil {
	fmt.Printf("error building hardlink base: %v", err)
	os.Exit(1)
}
// Push so that builds which pull the base image from the registry can find it.
pushHardlinkBase := exec.Command("docker", "push", config.hardlinkBaseImage)
if err := pushHardlinkBase.Run(); err != nil {
	fmt.Printf("error pushing hardlink base %s: %v", config.hardlinkBaseImage, err)
	os.Exit(1)
}


dockerfiles, err := FindDockerFiles(dockerfilesPath)
if err != nil {
fmt.Printf("Coudn't create map of dockerfiles: %s", err)
Expand Down
4 changes: 3 additions & 1 deletion pkg/dockerfile/dockerfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/moby/buildkit/frontend/dockerfile/instructions"
"github.com/moby/buildkit/frontend/dockerfile/parser"
"github.com/sirupsen/logrus"
)

// Stages reads the Dockerfile, validates its contents, and returns stages
Expand Down Expand Up @@ -112,11 +113,12 @@ func ParseCommands(cmdArray []string) ([]instructions.Command, error) {

// SaveStage returns true if the current stage will be needed later in the Dockerfile
func SaveStage(index int, stages []instructions.Stage) bool {
logrus.Infof("looking into saving stage %d", index)
for stageIndex, stage := range stages {
if stageIndex <= index {
continue
}
if stage.Name == stages[index].BaseName {
if stage.BaseName == stages[index].Name {
return true
}
for _, cmd := range stage.Commands {
Expand Down
4 changes: 2 additions & 2 deletions pkg/dockerfile/dockerfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ func Test_ResolveStages(t *testing.T) {
FROM scratch
RUN echo hi > /hi

FROM scratch AS second
FROM gcr.io/distroless/base AS second
COPY --from=0 /hi /hi2

FROM scratch
FROM another/image
COPY --from=second /hi2 /hi3
`
stages, err := Parse([]byte(dockerfile))
Expand Down
55 changes: 52 additions & 3 deletions pkg/util/fs_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ var whitelist = []string{
}
var volumeWhitelist = []string{}

// hardlink groups the tar entries whose extraction is deferred until all
// layers have been unpacked. GetFSFromImage keys these by the cleaned link
// target; the first header is later extracted as a regular file and each
// remaining header is linked to that first entry.
type hardlink struct {
// links holds every tar header seen for the same link target, in the order encountered.
links []*tar.Header
// reader is the reader passed to extractFile for the first header.
// NOTE(review): GetFSFromImage stores the layer's tar reader here; per the
// review discussion, that reader may no longer be positioned at this entry
// by the time the deferred extraction runs — confirm.
reader io.Reader
}

func GetFSFromImage(root string, img v1.Image) error {
whitelist, err := fileSystemWhitelist(constants.WhitelistPath)
if err != nil {
Expand All @@ -55,6 +60,8 @@ func GetFSFromImage(root string, img v1.Image) error {

fs := map[string]struct{}{}
whiteouts := map[string]struct{}{}
symlinks := map[string]struct{}{}
hardlinks := map[string]*hardlink{}

for i := len(layers) - 1; i >= 0; i-- {
logrus.Infof("Unpacking layer: %d", i)
Expand Down Expand Up @@ -90,7 +97,6 @@ func GetFSFromImage(root string, img v1.Image) error {
logrus.Infof("Not adding %s because it was added by a prior layer", path)
continue
}

if CheckWhitelist(path) && !checkWhitelistRoot(root) {
logrus.Infof("Not adding %s because it is whitelisted", path)
continue
Expand All @@ -100,6 +106,27 @@ func GetFSFromImage(root string, img v1.Image) error {
logrus.Debugf("skipping symlink from %s to %s because %s is whitelisted", hdr.Linkname, path, hdr.Linkname)
continue
}
symlinks[filepath.Clean(filepath.Join("/", hdr.Name))] = struct{}{}
}
if hdr.Typeflag == tar.TypeLink {
// If linkname no longer exists, extract hardlink as regular file
linkname := filepath.Clean(filepath.Join("/", hdr.Linkname))
if CheckWhitelist(linkname) {
logrus.Debugf("skipping hardlink from %s to %s because %s is whitelisted", linkname, path, linkname)
continue
}
_, previouslyAdded := fs[linkname]
if previouslyAdded || checkSymlinks(linkname, symlinks) || checkWhiteouts(linkname, whiteouts) {
if h, ok := hardlinks[linkname]; ok {
h.links = append(h.links, hdr)
continue
}
hardlinks[linkname] = &hardlink{
links: []*tar.Header{hdr},
reader: tr,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'd be kinda surprised if saving off the tar reader here works later. Per the docs for tr.Next:

Next advances to the next entry in the tar archive. The Header.Size determines how many bytes can be read for the next file. Any remaining data in the current file is automatically discarded.

(emphasis mine)

One problem is that it's basically streaming through the archive sequentially as it's downloading. It never buffers to disk or RAM, so I don't think you can go "back" to earlier entries in the archive. Tricky.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out, fixed.

}
continue
}
}
fs[path] = struct{}{}

Expand All @@ -108,6 +135,20 @@ func GetFSFromImage(root string, img v1.Image) error {
}
}
}
// Process hardlinks
for _, h := range hardlinks {
original := h.links[0]
original.Typeflag = tar.TypeReg
if err := extractFile(root, original, h.reader); err != nil {
return err
}
for _, link := range h.links[1:] {
link.Linkname = original.Name
if err := extractFile(root, link, nil); err != nil {
return err
}
}
}
return nil
}

Expand Down Expand Up @@ -208,13 +249,12 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error {
}

case tar.TypeLink:
logrus.Debugf("link from %s to %s", hdr.Linkname, path)
// The base directory for a link may not exist before it is created.
dir := filepath.Dir(path)
if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
if err := os.Symlink(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil {
if err := os.Link(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil {
return err
}

Expand Down Expand Up @@ -247,6 +287,15 @@ func checkWhiteouts(path string, whiteouts map[string]struct{}) bool {
return false
}

func checkSymlinks(path string, symlinks map[string]struct{}) bool {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why we need this one if we're checking the other stuff. Are you sure we need it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, thanks for pointing that out! I'll remove it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we might actually need the symlink check, since we only want to specially hardlink at the end of extraction if the linked file exists but can't be hardlinked (which I think only happens if it's a symlink)

If a file was normally extracted in a previous layer, then we can extract the file and create the hardlink normally.

for sym := range symlinks {
if path == sym {
return true
}
}
return false
}

func CheckWhitelist(path string) bool {
for _, wl := range whitelist {
if HasFilepathPrefix(path, wl) {
Expand Down