From 091b5682bfa2314cdb96306455c3298e2f0fff59 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Tue, 10 Nov 2015 19:10:52 +0100 Subject: [PATCH] handle hard links and whiteouts correctly Hard links and whiteouts were not handled correctly in this scenario: - The first Docker layer contains: - file A - file B hard link to file A - The second Docker layer contains: - whiteout file A Then, the squashed ACI previously generated by docker2aci contained: - file B dangling hard link to file A Hence the bug. This patch changes: - the converting algorithm lib/common/common.go:writeACI() to generate: - First docker layer - .hidden.docker2aci.sha512-xxx (hash of the layer and file name) - file A hard link to .hidden.docker2aci.sha512-xxx - file B hard link to .hidden.docker2aci.sha512-xxx - Second docker layer - whiteout file A - the squashing algorithm lib/docker2aci.go:SquashLayers() to have two passes: - Pass one: build an in-memory map of hard links and whiteouts - Pass two: remove whiteout and .hidden.docker2aci.sha512-xxx files I tested the following images from https://github.com/coreos/rkt/issues/1653: - docker://albanc/busybox-hardlinks - docker://zopyx/xmldirector-plone TODO: - not optimized if there are no hard links - semantic changes on --nosquash Fixes https://github.com/appc/docker2aci/issues/98 --- lib/common/common.go | 28 ++++++++++- lib/docker2aci.go | 107 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 120 insertions(+), 15 deletions(-) diff --git a/lib/common/common.go b/lib/common/common.go index c6f0e25f..9bb8c21f 100644 --- a/lib/common/common.go +++ b/lib/common/common.go @@ -3,6 +3,7 @@ package common import ( "archive/tar" "compress/gzip" + "crypto/sha512" "encoding/json" "fmt" "io" @@ -310,6 +311,16 @@ func convertVolumesToMPs(dockerVolumes map[string]struct{}) ([]appctypes.MountPo return mps, nil } +func getIdFromName(layer, target string) string { + h := sha512.New() + h.Write([]byte(layer)) + h.Write([]byte{0}) + 
h.Write([]byte(target)) + h.Write([]byte{0}) + + return fmt.Sprintf(".hidden.docker2aci.sha512-%x", h.Sum(nil)) +} + func writeACI(layer io.ReadSeeker, manifest schema.ImageManifest, curPwl []string, output string, compress bool) (*schema.ImageManifest, error) { aciFile, err := os.Create(output) if err != nil { @@ -348,8 +359,23 @@ func writeACI(layer io.ReadSeeker, manifest schema.ImageManifest, curPwl []strin whiteouts = append(whiteouts, strings.Replace(absolutePath, ".wh.", "", 1)) return nil } + if t.Header.Typeflag == tar.TypeLink { - t.Header.Linkname = path.Join("rootfs", t.Linkname()) + hash := getIdFromName(manifest.Name.String(), t.Linkname()) + t.Header.Linkname = path.Join("rootfs", hash) + } else if t.Header.Typeflag == tar.TypeReg { + var newHeader tar.Header = *t.Header + linkname := path.Join("rootfs", getIdFromName(manifest.Name.String(), name)) + newHeader.Name = linkname + if err := trw.WriteHeader(&newHeader); err != nil { + return err + } + if _, err := io.Copy(trw, t.TarStream); err != nil { + return err + } + t.Header.Typeflag = tar.TypeLink + t.Header.Linkname = linkname + t.Header.Size = 0 } if err := trw.WriteHeader(t.Header); err != nil { diff --git a/lib/docker2aci.go b/lib/docker2aci.go index 3c44f5d1..30863627 100644 --- a/lib/docker2aci.go +++ b/lib/docker2aci.go @@ -230,6 +230,32 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI outputWriter := tar.NewWriter(gw) defer outputWriter.Close() + finalManifest := mergeManifests(manifests) + + if err := common.WriteManifest(outputWriter, finalManifest); err != nil { + return err + } + + if err := common.WriteRootfsDir(outputWriter); err != nil { + return err + } + + type hardLinkEntry struct { + firstLinkCleanName string + firstLinkHeader tar.Header + walked bool + } + hardLinks := make(map[string]hardLinkEntry) + + type tempEntry struct { + cleanName string + keep bool + firstLink bool + } + var entries []tempEntry + + // first pass: read all the entries 
and build tempEntry, hardLinks in memory + // but don't write on disk for _, aciFile := range renderedACI { rs, err := aciProvider.ReadStream(aciFile.Key) if err != nil { @@ -239,19 +265,78 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI squashWalker := func(t *tarball.TarFile) error { cleanName := filepath.Clean(t.Name()) - - if _, ok := aciFile.FileMap[cleanName]; ok { - // we generate and add rootfs and the squashed manifest later - if cleanName == "manifest" || cleanName == "rootfs" { - return nil + // the rootfs and the squashed manifest are added separately + if cleanName == "manifest" || cleanName == "rootfs" { + return nil + } + _, keep := aciFile.FileMap[cleanName] + if keep && t.Header.Typeflag == tar.TypeLink { + cleanTarget := filepath.Clean(t.Linkname()) + if _, ok := hardLinks[cleanTarget]; !ok { + hardLinks[cleanTarget] = hardLinkEntry{cleanName, *t.Header, false} } - if err := outputWriter.WriteHeader(t.Header); err != nil { + } + entries = append(entries, tempEntry{cleanName, keep, false}) + return nil + } + + tr := tar.NewReader(rs) + if err := tarball.Walk(*tr, squashWalker); err != nil { + return err + } + } + + // second pass: write on disk + currentEntry := 0 + for _, aciFile := range renderedACI { + rs, err := aciProvider.ReadStream(aciFile.Key) + if err != nil { + return err + } + defer rs.Close() + + squashWalker := func(t *tarball.TarFile) error { + cleanName := filepath.Clean(t.Name()) + // the rootfs and the squashed manifest are added separately + if cleanName == "manifest" || cleanName == "rootfs" { + return nil + } + + if link, ok := hardLinks[cleanName]; ok { + link.firstLinkHeader.Size = t.Header.Size + link.firstLinkHeader.Typeflag = t.Header.Typeflag + link.firstLinkHeader.Linkname = "" + + if err := outputWriter.WriteHeader(&link.firstLinkHeader); err != nil { return fmt.Errorf("error writing header: %v", err) } if _, err := io.Copy(outputWriter, t.TarStream); err != nil { return 
fmt.Errorf("error copying file into the tar out: %v", err) } + } else if entries[currentEntry].keep { + if t.Header.Typeflag == tar.TypeLink { + cleanTarget := filepath.Clean(t.Linkname()) + if link, ok := hardLinks[cleanTarget]; ok { + if !link.walked { + entries[currentEntry].firstLink = true + } else { + t.Header.Linkname = link.firstLinkCleanName + } + link.walked = true + hardLinks[cleanTarget] = link + } + } + + if !entries[currentEntry].firstLink { + if err := outputWriter.WriteHeader(t.Header); err != nil { + return fmt.Errorf("error writing header: %v", err) + } + if _, err := io.Copy(outputWriter, t.TarStream); err != nil { + return fmt.Errorf("error copying file into the tar out: %v", err) + } + } } + currentEntry++ return nil } @@ -261,14 +346,8 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI } } - if err := common.WriteRootfsDir(outputWriter); err != nil { - return err - } - - finalManifest := mergeManifests(manifests) - - if err := common.WriteManifest(outputWriter, finalManifest); err != nil { - return err + if len(entries) != currentEntry { + panic(fmt.Sprintf("found %d tarball entries in the first iteration and %d in the second", len(entries), currentEntry)) } return nil