diff --git a/drivers/driver.go b/drivers/driver.go index 9aa407168f..4832594ab0 100644 --- a/drivers/driver.go +++ b/drivers/driver.go @@ -167,6 +167,40 @@ type Driver interface { LayerIDMapUpdater } +// DriverWithDifferOutput is the result of ApplyDiffWithDiffer +// This API is experimental and can be changed without bumping the major version number. +type DriverWithDifferOutput struct { + Differ Differ + Target string + Size int64 + UIDs []uint32 + GIDs []uint32 + UncompressedDigest digest.Digest + Metadata string + BigData map[string][]byte +} + +// Differ defines the interface for using a custom differ. +// This API is experimental and can be changed without bumping the major version number. +type Differ interface { + ApplyDiff(dest string, options *archive.TarOptions) (DriverWithDifferOutput, error) +} + +// DriverWithDiffer is the interface for direct diff access. +// This API is experimental and can be changed without bumping the major version number. +type DriverWithDiffer interface { + Driver + // ApplyDiffWithDiffer applies the changes using the callback function. + // If id is empty, then a staging directory is created. The staging directory is guaranteed to be usable with ApplyDiffFromStagingDirectory. + ApplyDiffWithDiffer(id, parent string, options *ApplyDiffOpts, differ Differ) (output DriverWithDifferOutput, err error) + // ApplyDiffFromStagingDirectory applies the changes using the specified staging directory. + ApplyDiffFromStagingDirectory(id, parent, stagingDirectory string, diffOutput *DriverWithDifferOutput, options *ApplyDiffOpts) error + // CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors + CleanupStagingDirectory(stagingDirectory string) error + // DifferTarget gets the location where files are stored for the layer. + DifferTarget(id string) (string, error) +} + // Capabilities defines a list of capabilities a driver may implement. // These capabilities are not required; however, they do determine how a // graphdriver can be used. diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index 60eb047bd3..0b4ec6bccf 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -705,6 +705,7 @@ func (d *Driver) Metadata(id string) (map[string]string, error) { // is being shutdown. For now, we just have to unmount the bind mounted // we had created. func (d *Driver) Cleanup() error { + _ = os.RemoveAll(d.getStagingDir()) return mount.Unmount(d.home) } @@ -1490,6 +1491,10 @@ func (f fileGetNilCloser) Close() error { return nil } +func (d *Driver) getStagingDir() string { + return filepath.Join(d.home, "staging") +} + // DiffGetter returns a FileGetCloser that can read files from the directory that // contains files for the layer differences. Used for direct access for tar-split. func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) { @@ -1500,6 +1505,75 @@ func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) { return fileGetNilCloser{storage.NewPathFileGetter(p)}, nil } +// CleanupStagingDirectory cleanups the staging directory. +func (d *Driver) CleanupStagingDirectory(stagingDirectory string) error { + return os.RemoveAll(stagingDirectory) +} + +// ApplyDiff applies the changes in the new layer using the specified function +func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.ApplyDiffOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, err error) { + var idMappings *idtools.IDMappings + if options != nil { + idMappings = options.Mappings + } + if idMappings == nil { + idMappings = &idtools.IDMappings{} + } + + applyDir := "" + + if id == "" { + err := os.MkdirAll(d.getStagingDir(), 0700) + if err != nil && !os.IsExist(err) { + return graphdriver.DriverWithDifferOutput{}, err + } + applyDir, err = ioutil.TempDir(d.getStagingDir(), "") + if err != nil { + return graphdriver.DriverWithDifferOutput{}, err + } + + } else { + var err error + applyDir, err = d.getDiffPath(id) + if err != nil { + return graphdriver.DriverWithDifferOutput{}, err + } + } + + logrus.Debugf("Applying differ in %s", applyDir) + + out, err := differ.ApplyDiff(applyDir, &archive.TarOptions{ + UIDMaps: idMappings.UIDs(), + GIDMaps: idMappings.GIDs(), + IgnoreChownErrors: d.options.ignoreChownErrors, + WhiteoutFormat: d.getWhiteoutFormat(), + InUserNS: rsystem.RunningInUserNS(), + }) + out.Target = applyDir + return out, err +} + +// ApplyDiffFromStagingDirectory applies the changes using the specified staging directory. +func (d *Driver) ApplyDiffFromStagingDirectory(id, parent, stagingDirectory string, diffOutput *graphdriver.DriverWithDifferOutput, options *graphdriver.ApplyDiffOpts) error { + if filepath.Dir(stagingDirectory) != d.getStagingDir() { + return fmt.Errorf("%q is not a staging directory", stagingDirectory) + } + + diff, err := d.getDiffPath(id) + if err != nil { + return err + } + if err := os.RemoveAll(diff); err != nil && !os.IsNotExist(err) { + return err + } + return os.Rename(stagingDirectory, diff) +} + +// DifferTarget gets the location where files are stored for the layer. +func (d *Driver) DifferTarget(id string) (string, error) { + return d.getDiffPath(id) +} + // ApplyDiff applies the new layer into a root func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) (size int64, err error) { diff --git a/errors.go b/errors.go index 35288d87ad..5fc810b89d 100644 --- a/errors.go +++ b/errors.go @@ -53,4 +53,6 @@ var ( ErrSizeUnknown = types.ErrSizeUnknown // ErrStoreIsReadOnly is returned when the caller makes a call to a read-only store that would require modifying its contents. ErrStoreIsReadOnly = types.ErrStoreIsReadOnly + // ErrNotSupported is returned when the requested functionality is not supported. + ErrNotSupported = types.ErrNotSupported ) diff --git a/layers.go b/layers.go index d398a3ff94..089703e240 100644 --- a/layers.go +++ b/layers.go @@ -247,6 +247,19 @@ type LayerStore interface { // applies its changes to a specified layer. ApplyDiff(to string, diff io.Reader) (int64, error) + // ApplyDiffWithDiffer applies the changes through the differ callback function. + // If to is the empty string, then a staging directory is created by the driver. + ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) + + // CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors + CleanupStagingDirectory(stagingDirectory string) error + + // ApplyDiffFromStagingDirectory uses stagingDirectory to create the diff. + ApplyDiffFromStagingDirectory(id, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error + + // DifferTarget gets the location where files are stored for the layer. + DifferTarget(id string) (string, error) + // LoadLocked wraps Load in a locked state. This means it loads the store // and cleans-up invalid layers if needed. LoadLocked() error @@ -1553,6 +1566,93 @@ func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error return size, err } +func (r *layerStore) DifferTarget(id string) (string, error) { + ddriver, ok := r.driver.(drivers.DriverWithDiffer) + if !ok { + return "", ErrNotSupported + } + layer, ok := r.lookup(id) + if !ok { + return "", ErrLayerUnknown + } + return ddriver.DifferTarget(layer.ID) +} + +func (r *layerStore) ApplyDiffFromStagingDirectory(id, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error { + ddriver, ok := r.driver.(drivers.DriverWithDiffer) + if !ok { + return ErrNotSupported + } + layer, ok := r.lookup(id) + if !ok { + return ErrLayerUnknown + } + if options == nil { + options = &drivers.ApplyDiffOpts{ + Mappings: r.layerMappings(layer), + MountLabel: layer.MountLabel, + } + } + err := ddriver.ApplyDiffFromStagingDirectory(layer.ID, layer.Parent, stagingDirectory, diffOutput, options) + if err != nil { + return err + } + layer.UIDs = diffOutput.UIDs + layer.GIDs = diffOutput.GIDs + layer.UncompressedDigest = diffOutput.UncompressedDigest + layer.UncompressedSize = diffOutput.Size + layer.Metadata = diffOutput.Metadata + if err = r.Save(); err != nil { + return err + } + for k, v := range diffOutput.BigData { + if err := r.SetBigData(id, k, bytes.NewReader(v)); err != nil { + r.Delete(id) + return err + } + } + return err +} + +func (r *layerStore) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) { + ddriver, ok := r.driver.(drivers.DriverWithDiffer) + if !ok { + return nil, ErrNotSupported + } + + if to == "" { + output, err := ddriver.ApplyDiffWithDiffer("", "", options, differ) + return &output, err + } + + layer, ok := r.lookup(to) + if !ok { + return nil, ErrLayerUnknown + } + if options == nil { + options = &drivers.ApplyDiffOpts{ + Mappings: r.layerMappings(layer), + MountLabel: layer.MountLabel, + } + } + output, err := ddriver.ApplyDiffWithDiffer(layer.ID, layer.Parent, options, differ) + if err != nil { + return nil, err + } + layer.UIDs = output.UIDs + layer.GIDs = output.GIDs + err = r.Save() + return &output, err +} + +func (r *layerStore) CleanupStagingDirectory(stagingDirectory string) error { + ddriver, ok := r.driver.(drivers.DriverWithDiffer) + if !ok { + return ErrNotSupported + } + return ddriver.CleanupStagingDirectory(stagingDirectory) +} + func (r *layerStore) layersByDigestMap(m map[digest.Digest][]string, d digest.Digest) ([]Layer, error) { var layers []Layer for _, layerID := range m[d] { diff --git a/pkg/chunked/compression.go b/pkg/chunked/compression.go new file mode 100644 index 0000000000..605be4b8f8 --- /dev/null +++ b/pkg/chunked/compression.go @@ -0,0 +1,513 @@ +package chunked + +import ( + "bytes" + "encoding/base64" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "time" + + "github.com/containers/storage/pkg/ioutils" + "github.com/klauspost/compress/zstd" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" + "github.com/vbatts/tar-split/archive/tar" +) + +type zstdTOC struct { + Version int `json:"version"` + Entries []zstdFileMetadata `json:"entries"` +} + +type zstdFileMetadata struct { + Type string `json:"type"` + Name string `json:"name"` + Linkname string `json:"linkName,omitempty"` + Mode int64 `json:"mode,omitempty"` + Size int64 `json:"size"` + UID int `json:"uid"` + GID int `json:"gid"` + ModTime time.Time `json:"modtime"` + AccessTime time.Time `json:"accesstime"` + ChangeTime time.Time `json:"changetime"` + Devmajor int64 `json:"devMajor"` + Devminor int64 `json:"devMinor"` + Xattrs map[string]string `json:"xattrs,omitempty"` + Digest string `json:"digest,omitempty"` + Offset int64 `json:"offset,omitempty"` + EndOffset int64 `json:"endOffset,omitempty"` + + // Currently chunking is not supported. + ChunkSize int64 `json:"chunkSize,omitempty"` + ChunkOffset int64 `json:"chunkOffset,omitempty"` + ChunkDigest string `json:"chunkDigest,omitempty"` +} + +const ( + TypeReg = "reg" + TypeChunk = "chunk" + TypeLink = "hardlink" + TypeChar = "char" + TypeBlock = "block" + TypeDir = "dir" + TypeFifo = "fifo" + TypeSymlink = "symlink" +) + +var tarTypes = map[byte]string{ + tar.TypeReg: TypeReg, + tar.TypeRegA: TypeReg, + tar.TypeLink: TypeLink, + tar.TypeChar: TypeChar, + tar.TypeBlock: TypeBlock, + tar.TypeDir: TypeDir, + tar.TypeFifo: TypeFifo, + tar.TypeSymlink: TypeSymlink, +} + +var typesToTar = map[string]byte{ + TypeReg: tar.TypeReg, + TypeLink: tar.TypeLink, + TypeChar: tar.TypeChar, + TypeBlock: tar.TypeBlock, + TypeDir: tar.TypeDir, + TypeFifo: tar.TypeFifo, + TypeSymlink: tar.TypeSymlink, +} + +func getType(t byte) (string, error) { + r, found := tarTypes[t] + if !found { + return "", fmt.Errorf("unknown tarball type: %v", t) + } + return r, nil +} + +func typeToTarType(t string) (byte, error) { + r, found := typesToTar[t] + if !found { + return 0, fmt.Errorf("unknown type: %v", t) + } + return r, nil +} + +const ( + manifestChecksumKey = "io.containers.zstd-chunked.manifest-checksum" + manifestInfoKey = "io.containers.zstd-chunked.manifest-position" + + // manifestTypeCRFS is a manifest file compatible with the CRFS TOC file. + manifestTypeCRFS = 1 + + // footerSizeSupported is the footer size supported by this implementation. + // Newer versions of the image format might increase this value, so reject + // any version that is not supported. + footerSizeSupported = 40 +) + +var ( + // when the zstd decoder encounters a skippable frame + 1 byte for the size, it + // will ignore it. + // https://tools.ietf.org/html/rfc8478#section-3.1.2 + skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18} + + zstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78} +) + +func isZstdChunkedFrameMagic(data []byte) bool { + if len(data) < 8 { + return false + } + return bytes.Equal(zstdChunkedFrameMagic, data[:8]) +} + +// readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must +// be specified. +// This function uses the io.containers.zstd-chunked. annotations when specified. +func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, error) { + footerSize := int64(footerSizeSupported) + if blobSize <= footerSize { + return nil, errors.New("blob too small") + } + + manifestChecksumAnnotation := annotations[manifestChecksumKey] + if manifestChecksumAnnotation == "" { + return nil, fmt.Errorf("manifest checksum annotation %q not found", manifestChecksumKey) + } + + var offset, length, lengthUncompressed, manifestType uint64 + + if offsetMetadata := annotations[manifestInfoKey]; offsetMetadata != "" { + if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &offset, &length, &lengthUncompressed, &manifestType); err != nil { + return nil, err + } + } else { + chunk := ImageSourceChunk{ + Offset: uint64(blobSize - footerSize), + Length: uint64(footerSize), + } + parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk}) + if err != nil { + return nil, err + } + var reader io.ReadCloser + select { + case r := <-parts: + reader = r + case err := <-errs: + return nil, err + } + footer := make([]byte, footerSize) + if _, err := io.ReadFull(reader, footer); err != nil { + return nil, err + } + + offset = binary.LittleEndian.Uint64(footer[0:8]) + length = binary.LittleEndian.Uint64(footer[8:16]) + lengthUncompressed = binary.LittleEndian.Uint64(footer[16:24]) + manifestType = binary.LittleEndian.Uint64(footer[24:32]) + if !isZstdChunkedFrameMagic(footer[32:40]) { + return nil, errors.New("invalid magic number") + } + } + + if manifestType != manifestTypeCRFS { + return nil, errors.New("invalid manifest type") + } + + // set a reasonable limit + if length > (1<<20)*50 { + return nil, errors.New("manifest too big") + } + if lengthUncompressed > (1<<20)*50 { + return nil, errors.New("manifest too big") + } + + chunk := ImageSourceChunk{ + Offset: offset, + Length: length, + } + + parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk}) + if err != nil { + return nil, err + } + var reader io.ReadCloser + select { + case r := <-parts: + reader = r + case err := <-errs: + return nil, err + } + + manifest := make([]byte, length) + if _, err := io.ReadFull(reader, manifest); err != nil { + return nil, err + } + + manifestDigester := digest.Canonical.Digester() + manifestChecksum := manifestDigester.Hash() + if _, err := manifestChecksum.Write(manifest); err != nil { + return nil, err + } + + d, err := digest.Parse(manifestChecksumAnnotation) + if err != nil { + return nil, err + } + if manifestDigester.Digest() != d { + return nil, errors.New("invalid manifest checksum") + } + + decoder, err := zstd.NewReader(nil) + if err != nil { + return nil, err + } + defer decoder.Close() + + b := make([]byte, 0, lengthUncompressed) + if decoded, err := decoder.DecodeAll(manifest, b); err == nil { + return decoded, nil + } + + return manifest, nil +} + +func appendZstdSkippableFrame(dest io.Writer, data []byte) error { + if _, err := dest.Write(skippableFrameMagic); err != nil { + return err + } + + var size []byte = make([]byte, 4) + binary.LittleEndian.PutUint32(size, uint32(len(data))) + if _, err := dest.Write(size); err != nil { + return err + } + if _, err := dest.Write(data); err != nil { + return err + } + return nil +} + +func writeZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []zstdFileMetadata, level int) error { + // 8 is the size of the zstd skippable frame header + the frame size + manifestOffset := offset + 8 + + toc := zstdTOC{ + Version: 1, + Entries: metadata, + } + + // Generate the manifest + manifest, err := json.Marshal(toc) + if err != nil { + return err + } + + var compressedBuffer bytes.Buffer + zstdWriter, err := zstdWriterWithLevel(&compressedBuffer, level) + if err != nil { + return err + } + if _, err := zstdWriter.Write(manifest); err != nil { + zstdWriter.Close() + return err + } + if err := zstdWriter.Close(); err != nil { + return err + } + compressedManifest := compressedBuffer.Bytes() + + manifestDigester := digest.Canonical.Digester() + manifestChecksum := manifestDigester.Hash() + if _, err := manifestChecksum.Write(compressedManifest); err != nil { + return err + } + + outMetadata[manifestChecksumKey] = manifestDigester.Digest().String() + outMetadata[manifestInfoKey] = fmt.Sprintf("%d:%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest), manifestTypeCRFS) + if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil { + return err + } + + // Store the offset to the manifest and its size in LE order + var manifestDataLE []byte = make([]byte, footerSizeSupported) + binary.LittleEndian.PutUint64(manifestDataLE, manifestOffset) + binary.LittleEndian.PutUint64(manifestDataLE[8:], uint64(len(compressedManifest))) + binary.LittleEndian.PutUint64(manifestDataLE[16:], uint64(len(manifest))) + binary.LittleEndian.PutUint64(manifestDataLE[24:], uint64(manifestTypeCRFS)) + copy(manifestDataLE[32:], zstdChunkedFrameMagic) + + return appendZstdSkippableFrame(dest, manifestDataLE) +} + +func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error { + // total written so far. Used to retrieve partial offsets in the file + dest := ioutils.NewWriteCounter(destFile) + + tr := tar.NewReader(reader) + tr.RawAccounting = true + + buf := make([]byte, 4096) + + zstdWriter, err := zstdWriterWithLevel(dest, level) + if err != nil { + return err + } + defer func() { + if zstdWriter != nil { + zstdWriter.Close() + zstdWriter.Flush() + } + }() + + restartCompression := func() (int64, error) { + var offset int64 + if zstdWriter != nil { + if err := zstdWriter.Close(); err != nil { + return 0, err + } + if err := zstdWriter.Flush(); err != nil { + return 0, err + } + offset = dest.Count + zstdWriter.Reset(dest) + } + return offset, nil + } + + var metadata []zstdFileMetadata + for { + hdr, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + rawBytes := tr.RawBytes() + if _, err := zstdWriter.Write(rawBytes); err != nil { + return err + } + payloadDigester := digest.Canonical.Digester() + payloadChecksum := payloadDigester.Hash() + + payloadDest := io.MultiWriter(payloadChecksum, zstdWriter) + + // Now handle the payload, if any + var startOffset, endOffset int64 + checksum := "" + for { + read, errRead := tr.Read(buf) + if errRead != nil && errRead != io.EOF { + return err + } + + // restart the compression only if there is + // a payload. + if read > 0 { + if startOffset == 0 { + startOffset, err = restartCompression() + if err != nil { + return err + } + } + _, err := payloadDest.Write(buf[:read]) + if err != nil { + return err + } + } + if errRead == io.EOF { + if startOffset > 0 { + endOffset, err = restartCompression() + if err != nil { + return err + } + checksum = payloadDigester.Digest().String() + } + break + } + } + + typ, err := getType(hdr.Typeflag) + if err != nil { + return err + } + xattrs := make(map[string]string) + for k, v := range hdr.Xattrs { + xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v)) + } + m := zstdFileMetadata{ + Type: typ, + Name: hdr.Name, + Linkname: hdr.Linkname, + Mode: hdr.Mode, + Size: hdr.Size, + UID: hdr.Uid, + GID: hdr.Gid, + ModTime: hdr.ModTime, + AccessTime: hdr.AccessTime, + ChangeTime: hdr.ChangeTime, + Devmajor: hdr.Devmajor, + Devminor: hdr.Devminor, + Xattrs: xattrs, + Digest: checksum, + Offset: startOffset, + EndOffset: endOffset, + + // ChunkSize is 0 for the last chunk + ChunkSize: 0, + ChunkOffset: 0, + ChunkDigest: checksum, + } + metadata = append(metadata, m) + } + + rawBytes := tr.RawBytes() + if _, err := zstdWriter.Write(rawBytes); err != nil { + return err + } + if err := zstdWriter.Flush(); err != nil { + return err + } + if err := zstdWriter.Close(); err != nil { + return err + } + zstdWriter = nil + + return writeZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), metadata, level) +} + +type zstdChunkedWriter struct { + tarSplitOut *io.PipeWriter + tarSplitErr chan error +} + +func (w zstdChunkedWriter) Close() error { + err := <-w.tarSplitErr + if err != nil { + w.tarSplitOut.Close() + return err + } + return w.tarSplitOut.Close() +} + +func (w zstdChunkedWriter) Write(p []byte) (int, error) { + select { + case err := <-w.tarSplitErr: + w.tarSplitOut.Close() + return 0, err + default: + return w.tarSplitOut.Write(p) + } +} + +// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is +// compressed separately so it can be addressed separately. Idea based on CRFS: +// https://github.com/google/crfs +// The difference with CRFS is that the zstd compression is used instead of gzip. +// The reason for it is that zstd supports embedding metadata ignored by the decoder +// as part of the compressed stream. +// A manifest json file with all the metadata is appended at the end of the tarball +// stream, using zstd skippable frames. +// The final file will look like: +// [FILE_1][FILE_2]..[FILE_N][SKIPPABLE FRAME 1][SKIPPABLE FRAME 2] +// Where: +// [FILE_N]: [ZSTD HEADER][TAR HEADER][PAYLOAD FILE_N][ZSTD FOOTER] +// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST] +// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][MANIFEST_TYPE][CHUNKED_ZSTD_MAGIC_NUMBER] +// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format. +func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) { + ch := make(chan error, 1) + r, w := io.Pipe() + + go func() { + ch <- writeZstdChunkedStream(out, metadata, r, level) + io.Copy(ioutil.Discard, r) + r.Close() + close(ch) + }() + + return zstdChunkedWriter{ + tarSplitOut: w, + tarSplitErr: ch, + }, nil +} + +func zstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) { + el := zstd.EncoderLevelFromZstd(level) + return zstd.NewWriter(dest, zstd.WithEncoderLevel(el)) +} + +// ZstdCompressor is a CompressorFunc for the zstd compression algorithm. +func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) { + if level == nil { + l := 3 + level = &l + } + + return zstdChunkedWriterWithLevel(r, metadata, *level) +} diff --git a/pkg/chunked/storage.go b/pkg/chunked/storage.go new file mode 100644 index 0000000000..67ed50b78e --- /dev/null +++ b/pkg/chunked/storage.go @@ -0,0 +1,894 @@ +package chunked + +import ( + archivetar "archive/tar" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sort" + "strings" + "syscall" + "time" + + storage "github.com/containers/storage" + graphdriver "github.com/containers/storage/drivers" + driversCopy "github.com/containers/storage/drivers/copy" + "github.com/containers/storage/pkg/archive" + "github.com/containers/storage/pkg/idtools" + "github.com/containers/storage/types" + "github.com/klauspost/compress/zstd" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/vbatts/tar-split/archive/tar" + "golang.org/x/sys/unix" +) + +const ( + maxNumberMissingChunks = 1024 + newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_WRONLY | unix.O_EXCL) + containersOverrideXattr = "user.containers.override_stat" + bigDataKey = "zstd-chunked-manifest" +) + +// ImageSourceChunk is a portion of a blob. +type ImageSourceChunk struct { + Offset uint64 + Length uint64 +} + +// ImageSourceSeekable is an image source that permits to fetch chunks of the entire blob. +type ImageSourceSeekable interface { + // GetBlobAt returns a stream for the specified blob. + GetBlobAt([]ImageSourceChunk) (chan io.ReadCloser, chan error, error) +} + +type chunkedZstdDiffer struct { + stream ImageSourceSeekable + manifest []byte + layersMetadata map[string][]zstdFileMetadata + layersTarget map[string]string +} + +// ErrBadRequest is returned when the request is not valid +type ErrBadRequest struct { +} + +func (e ErrBadRequest) Error() string { + return fmt.Sprintf("http bad request") +} + +func timeToTimespec(time time.Time) (ts unix.Timespec) { + if time.IsZero() { + // Return UTIME_OMIT special value + ts.Sec = 0 + ts.Nsec = ((1 << 30) - 2) + return + } + return unix.NsecToTimespec(time.UnixNano()) +} + +func copyFileContent(src, destFile, root string, dirfd int, missingDirsMode, mode os.FileMode) (*os.File, int64, error) { + st, err := os.Stat(src) + if err != nil { + return nil, -1, err + } + + copyWithFileRange, copyWithFileClone := true, true + + // If the destination file already exists, we shouldn't blow it away + dstFile, err := openFileUnderRoot(destFile, root, dirfd, newFileFlags, mode) + if err != nil { + return nil, -1, err + } + + err = driversCopy.CopyRegularToFile(src, dstFile, st, ©WithFileRange, ©WithFileClone) + if err != nil { + dstFile.Close() + return nil, -1, err + } + return dstFile, st.Size(), err +} + +func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[string]map[string]*zstdFileMetadata { + maps := make(map[string]map[string]*zstdFileMetadata) + + for layerID, v := range layersMetadata { + r := make(map[string]*zstdFileMetadata) + for i := range v { + r[v[i].Digest] = &v[i] + } + maps[layerID] = r + } + return maps +} + +func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[string]string, error) { + allLayers, err := store.Layers() + if err != nil { + return nil, nil, err + } + + layersMetadata := make(map[string][]zstdFileMetadata) + layersTarget := make(map[string]string) + for _, r := range allLayers { + manifestReader, err := store.LayerBigData(r.ID, bigDataKey) + if err != nil { + continue + } + defer manifestReader.Close() + manifest, err := ioutil.ReadAll(manifestReader) + if err != nil { + return nil, nil, err + } + var toc zstdTOC + if err := json.Unmarshal(manifest, &toc); err != nil { + continue + } + layersMetadata[r.ID] = toc.Entries + target, err := store.DifferTarget(r.ID) + if err != nil { + return nil, nil, err + } + layersTarget[r.ID] = target + } + + return layersMetadata, layersTarget, nil +} + +// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. +func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) { + if _, ok := annotations[manifestChecksumKey]; ok { + return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss) + } + return nil, errors.New("blob type not supported for partial retrieval") +} + +func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedZstdDiffer, error) { + manifest, err := readZstdChunkedManifest(iss, blobSize, annotations) + if err != nil { + return nil, err + } + layersMetadata, layersTarget, err := getLayersCache(store) + if err != nil { + return nil, err + } + + return &chunkedZstdDiffer{ + stream: iss, + manifest: manifest, + layersMetadata: layersMetadata, + layersTarget: layersTarget, + }, nil +} + +func findFileInOtherLayers(file zstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*zstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) { + // this is ugly, needs to be indexed + for layerID, checksums := range layersMetadata { + m, found := checksums[file.Digest] + if !found { + continue + } + + source, ok := layersTarget[layerID] + if !ok { + continue + } + + srcDirfd, err := unix.Open(root, unix.O_RDONLY, 0) + if err != nil { + continue + } + defer unix.Close(srcDirfd) + + srcFile, err := openFileUnderRoot(m.Name, source, srcDirfd, unix.O_RDONLY, 0) + if err != nil { + continue + } + defer srcFile.Close() + + srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFile.Fd()) + + dstFile, written, err := copyFileContent(srcPath, file.Name, root, dirfd, missingDirsMode, 0) + if err != nil { + continue + } + return dstFile, written, nil + } + return nil, 0, nil +} + +func getFileDigest(f *os.File) (digest.Digest, error) { + digester := digest.Canonical.Digester() + if _, err := io.Copy(digester.Hash(), f); err != nil { + return "", err + } + return digester.Digest(), nil +} + +// findFileOnTheHost checks whether the requested file already exist on the host and copies the file content from there if possible. +// It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different +// paths. +func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) { + sourceFile := filepath.Clean(filepath.Join("/", file.Name)) + if !strings.HasPrefix(sourceFile, "/usr/") { + // limit host deduplication to files under /usr. + return nil, 0, nil + } + + st, err := os.Stat(sourceFile) + if err != nil || !st.Mode().IsRegular() { + return nil, 0, nil + } + + if st.Size() != file.Size { + return nil, 0, nil + } + + fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0) + if err != nil { + return nil, 0, nil + } + + f := os.NewFile(uintptr(fd), "fd") + defer f.Close() + + manifestChecksum, err := digest.Parse(file.Digest) + if err != nil { + return nil, 0, err + } + + checksum, err := getFileDigest(f) + if err != nil { + return nil, 0, err + } + + if checksum != manifestChecksum { + return nil, 0, nil + } + + dstFile, written, err := copyFileContent(fmt.Sprintf("/proc/self/fd/%d", fd), file.Name, root, dirfd, missingDirsMode, 0) + if err != nil { + return nil, 0, nil + } + + // calculate the checksum again to make sure the file wasn't modified while it was copied + if _, err := f.Seek(0, 0); err != nil { + return nil, 0, err + } + checksum, err = getFileDigest(f) + if err != nil { + return nil, 0, err + } + if checksum != manifestChecksum { + return nil, 0, nil + } + return dstFile, written, nil +} + +func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) error { + if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 { + return nil + } + + idMappings := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) + + for i := range manifest { + if options.ChownOpts != nil { + manifest[i].UID = options.ChownOpts.UID + manifest[i].GID = options.ChownOpts.GID + } else { + pair := idtools.IDPair{ + UID: manifest[i].UID, + GID: manifest[i].GID, + } + var err error + manifest[i].UID, manifest[i].GID, err = idMappings.ToContainer(pair) + if err != nil { + return err + } + } + } + return nil +} + +type missingFile struct { + File *zstdFileMetadata + Gap int64 +} + +func (m missingFile) Length() int64 { + return m.File.EndOffset - m.File.Offset +} + +type missingChunk struct { + RawChunk ImageSourceChunk + Files []missingFile +} + +func setFileAttrs(file *os.File, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + if file == nil || file.Fd() < 0 { + return errors.Errorf("invalid file") + } + fd := int(file.Fd()) + + t, err := typeToTarType(metadata.Type) + if err != nil { + return err + } + if t == tar.TypeSymlink { + return nil + } + + if err := unix.Fchown(fd, metadata.UID, metadata.GID); err != nil { + if !options.IgnoreChownErrors { + return err + } + } + + for k, v := range metadata.Xattrs { + data, err := base64.StdEncoding.DecodeString(v) + if err != nil { + return err + } + if err := unix.Fsetxattr(fd, k, data, 0); err != nil { + return err + } + } + + ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)} + if err := unix.UtimesNanoAt(fd, "", ts, 0); err != nil && errors.Is(err, unix.ENOSYS) { + return err + } + + if err := unix.Fchmod(fd, uint32(mode)); err != nil { + return err + } + return nil +} + +func openFileUnderRoot(name, root string, dirfd int, flags uint64, mode os.FileMode) (*os.File, error) { + how := unix.OpenHow{ + Flags: flags, + Mode: uint64(mode & 07777), + Resolve: unix.RESOLVE_IN_ROOT, + } + + fd, err := unix.Openat2(dirfd, name, &how) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(fd), name), nil +} + +func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) (err error) { + file, err := openFileUnderRoot(metadata.Name, dest, dirfd, newFileFlags, 0) + if err != nil { + return err + } + defer func() { + err2 := file.Close() + if err == nil { + err = err2 + } + }() + + z, err := zstd.NewReader(reader) + if err != nil { + return err + } + defer z.Close() + + digester := digest.Canonical.Digester() + checksum := digester.Hash() + _, err = z.WriteTo(io.MultiWriter(file, checksum)) + if err != nil { + return err + } + manifestChecksum, err := digest.Parse(metadata.Digest) + if err != nil { + return err + } + if digester.Digest() != manifestChecksum { + return fmt.Errorf("checksum mismatch for %q", dest) + } + return setFileAttrs(file, mode, metadata, options) +} + +func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { + for mc := 0; ; mc++ { + var part io.ReadCloser + select { + case p := <-streams: + part = p + case err := <-errs: + return err + } + if part == nil { + if mc == len(missingChunks) { + break + } + return errors.Errorf("invalid stream returned %d %d", mc, len(missingChunks)) + } + if mc == len(missingChunks) { + return errors.Errorf("too many chunks returned") + } + + for _, mf := range missingChunks[mc].Files { + if mf.Gap > 0 { + limitReader := io.LimitReader(part, mf.Gap) + _, err := io.Copy(ioutil.Discard, limitReader) + if err != nil { + return err + } + continue + } + + limitReader := io.LimitReader(part, mf.Length()) + + if err := createFileFromZstdStream(dest, dirfd, limitReader, missingDirsMode, os.FileMode(mf.File.Mode), mf.File, options); err != nil { + part.Close() + return err + } + } + part.Close() + } + return nil +} + +func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk { + if len(missingChunks) <= target { + return missingChunks + } + + getGap := func(missingChunks []missingChunk, i int) int { + prev := missingChunks[i-1].RawChunk.Offset + missingChunks[i-1].RawChunk.Length + return int(missingChunks[i].RawChunk.Offset - prev) + } + + // this implementation doesn't account for duplicates, so it could merge + // more than necessary to reach the specified target. Since target itself + // is a heuristic value, it doesn't matter. + var gaps []int + for i := 1; i < len(missingChunks); i++ { + gaps = append(gaps, getGap(missingChunks, i)) + } + sort.Ints(gaps) + + toShrink := len(missingChunks) - target + targetValue := gaps[toShrink-1] + + newMissingChunks := missingChunks[0:1] + for i := 1; i < len(missingChunks); i++ { + gap := getGap(missingChunks, i) + if gap > targetValue { + newMissingChunks = append(newMissingChunks, missingChunks[i]) + } else { + prev := &newMissingChunks[len(newMissingChunks)-1] + gapFile := missingFile{ + Gap: int64(gap), + } + prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length + prev.Files = append(append(prev.Files, gapFile), missingChunks[i].Files...) + } + } + + return newMissingChunks +} + +func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { + var chunksToRequest []ImageSourceChunk + for _, c := range missingChunks { + chunksToRequest = append(chunksToRequest, c.RawChunk) + } + + // There are some missing files. Prepare a multirange request for the missing chunks. + var streams chan io.ReadCloser + var err error + var errs chan error + for { + streams, errs, err = input.stream.GetBlobAt(chunksToRequest) + if err == nil { + break + } + + if _, ok := err.(ErrBadRequest); ok { + requested := len(missingChunks) + // If the server cannot handle at least 64 chunks in a single request, just give up. + if requested < 64 { + return err + } + + // Merge more chunks to request + missingChunks = mergeMissingChunks(missingChunks, requested/2) + continue + } + return err + } + + if err := storeMissingFiles(streams, errs, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { + return err + } + return nil +} + +func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + parent := filepath.Dir(metadata.Name) + base := filepath.Base(metadata.Name) + + parentFd := dirfd + if parent != "." { + parentFile, err := openFileUnderRoot(parent, target, dirfd, unix.O_DIRECTORY|unix.O_PATH|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer parentFile.Close() + parentFd = int(parentFile.Fd()) + } + + if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil { + if !os.IsExist(err) { + return err + } + } + + file, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer file.Close() + + return setFileAttrs(file, mode, metadata, options) +} + +func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + sourceFile, err := openFileUnderRoot(metadata.Linkname, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer sourceFile.Close() + + destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) + destDirFd := dirfd + if destDir != "." { + f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + destDirFd = int(f.Fd()) + } + + err = unix.Linkat(int(sourceFile.Fd()), "", destDirFd, destBase, unix.AT_EMPTY_PATH) + if err != nil { + return err + } + + newFile, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_WRONLY, 0) + if err != nil { + return err + } + defer newFile.Close() + + return setFileAttrs(newFile, mode, metadata, options) +} + +func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) + destDirFd := dirfd + if destDir != "." { + f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + destDirFd = int(f.Fd()) + } + + return unix.Symlinkat(metadata.Linkname, destDirFd, destBase) +} + +type whiteoutHandler struct { + Dirfd int + Root string +} + +func (d whiteoutHandler) Setxattr(path, name string, value []byte) error { + file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer file.Close() + + return unix.Fsetxattr(int(file.Fd()), name, value, 0) +} + +func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error { + dir := filepath.Dir(path) + base := filepath.Base(path) + + dirfd := d.Dirfd + if dir != "" { + dir, err := openFileUnderRoot(dir, d.Root, d.Dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer dir.Close() + + dirfd = int(dir.Fd()) + } + + return unix.Mknodat(dirfd, base, mode, dev) +} + +func checkChownErr(err error, name string, uid, gid int) error { + if errors.Is(err, syscall.EINVAL) { + return errors.Wrapf(err, "potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid", uid, gid, name) + } + return err +} + +func (d whiteoutHandler) Chown(path string, uid, gid int) error { + file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_PATH, 0) + if err != nil { + return err + } + defer file.Close() + + if err := unix.Fchownat(int(file.Fd()), "", uid, gid, unix.AT_EMPTY_PATH); err != nil { + var stat unix.Stat_t + if unix.Fstat(int(file.Fd()), &stat) == nil { + if stat.Uid == uint32(uid) && stat.Gid == uint32(gid) { + return nil + } + } + return checkChownErr(err, path, uid, gid) + } + return nil +} + +type hardLinkToCreate struct { + dest string + dirfd int + mode os.FileMode + metadata *zstdFileMetadata +} + +func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) { + bigData := map[string][]byte{ + bigDataKey: d.manifest, + } + output := graphdriver.DriverWithDifferOutput{ + Differ: d, + BigData: bigData, + } + + storeOpts, err := types.DefaultStoreOptionsAutoDetectUID() + if err != nil { + return output, err + } + + enableHostDedup := false + if value := storeOpts.PullOptions["enable_host_deduplication"]; strings.ToLower(value) == "true" { + enableHostDedup = true + } + + // Generate the manifest + var toc zstdTOC + if err := json.Unmarshal(d.manifest, &toc); err != nil { + return output, err + } + + whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData) + + var missingChunks []missingChunk + var mergedEntries []zstdFileMetadata + + if err := maybeDoIDRemap(toc.Entries, options); err != nil { + return output, err + } + + for _, e := range toc.Entries { + if e.Type == TypeChunk { + l := len(mergedEntries) + if l == 0 || mergedEntries[l-1].Type != TypeReg { + return output, errors.New("chunk type without a regular file") + } + mergedEntries[l-1].EndOffset = e.EndOffset + continue + } + mergedEntries = append(mergedEntries, e) + } + + if options.ForceMask != nil { + uid, gid, mode, err := archive.GetFileOwner(dest) + if err == nil { + value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode) + if err := unix.Setxattr(dest, containersOverrideXattr, []byte(value), 0); err != nil { + return output, err + } + } + } + + dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH, 0) + if err != nil { + return output, err + } + defer unix.Close(dirfd) + + otherLayersCache := prepareOtherLayersCache(d.layersMetadata) + + missingDirsMode := os.FileMode(0700) + if options.ForceMask != nil { + missingDirsMode = *options.ForceMask + } + + // hardlinks can point to missing files. So create them after all files + // are retrieved + var hardLinks []hardLinkToCreate + + missingChunksSize, totalChunksSize := int64(0), int64(0) + for i, r := range mergedEntries { + if options.ForceMask != nil { + value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&07777) + r.Xattrs[containersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value)) + r.Mode = int64(*options.ForceMask) + } + + mode := os.FileMode(r.Mode) + + r.Name = filepath.Clean(r.Name) + r.Linkname = filepath.Clean(r.Linkname) + + t, err := typeToTarType(r.Type) + if err != nil { + return output, err + } + if whiteoutConverter != nil { + hdr := archivetar.Header{ + Typeflag: t, + Name: r.Name, + Linkname: r.Linkname, + Size: r.Size, + Mode: r.Mode, + Uid: r.UID, + Gid: r.GID, + } + handler := whiteoutHandler{ + Dirfd: dirfd, + Root: dest, + } + writeFile, err := whiteoutConverter.ConvertReadWithHandler(&hdr, r.Name, &handler) + if err != nil { + return output, err + } + if !writeFile { + continue + } + } + switch t { + case tar.TypeReg: + // Create directly empty files. + if r.Size == 0 { + // Used to have a scope for cleanup. + createEmptyFile := func() error { + file, err := openFileUnderRoot(r.Name, dest, dirfd, newFileFlags, 0) + if err != nil { + return err + } + defer file.Close() + if err := setFileAttrs(file, mode, &r, options); err != nil { + return err + } + return nil + } + if err := createEmptyFile(); err != nil { + return output, err + } + continue + } + + case tar.TypeDir: + if err := safeMkdir(dest, dirfd, mode, &r, options); err != nil { + return output, err + } + continue + + case tar.TypeLink: + dest := dest + dirfd := dirfd + mode := mode + r := r + hardLinks = append(hardLinks, hardLinkToCreate{ + dest: dest, + dirfd: dirfd, + mode: mode, + metadata: &r, + }) + continue + + case tar.TypeSymlink: + if err := safeSymlink(dest, dirfd, mode, &r, options); err != nil { + return output, err + } + continue + + case tar.TypeChar: + case tar.TypeBlock: + case tar.TypeFifo: + /* Ignore. */ + default: + return output, fmt.Errorf("invalid type %q", t) + } + + totalChunksSize += r.Size + + dstFile, _, err := findFileInOtherLayers(r, dest, dirfd, otherLayersCache, d.layersTarget, missingDirsMode) + if err != nil { + return output, err + } + if dstFile != nil { + if err := setFileAttrs(dstFile, mode, &r, options); err != nil { + dstFile.Close() + return output, err + } + dstFile.Close() + continue + } + + if enableHostDedup { + dstFile, _, err = findFileOnTheHost(r, dest, dirfd, missingDirsMode) + if err != nil { + return output, err + } + if dstFile != nil { + if err := setFileAttrs(dstFile, mode, &r, options); err != nil { + dstFile.Close() + return output, err + } + dstFile.Close() + continue + } + } + + missingChunksSize += r.Size + if t == tar.TypeReg { + rawChunk := ImageSourceChunk{ + Offset: uint64(r.Offset), + Length: uint64(r.EndOffset - r.Offset), + } + file := missingFile{ + File: &toc.Entries[i], + } + missingChunks = append(missingChunks, missingChunk{ + RawChunk: rawChunk, + Files: []missingFile{ + file, + }, + }) + } + } + // There are some missing files. Prepare a multirange request for the missing chunks. + if len(missingChunks) > 0 { + missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks) + if err := retrieveMissingFiles(d, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { + return output, err + } + } + + for _, m := range hardLinks { + if err := safeLink(m.dest, m.dirfd, m.mode, m.metadata, options); err != nil { + return output, err + } + } + + if totalChunksSize > 0 { + logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingChunksSize, totalChunksSize, float32(missingChunksSize*100.0)/float32(totalChunksSize)) + } + return output, nil +} diff --git a/pkg/chunked/zstdchunked_test.go b/pkg/chunked/zstdchunked_test.go new file mode 100644 index 0000000000..59892a3f4f --- /dev/null +++ b/pkg/chunked/zstdchunked_test.go @@ -0,0 +1,172 @@ +package chunked + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "testing" +) + +func TestIsZstdChunkedFrameMagic(t *testing.T) { + b := append(zstdChunkedFrameMagic[:], make([]byte, 200)...) + if !isZstdChunkedFrameMagic(b) { + t.Fatal("Chunked frame magic not found") + } + // change a byte + b[0] = -b[0] + if isZstdChunkedFrameMagic(b) { + t.Fatal("Invalid chunked frame magic found") + } +} + +type seekable struct { + data []byte + offset uint64 + length uint64 + t *testing.T +} + +func (s seekable) GetBlobAt(req []ImageSourceChunk) (chan io.ReadCloser, chan error, error) { + if len(req) != 1 { + s.t.Fatal("Requested more than one chunk") + } + if req[0].Offset != s.offset { + s.t.Fatal("Invalid offset requested") + } + if req[0].Length != s.length { + s.t.Fatal("Invalid length requested") + } + + m := make(chan io.ReadCloser) + e := make(chan error) + + go func() { + m <- ioutil.NopCloser(bytes.NewReader(s.data)) + close(m) + close(e) + }() + + return m, e, nil +} + +var someFiles = []zstdFileMetadata{ + { + Type: "dir", + Name: "/foo", + Mode: 0755, + Size: 0, + }, + { + Type: "reg", + Name: "/foo/bar", + Mode: 0755, + Size: 10, + Digest: "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03", + Offset: 100, + EndOffset: 110, + ChunkSize: 10, + ChunkDigest: "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03", + ChunkOffset: 0, + }, + { + Type: "reg", + Name: "/foo/baz", + Mode: 0755, + Size: 12, + Digest: "sha256:6f0378f21a495f5c13247317d158e9d51da45a5bf68fc2f366e450deafdc8302", + Offset: 200, + EndOffset: 212, + ChunkSize: 12, + ChunkDigest: "sha256:6f0378f21a495f5c13247317d158e9d51da45a5bf68fc2f366e450deafdc8302", + ChunkOffset: 0, + }, +} + +func TestGenerateAndParseManifest(t *testing.T) { + annotations := make(map[string]string) + offsetManifest := uint64(100000) + + var b bytes.Buffer + writer := bufio.NewWriter(&b) + if err := writeZstdChunkedManifest(writer, annotations, offsetManifest, someFiles[:], 9); err != nil { + t.Error(err) + } + if err := writer.Flush(); err != nil { + t.Error(err) + } + + offsetMetadata := annotations[manifestInfoKey] + if offsetMetadata == "" { + t.Fatal("Annotation not found") + } + + var offset, length, lengthUncompressed, manifestType uint64 + if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &offset, &length, &lengthUncompressed, &manifestType); err != nil { + t.Error(err) + } + + if offset != offsetManifest+8 { + t.Fatalf("Invalid offset %d", offset) + } + if manifestType != manifestTypeCRFS { + t.Fatalf("Invalid manifest type %d", manifestType) + } + if b.Len() == 0 { + t.Fatal("no manifest written") + } + + data := b.Bytes()[offset-offsetManifest:] + s := seekable{ + data: data, + offset: offset, + length: length, + t: t, + } + + manifest, err := readZstdChunkedManifest(s, 8192, annotations) + if err != nil { + t.Error(err) + } + + var toc zstdTOC + if err := json.Unmarshal(manifest, &toc); err != nil { + t.Error(err) + } + + if toc.Version != 1 { + t.Fatal("Invalid manifest version generated") + } + if len(toc.Entries) != len(someFiles) { + t.Fatal("Manifest mismatch") + } +} + +func TestGetTarType(t *testing.T) { + for k, v := range typesToTar { + r, err := typeToTarType(k) + if err != nil { + t.Error(err) + } + if r != v { + t.Fatal("Invalid typeToTarType conversion") + } + } + if _, err := typeToTarType("FOO"); err == nil { + t.Fatal("Invalid typeToTarType conversion") + } + for k, v := range tarTypes { + r, err := getType(k) + if err != nil { + t.Error(err) + } + if r != v { + t.Fatal("Invalid getType conversion") + } + } + if _, err := getType(byte('Z')); err == nil { + t.Fatal("Invalid getType conversion") + } +} diff --git a/pkg/config/config.go b/pkg/config/config.go index 2d24707226..b92af218a4 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -189,6 +189,10 @@ type OptionsConfig struct { // MountOpt specifies extra mount options used when mounting MountOpt string `toml:"mountopt"` + + // PullOptions specifies options to be handed to pull managers + // This API is experimental and can be changed without bumping the major version number. + PullOptions map[string]string `toml:"pull_options"` } // GetGraphDriverOptions returns the driver specific options diff --git a/store.go b/store.go index 25767b7a2f..b67776e61c 100644 --- a/store.go +++ b/store.go @@ -317,6 +317,20 @@ type Store interface { // } ApplyDiff(to string, diff io.Reader) (int64, error) + // ApplyDiffer applies a diff to a layer. + // It is the caller responsibility to clean the staging directory if it is not + // successfully applied with ApplyDiffFromStagingDirectory. + ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) + + // ApplyDiffFromStagingDirectory uses stagingDirectory to create the diff. + ApplyDiffFromStagingDirectory(to, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error + + // CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors + CleanupStagingDirectory(stagingDirectory string) error + + // DifferTarget gets the path to the differ target. + DifferTarget(id string) (string, error) + // LayersByCompressedDigest returns a slice of the layers with the // specified compressed digest value recorded for them. LayersByCompressedDigest(d digest.Digest) ([]Layer, error) @@ -2939,6 +2953,75 @@ func (s *store) Diff(from, to string, options *DiffOptions) (io.ReadCloser, erro return nil, ErrLayerUnknown } +func (s *store) ApplyDiffFromStagingDirectory(to, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error { + rlstore, err := s.LayerStore() + if err != nil { + return err + } + rlstore.Lock() + defer rlstore.Unlock() + if modified, err := rlstore.Modified(); modified || err != nil { + if err = rlstore.Load(); err != nil { + return err + } + } + if !rlstore.Exists(to) { + return ErrLayerUnknown + } + return rlstore.ApplyDiffFromStagingDirectory(to, stagingDirectory, diffOutput, options) +} + +func (s *store) CleanupStagingDirectory(stagingDirectory string) error { + rlstore, err := s.LayerStore() + if err != nil { + return err + } + rlstore.Lock() + defer rlstore.Unlock() + if modified, err := rlstore.Modified(); modified || err != nil { + if err = rlstore.Load(); err != nil { + return err + } + } + return rlstore.CleanupStagingDirectory(stagingDirectory) +} + +func (s *store) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) { + rlstore, err := s.LayerStore() + if err != nil { + return nil, err + } + rlstore.Lock() + defer rlstore.Unlock() + if modified, err := rlstore.Modified(); modified || err != nil { + if err = rlstore.Load(); err != nil { + return nil, err + } + } + if to != "" && !rlstore.Exists(to) { + return nil, ErrLayerUnknown + } + return rlstore.ApplyDiffWithDiffer(to, options, differ) +} + +func (s *store) DifferTarget(id string) (string, error) { + rlstore, err := s.LayerStore() + if err != nil { + return "", err + } + rlstore.Lock() + defer rlstore.Unlock() + if modified, err := rlstore.Modified(); modified || err != nil { + if err = rlstore.Load(); err != nil { + return "", err + } + } + if rlstore.Exists(id) { + return rlstore.DifferTarget(id) + } + return "", ErrLayerUnknown +} + func (s *store) ApplyDiff(to string, diff io.Reader) (int64, error) { rlstore, err := s.LayerStore() if err != nil { diff --git a/types/errors.go b/types/errors.go index 4b923dcf69..d920d12eb5 100644 --- a/types/errors.go +++ b/types/errors.go @@ -53,4 +53,6 @@ var ( ErrSizeUnknown = errors.New("size is not known") // ErrStoreIsReadOnly is returned when the caller makes a call to a read-only store that would require modifying its contents. ErrStoreIsReadOnly = errors.New("called a write method on a read-only store") + // ErrNotSupported is returned when the requested functionality is not supported. + ErrNotSupported = errors.New("not supported") ) diff --git a/types/options.go b/types/options.go index 223db8f00f..987ea5f26d 100644 --- a/types/options.go +++ b/types/options.go @@ -148,6 +148,9 @@ type StoreOptions struct { AutoNsMinSize uint32 `json:"auto_userns_min_size,omitempty"` // AutoNsMaxSize is the maximum size for an automatic user namespace. AutoNsMaxSize uint32 `json:"auto_userns_max_size,omitempty"` + // PullOptions specifies options to be handed to pull managers + // This API is experimental and can be changed without bumping the major version number. + PullOptions map[string]string `toml:"pull_options"` } // isRootlessDriver returns true if the given storage driver is valid for containers running as non root @@ -362,6 +365,9 @@ func ReloadConfigurationFile(configFile string, storeOptions *StoreOptions) { if config.Storage.Options.AutoUsernsMaxSize > 0 { storeOptions.AutoNsMaxSize = config.Storage.Options.AutoUsernsMaxSize } + if config.Storage.Options.PullOptions != nil { + storeOptions.PullOptions = config.Storage.Options.PullOptions + } storeOptions.GraphDriverOptions = append(storeOptions.GraphDriverOptions, cfg.GetGraphDriverOptions(storeOptions.GraphDriverName, config.Storage.Options)...)