Add unarchiver

jfrog · Apr 9, 2023 · 5c4675d · 5c4675d
1 parent 6b9e00f
commit 5c4675d
Show file tree

Hide file tree

Showing 39 changed files with 408 additions and 3 deletions.
diff --git a/datastructures/set.go b/datastructures/set.go
@@ -6,7 +6,7 @@ type Set[T comparable] struct {
 	container map[T]struct{}
 }
 
-//MakeSet initialize the set
+// MakeSet initializes the set
 func MakeSet[T comparable]() *Set[T] {
 	return &Set[T]{
 		container: make(map[T]struct{}),

diff --git a/fanout/readall_reader.go b/fanout/readall_reader.go
@@ -7,7 +7,7 @@ import (
 	"sync"
 )
 
-//A reader that emits its read to multiple consumers using a ReadAll(p []byte) ([]interface{}, error) func
+// A reader that emits its read to multiple consumers using a ReadAll(p []byte) ([]interface{}, error) func
 type ReadAllReader struct {
 	reader      io.Reader
 	consumers   []ReadAllConsumer

diff --git a/fanout/reader.go b/fanout/reader.go
@@ -5,7 +5,7 @@ import (
 	"sync"
 )
 
-//A reader that emits its read to multiple consumers using an io.Reader Read(p []byte) (int, error) func
+// A reader that emits its read to multiple consumers using an io.Reader Read(p []byte) (int, error) func
 type Reader struct {
 	reader      io.Reader
 	consumers   []Consumer

diff --git a/go.mod b/go.mod
@@ -3,12 +3,22 @@ module github.com/jfrog/gofrog
 go 1.19
 
 require (
+	github.com/mholt/archiver/v3 v3.5.1
 	github.com/pkg/errors v0.9.1
 	github.com/stretchr/testify v1.8.0
 )
 
 require (
+	github.com/andybalholm/brotli v1.0.1 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
+	github.com/golang/snappy v0.0.2 // indirect
+	github.com/klauspost/compress v1.11.4 // indirect
+	github.com/klauspost/pgzip v1.2.5 // indirect
+	github.com/nwaples/rardecode v1.1.0 // indirect
+	github.com/pierrec/lz4/v4 v4.1.2 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/ulikunitz/xz v0.5.9 // indirect
+	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
@@ -1,6 +1,26 @@
+github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
+github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
+github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
+github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
+github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU=
+github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
+github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
+github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
+github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
+github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
+github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
+github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -10,6 +30,12 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
+github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

diff --git a/unarchive/archive.go b/unarchive/archive.go
@@ -0,0 +1,261 @@
+package unarchive
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/jfrog/gofrog/datastructures"
+	"github.com/mholt/archiver/v3"
+)
+
+// Unarchiver extracts archive files, resolving the archive format from a caller-supplied file name.
+type Unarchiver struct {
+	// BypassArchiveInspection, when true, makes Unarchive skip the inspectArchive step
+	// that runs before extraction.
+	BypassArchiveInspection bool
+}
+
+// supportedArchives lists the format checkers probed by byExtension, in order.
+// The first entry whose CheckExt accepts the file name is the one used, so the
+// order of this slice is significant (presumably the tar+compression formats come
+// first so that e.g. a ".tar.gz" name is not claimed by the plain gzip checker).
+var supportedArchives = []archiver.ExtensionChecker{
+	// Compressed tar archives
+	&archiver.TarBrotli{},
+	&archiver.TarBz2{},
+	&archiver.TarGz{},
+	&archiver.TarLz4{},
+	&archiver.TarSz{},
+	&archiver.TarXz{},
+	&archiver.TarZstd{},
+	// Plain archives
+	&archiver.Rar{},
+	&archiver.Tar{},
+	&archiver.Zip{},
+	// Single-file compression formats
+	&archiver.Brotli{},
+	&archiver.Gz{},
+	&archiver.Bz2{},
+	&archiver.Lz4{},
+	&archiver.Snappy{},
+	&archiver.Xz{},
+	&archiver.Zstd{},
+}
+
+// IsSupportedArchive reports whether the extension of filePath maps to a format
+// that implements archiver.Unarchiver. It returns false when the extension is not
+// recognized at all.
+func (u *Unarchiver) IsSupportedArchive(filePath string) bool {
+	format, err := archiver.ByExtension(filePath)
+	if err != nil {
+		return false
+	}
+	_, canUnarchive := format.(archiver.Unarchiver)
+	return canUnarchive
+}
+
+// Unarchive extracts the archive at archivePath into destinationPath.
+//
+// The 'archiver' dependency includes an API called 'Unarchive' to extract archive files. This API uses the archive file
+// extension to determine the archive type.
+// We therefore need to use the file name as it was in Artifactory, and not the file name which was downloaded. To achieve this,
+// the archive format is resolved here from archiveName, and the matching unarchiver is invoked directly.
+//
+// archivePath - Absolute or relative path to the archive. It is handed as-is to the unarchiver (and to the inspection
+// walker) as the source archive.
+// NOTE(review): this was previously documented as the path "without the file name" - confirm against callers.
+// archiveName - The archive file name, used only to detect the archive format
+// destinationPath - The extraction destination directory
+//
+// Unless BypassArchiveInspection is set, the archive is inspected before extraction and an error is returned
+// if the inspection fails. An error is also returned if the format is unrecognized or cannot be unarchived.
+func (u *Unarchiver) Unarchive(archivePath, archiveName, destinationPath string) error {
+	archive, err := byExtension(archiveName)
+	if err != nil {
+		return err
+	}
+	unarchiver, ok := archive.(archiver.Unarchiver)
+	if !ok {
+		// The file name is passed as an argument and not concatenated into the format string,
+		// so that a '%' in the name is not interpreted as a formatting verb.
+		return fmt.Errorf("format specified by source filename is not an archive format: %s", archiveName)
+	}
+	if !u.BypassArchiveInspection {
+		if err = inspectArchive(archive, archivePath, destinationPath); err != nil {
+			return err
+		}
+	}
+	return unarchiver.Unarchive(archivePath, destinationPath)
+}
+
+// byExtension returns a new format instance matching the extension of filename.
+//
+// Instead of using 'archiver.ByExtension' that by default sets OverwriteExisting to false, we implement our own:
+// every archive format returned from here has OverwriteExisting set to true. The single-file compression
+// formats (Gz, Bz2, Lz4, Snappy, Xz, Zstd) are returned as created by their constructors.
+//
+// The format is picked by probing supportedArchives in order; the first checker that accepts the name wins.
+// An error is returned when no checker accepts the name. Note that Brotli is listed in supportedArchives but
+// has no case below, so a name matching only the Brotli checker is also reported as unrecognized.
+func byExtension(filename string) (interface{}, error) {
+	var ec interface{}
+	for _, c := range supportedArchives {
+		if err := c.CheckExt(filename); err == nil {
+			ec = c
+			break
+		}
+	}
+	// The matched checker is only used to select the type; a fresh instance is always returned.
+	switch ec.(type) {
+	case *archiver.Rar:
+		archiveInstance := archiver.NewRar()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.Tar:
+		archiveInstance := archiver.NewTar()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarBrotli:
+		archiveInstance := archiver.NewTarBrotli()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarBz2:
+		archiveInstance := archiver.NewTarBz2()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarGz:
+		archiveInstance := archiver.NewTarGz()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarLz4:
+		archiveInstance := archiver.NewTarLz4()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarSz:
+		archiveInstance := archiver.NewTarSz()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarXz:
+		archiveInstance := archiver.NewTarXz()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.TarZstd:
+		archiveInstance := archiver.NewTarZstd()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.Zip:
+		archiveInstance := archiver.NewZip()
+		archiveInstance.OverwriteExisting = true
+		return archiveInstance, nil
+	case *archiver.Gz:
+		return archiver.NewGz(), nil
+	case *archiver.Bz2:
+		return archiver.NewBz2(), nil
+	case *archiver.Lz4:
+		return archiver.NewLz4(), nil
+	case *archiver.Snappy:
+		return archiver.NewSnappy(), nil
+	case *archiver.Xz:
+		return archiver.NewXz(), nil
+	case *archiver.Zstd:
+		return archiver.NewZstd(), nil
+	}
+	return nil, fmt.Errorf("format unrecognized by filename: %s", filename)
+}
+
+// inspectArchive makes sure the archive is free from Zip Slip and Zip symlinks attacks.
+//
+// It walks every entry of the archive at localArchivePath and returns an error if:
+//   - the archive format cannot be walked,
+//   - an entry header cannot be read,
+//   - an entry would be extracted outside destinationDir,
+//   - a link entry targets a path outside destinationDir, or
+//   - a link entry targets an ancestor directory (see uplinksValidator).
+func inspectArchive(archive interface{}, localArchivePath, destinationDir string) error {
+	walker, ok := archive.(archiver.Walker)
+	if !ok {
+		// The path is passed as an argument so that a '%' in it is not interpreted as a formatting verb.
+		return fmt.Errorf("couldn't inspect archive: %s", localArchivePath)
+	}
+
+	validator := newUplinksValidator()
+	err := walker.Walk(localArchivePath, func(archiveEntry archiver.File) error {
+		header, err := extractArchiveEntryHeader(archiveEntry)
+		if err != nil {
+			return err
+		}
+		pathInArchive := getPathInArchive(destinationDir, "", header.EntryPath)
+		if !isPathInsideDir(pathInArchive, destinationDir) {
+			return fmt.Errorf(
+				"illegal path in archive: '%s'. To prevent Zip Slip exploit, the path can't lead to an entry outside '%s'",
+				header.EntryPath, destinationDir)
+		}
+		// An entry is treated as a link if its mode says so, or if its header carries a link target
+		if (archiveEntry.Mode()&os.ModeSymlink) != 0 || len(header.TargetLink) > 0 {
+			targetLink, err := checkSymlinkEntry(header, archiveEntry, destinationDir)
+			if err != nil {
+				return err
+			}
+			validator.addTargetLink(pathInArchive, targetLink)
+		}
+		validator.addEntryFile(pathInArchive, archiveEntry.IsDir())
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+	return validator.ensureNoUplinkDirs()
+}
+
+// checkSymlinkEntry makes sure the extraction path of the symlink entry target is under the destination dir.
+// It returns the resolved target path, or an error if the target leads outside destinationDir or the link
+// content cannot be read.
+func checkSymlinkEntry(header *archiveHeader, archiveEntry archiver.File, destinationDir string) (string, error) {
+	targetLinkPath := header.TargetLink
+	if targetLinkPath == "" {
+		// The link destination path is not always in the archive header
+		// In that case, we will look at the link content to get the link destination path
+		content, err := io.ReadAll(archiveEntry.ReadCloser)
+		if err != nil {
+			return "", err
+		}
+		targetLinkPath = string(content)
+	}
+
+	// A relative link target is resolved against the directory of the link entry itself
+	targetPathInArchive := getPathInArchive(destinationDir, filepath.Dir(header.EntryPath), targetLinkPath)
+	if !isPathInsideDir(targetPathInArchive, destinationDir) {
+		return "", fmt.Errorf(
+			"illegal link path in archive: '%s'. To prevent Zip Slip Symlink exploit, the path can't lead to an entry outside '%s'",
+			targetLinkPath, destinationDir)
+	}
+
+	return targetPathInArchive, nil
+}
+
+// isPathInsideDir reports whether path is dir itself or is located below it.
+// The comparison is lexical and works on path components, not on raw string prefixes: a plain prefix
+// check would accept a sibling directory whose name merely starts with the name of dir
+// (e.g. 'out-evil/file' for dir 'out'), and would reject a legal path when dir is not in its cleaned form.
+// An empty path is never considered to be inside dir.
+func isPathInsideDir(path, dir string) bool {
+	if path == "" {
+		return false
+	}
+	rel, err := filepath.Rel(dir, path)
+	if err != nil {
+		// The path can't be expressed relative to dir, e.g. one of them is absolute and the other is relative
+		return false
+	}
+	return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
+}
+
+// getPathInArchive resolves the path of an archive entry, or of a link target.
+// Surrounding whitespace is trimmed and the path is cleaned. An absolute path is returned as is,
+// while a relative path is joined to destinationDir and entryDirInArchive.
+// On Windows, a path starting with '/' is illegal and an empty string is returned for it.
+func getPathInArchive(destinationDir, entryDirInArchive, pathInArchive string) string {
+	trimmed := strings.TrimSpace(pathInArchive)
+	// A backslash counting as a path separator means we are on Windows
+	onWindows := os.IsPathSeparator('\\')
+	if onWindows && strings.HasPrefix(trimmed, "/") {
+		return ""
+	}
+
+	cleaned := filepath.Clean(trimmed)
+	if filepath.IsAbs(cleaned) {
+		return cleaned
+	}
+	// The path is relative, so it is located under the destination dir
+	return filepath.Join(destinationDir, entryDirInArchive, cleaned)
+}
+
+// extractArchiveEntryHeader reads the entry name and the link name out of the header of an archive entry.
+// The header is serialized to JSON and decoded back into an archiveHeader, so only the fields
+// mapped by the archiveHeader JSON tags are kept.
+// If the decoding fails, the error is returned together with the header instance.
+func extractArchiveEntryHeader(f archiver.File) (*archiveHeader, error) {
+	rawHeader, err := json.Marshal(f.Header)
+	if err != nil {
+		return nil, err
+	}
+	header := new(archiveHeader)
+	err = json.Unmarshal(rawHeader, header)
+	return header, err
+}
+
+// archiveHeader holds the parts of an archive entry header that are needed for the archive inspection.
+// It is populated by decoding the JSON form of the entry header (see extractArchiveEntryHeader).
+type archiveHeader struct {
+	// EntryPath is read from the "Name" key of the header
+	EntryPath  string `json:"Name,omitempty"`
+	// TargetLink is read from the "Linkname" key of the header. It is empty when the header has no such value.
+	TargetLink string `json:"Linkname,omitempty"`
+}
+
+// This validator blocks the option to extract an archive with a link to an ancestor directory.
+// An ancestor directory is a directory located above the symlink in the hierarchy of the extraction dir, but not necessarily a direct ancestor.
+// For example, a sibling of a parent is an ancestor directory.
+// The purpose of the uplinksValidator is to prevent directory loops in the file system during extraction.
+type uplinksValidator struct {
+	// Set of the paths of all entries in the archive that are not directories
+	entryFiles        *datastructures.Set[string]
+	// Map from the path of a link entry to its target path, kept only for links pointing to an ancestor entry
+	targetParentLinks map[string]string
+}
+
+// newUplinksValidator creates an uplinksValidator with an empty entries set and an empty links map.
+func newUplinksValidator() *uplinksValidator {
+	validator := new(uplinksValidator)
+	// Set of all entries that are not directories in the archive
+	validator.entryFiles = datastructures.MakeSet[string]()
+	// Map of all links in the archive pointing to an ancestor entry
+	validator.targetParentLinks = make(map[string]string)
+	return validator
+}
+
+// addTargetLink records the link at pathInArchive together with its target, only if the target is an ancestor.
+// The target is considered an ancestor when its path contains fewer path separators than the path of the link.
+func (lv *uplinksValidator) addTargetLink(pathInArchive, targetLink string) {
+	separator := string(filepath.Separator)
+	targetDepth := strings.Count(targetLink, separator)
+	linkDepth := strings.Count(pathInArchive, separator)
+	if targetDepth >= linkDepth {
+		// The target is not located above the link
+		return
+	}
+	lv.targetParentLinks[pathInArchive] = targetLink
+}
+
+// addEntryFile records entryFile in the set of archive entries, unless it is a directory.
+func (lv *uplinksValidator) addEntryFile(entryFile string, isDir bool) {
+	if isDir {
+		// Directories are not tracked
+		return
+	}
+	lv.entryFiles.Add(entryFile)
+}
+
+// ensureNoUplinkDirs goes over all the recorded links that point to an ancestor entry.
+// A target that was recorded as a non-directory entry is allowed. Any other target is treated as a directory,
+// and an error is returned for it. Since the links are kept in a map, which of several offending links
+// gets reported is not deterministic.
+func (lv *uplinksValidator) ensureNoUplinkDirs() error {
+	for linkPath, linkTarget := range lv.targetParentLinks {
+		if !lv.entryFiles.Exists(linkTarget) {
+			// The target is not a known file entry, hence it is a directory
+			return fmt.Errorf(
+				"illegal target link path in archive: '%s' -> '%s'. To prevent Zip Slip symlink exploit, a link can't lead to an ancestor directory",
+				linkPath, linkTarget)
+		}
+	}
+	return nil
+}