Skip to content

Commit

Permalink
Add unarchiver
Browse files Browse the repository at this point in the history
  • Loading branch information
yahavi committed Apr 9, 2023
1 parent 6b9e00f commit 5c4675d
Show file tree
Hide file tree
Showing 39 changed files with 408 additions and 3 deletions.
2 changes: 1 addition & 1 deletion datastructures/set.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ type Set[T comparable] struct {
container map[T]struct{}
}

//MakeSet initialize the set
// MakeSet initializes and returns a new, empty set
func MakeSet[T comparable]() *Set[T] {
return &Set[T]{
container: make(map[T]struct{}),
Expand Down
2 changes: 1 addition & 1 deletion fanout/readall_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"sync"
)

//A reader that emits its read to multiple consumers using a ReadAll(p []byte) ([]interface{}, error) func
// ReadAllReader is a reader that emits its reads to multiple consumers using a ReadAll(p []byte) ([]interface{}, error) func
type ReadAllReader struct {
reader io.Reader
consumers []ReadAllConsumer
Expand Down
2 changes: 1 addition & 1 deletion fanout/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"sync"
)

//A reader that emits its read to multiple consumers using an io.Reader Read(p []byte) (int, error) func
// Reader is a reader that emits its reads to multiple consumers using an io.Reader Read(p []byte) (int, error) func
type Reader struct {
reader io.Reader
consumers []Consumer
Expand Down
10 changes: 10 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,22 @@ module github.com/jfrog/gofrog
go 1.19

require (
github.com/mholt/archiver/v3 v3.5.1
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.8.0
)

require (
github.com/andybalholm/brotli v1.0.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/golang/snappy v0.0.2 // indirect
github.com/klauspost/compress v1.11.4 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/ulikunitz/xz v0.5.9 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
26 changes: 26 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU=
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand All @@ -10,6 +30,12 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Expand Down
261 changes: 261 additions & 0 deletions unarchive/archive.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
package unarchive

import (
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/jfrog/gofrog/datastructures"
"github.com/mholt/archiver/v3"
)

// Unarchiver extracts archive files. The archive format is resolved from a caller-supplied
// file name rather than from the name of the file on disk (see Unarchive).
type Unarchiver struct {
// BypassArchiveInspection, when true, makes Unarchive skip the inspectArchive step that runs
// before extraction.
BypassArchiveInspection bool
}

// supportedArchives lists the archive and compression formats that byExtension checks a file name against.
// byExtension stops at the first entry whose CheckExt accepts the name, so the order of this list is significant.
// NOTE(review): the compound tar formats (e.g. TarGz) are presumably listed before the bare compressors (e.g. Gz)
// so that they win the match - confirm against the archiver CheckExt implementations before reordering.
var supportedArchives = []archiver.ExtensionChecker{
&archiver.TarBrotli{}, &archiver.TarBz2{}, &archiver.TarGz{}, &archiver.TarLz4{}, &archiver.TarSz{}, &archiver.TarXz{}, &archiver.TarZstd{},
&archiver.Rar{}, &archiver.Tar{}, &archiver.Zip{}, &archiver.Brotli{}, &archiver.Gz{}, &archiver.Bz2{}, &archiver.Lz4{}, &archiver.Snappy{},
&archiver.Xz{}, &archiver.Zstd{},
}

// IsSupportedArchive reports whether the extension of filePath maps, via 'archiver.ByExtension',
// to a format that implements 'archiver.Unarchiver'.
// It returns false when the extension is not recognized or when the format cannot be unarchived.
func (u *Unarchiver) IsSupportedArchive(filePath string) bool {
	format, err := archiver.ByExtension(filePath)
	if err == nil {
		if _, canUnarchive := format.(archiver.Unarchiver); canUnarchive {
			return true
		}
	}
	return false
}

// Unarchive extracts the archive at archivePath into destinationPath.
//
// The 'archiver' dependency includes an API called 'Unarchive' to extract archive files. That API uses the archive file
// extension to determine the archive type.
// We therefore need to use the file name as it was in Artifactory, and not the file name which was downloaded. To achieve this,
// the archive format is resolved here from archiveName, and the matching unarchiver is then run on archivePath.
// Unless BypassArchiveInspection is set, the archive is inspected (see inspectArchive) before anything is extracted.
//
// archivePath - Local path of the archive to extract; it is passed as-is to the archive walker and to the unarchiver.
// NOTE(review): this was previously documented as "without the file name", which does not match how the value is used here - confirm with callers.
// archiveName - The archive file name, used only to determine the archive format
// destinationPath - The extraction destination directory
//
// An error is returned when the format cannot be resolved from archiveName, when the resolved format
// is not an archive format, when the inspection fails, or when the extraction itself fails.
func (u *Unarchiver) Unarchive(archivePath, archiveName, destinationPath string) error {
	archive, err := byExtension(archiveName)
	if err != nil {
		return err
	}
	unarchiver, ok := archive.(archiver.Unarchiver)
	if !ok {
		// Use a constant format string: archiveName is external input and may contain '%'.
		return fmt.Errorf("format specified by source filename is not an archive format: %s", archiveName)
	}
	if !u.BypassArchiveInspection {
		if err = inspectArchive(archive, archivePath, destinationPath); err != nil {
			return err
		}
	}
	return unarchiver.Unarchive(archivePath, destinationPath)
}

// byExtension returns a new archiver instance for the first format in supportedArchives whose
// CheckExt accepts filename.
// Instead of using 'archiver.ByExtension' that by default sets OverwriteExisting to false, we implement our own:
// every archive format below (rar, zip, tar and the compressed tar variants) is returned with
// OverwriteExisting set to true. The single-file compressors (gz, bz2, lz4, sz, xz, zst) are returned
// as created by their constructors.
// NOTE(review): Brotli is listed in supportedArchives but has no case below, so a name matching only
// Brotli ends in the "format unrecognized" error - confirm whether that is intended.
// An error is returned when no supported format matches filename.
func byExtension(filename string) (interface{}, error) {
	var ec interface{}
	for _, c := range supportedArchives {
		if err := c.CheckExt(filename); err == nil {
			// First match wins; the order of supportedArchives decides between overlapping extensions.
			ec = c
			break
		}
	}
	switch ec.(type) {
	case *archiver.Rar:
		archiveInstance := archiver.NewRar()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.Tar:
		archiveInstance := archiver.NewTar()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarBrotli:
		archiveInstance := archiver.NewTarBrotli()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarBz2:
		archiveInstance := archiver.NewTarBz2()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarGz:
		archiveInstance := archiver.NewTarGz()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarLz4:
		archiveInstance := archiver.NewTarLz4()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarSz:
		archiveInstance := archiver.NewTarSz()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarXz:
		archiveInstance := archiver.NewTarXz()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.TarZstd:
		archiveInstance := archiver.NewTarZstd()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.Zip:
		archiveInstance := archiver.NewZip()
		archiveInstance.OverwriteExisting = true
		return archiveInstance, nil
	case *archiver.Gz:
		return archiver.NewGz(), nil
	case *archiver.Bz2:
		return archiver.NewBz2(), nil
	case *archiver.Lz4:
		return archiver.NewLz4(), nil
	case *archiver.Snappy:
		return archiver.NewSnappy(), nil
	case *archiver.Xz:
		return archiver.NewXz(), nil
	case *archiver.Zstd:
		return archiver.NewZstd(), nil
	}
	// Either nothing matched (ec is nil) or the matched format has no case above.
	return nil, fmt.Errorf("format unrecognized by filename: %s", filename)
}

// inspectArchive makes sure the archive is free from Zip Slip and Zip symlinks attacks.
// It walks every entry of the archive at localArchivePath without extracting it and fails when:
//   - an entry path resolves to a location outside destinationDir,
//   - a symlink entry targets a location outside destinationDir (see checkSymlinkEntry), or
//   - a symlink targets an ancestor directory (see uplinksValidator).
//
// archive - The archiver instance; it must implement 'archiver.Walker', otherwise an error is returned
// localArchivePath - The local path of the archive file to walk
// destinationDir - The directory the archive is going to be extracted to
func inspectArchive(archive interface{}, localArchivePath, destinationDir string) error {
	walker, ok := archive.(archiver.Walker)
	if !ok {
		// Use a constant format string: the path is external input and may contain '%'.
		return fmt.Errorf("couldn't inspect archive: %s", localArchivePath)
	}

	uplinksValidator := newUplinksValidator()
	err := walker.Walk(localArchivePath, func(archiveEntry archiver.File) error {
		header, err := extractArchiveEntryHeader(archiveEntry)
		if err != nil {
			return err
		}
		pathInArchive := getPathInArchive(destinationDir, "", header.EntryPath)
		// A plain string-prefix comparison is not a containment check: it accepts sibling directories
		// that share the prefix (e.g. '/tmp/out-evil' for '/tmp/out') and rejects valid entries when
		// destinationDir is not in its cleaned form (e.g. './out'). Compare path elements instead.
		// An empty pathInArchive is how getPathInArchive reports an illegal path.
		relPath, relErr := filepath.Rel(destinationDir, pathInArchive)
		if pathInArchive == "" || relErr != nil || relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) {
			return fmt.Errorf(
				"illegal path in archive: '%s'. To prevent Zip Slip exploit, the path can't lead to an entry outside '%s'",
				header.EntryPath, destinationDir)
		}
		if (archiveEntry.Mode()&os.ModeSymlink) != 0 || len(header.TargetLink) > 0 {
			var targetLink string
			if targetLink, err = checkSymlinkEntry(header, archiveEntry, destinationDir); err != nil {
				return err
			}
			uplinksValidator.addTargetLink(pathInArchive, targetLink)
		}
		uplinksValidator.addEntryFile(pathInArchive, archiveEntry.IsDir())
		return nil
	})
	if err != nil {
		return err
	}
	// Links to ancestors can only be judged once all entries are known.
	return uplinksValidator.ensureNoUplinkDirs()
}

// checkSymlinkEntry makes sure the extraction path of the symlink entry target is under the destination dir.
//
// header - The header of the symlink entry; TargetLink is used as the link target when it is set
// archiveEntry - The archive entry; its content is read as the link target when the header has none
// destinationDir - The directory the archive is going to be extracted to
//
// It returns the resolved target path (as computed by getPathInArchive relative to the directory of the
// entry), or an error when the link content cannot be read or the target is outside destinationDir.
func checkSymlinkEntry(header *archiveHeader, archiveEntry archiver.File, destinationDir string) (string, error) {
	targetLinkPath := header.TargetLink
	if targetLinkPath == "" {
		// The link destination path is not always in the archive header
		// In that case, we will look at the link content to get the link destination path
		content, err := io.ReadAll(archiveEntry.ReadCloser)
		if err != nil {
			return "", err
		}
		targetLinkPath = string(content)
	}

	targetPathInArchive := getPathInArchive(destinationDir, filepath.Dir(header.EntryPath), targetLinkPath)
	// A plain string-prefix comparison is not a containment check: it accepts sibling directories
	// that share the prefix (e.g. '/tmp/out-evil' for '/tmp/out') and rejects valid targets when
	// destinationDir is not in its cleaned form (e.g. './out'). Compare path elements instead.
	// An empty targetPathInArchive is how getPathInArchive reports an illegal path.
	relPath, relErr := filepath.Rel(destinationDir, targetPathInArchive)
	if targetPathInArchive == "" || relErr != nil || relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf(
			"illegal link path in archive: '%s'. To prevent Zip Slip Symlink exploit, the path can't lead to an entry outside '%s'",
			targetLinkPath, destinationDir)
	}

	return targetPathInArchive, nil
}

// getPathInArchive resolves the path of an archive entry, or of a link target, as it would be after extraction.
// Surrounding whitespace is trimmed and the path is cleaned. An absolute path is returned as is,
// while a relative path is joined to destinationDir and entryDirInArchive.
// On a system where '\' is a path separator (Windows), a path that starts with '/' is illegal and
// an empty string is returned for it.
func getPathInArchive(destinationDir, entryDirInArchive, pathInArchive string) string {
	trimmed := strings.TrimSpace(pathInArchive)
	backslashIsSeparator := os.IsPathSeparator('\\')
	if backslashIsSeparator && strings.HasPrefix(trimmed, "/") {
		return ""
	}

	cleaned := filepath.Clean(trimmed)
	if filepath.IsAbs(cleaned) {
		return cleaned
	}
	// Relative path: anchor it under the destination dir
	return filepath.Join(destinationDir, entryDirInArchive, cleaned)
}

// extractArchiveEntryHeader reads the entry path and the link target from the header of an archive entry.
// The header is marshalled to JSON and decoded back into an archiveHeader, so only the fields
// archiveHeader declares are kept.
// When the marshalling fails, nil and the error are returned; the decode error, if any, is returned
// together with the header.
func extractArchiveEntryHeader(f archiver.File) (*archiveHeader, error) {
	rawHeader, marshalErr := json.Marshal(f.Header)
	if marshalErr != nil {
		return nil, marshalErr
	}
	var header archiveHeader
	unmarshalErr := json.Unmarshal(rawHeader, &header)
	return &header, unmarshalErr
}

// archiveHeader holds the two fields of an archive entry header that the inspection needs.
// It is filled by extractArchiveEntryHeader from the JSON form of the entry header, so the JSON keys
// below must match the field names of the underlying header.
type archiveHeader struct {
// EntryPath is the path of the entry inside the archive (the header's "Name").
EntryPath  string `json:"Name,omitempty"`
// TargetLink is the link target of the entry (the header's "Linkname"); empty when the header has none.
TargetLink string `json:"Linkname,omitempty"`
}

// uplinksValidator blocks the option to extract an archive with a link to an ancestor directory.
// An ancestor directory is a directory located above the symlink in the hierarchy of the extraction dir, but not necessarily a direct ancestor.
// For example, a sibling of a parent is an ancestor directory.
// The purpose of the uplinksValidator is to prevent directory loops in the file system during extraction.
type uplinksValidator struct {
// entryFiles is the set of paths of all archive entries that are not directories.
entryFiles        *datastructures.Set[string]
// targetParentLinks maps the path of a link entry to its target, for links whose target is an ancestor.
targetParentLinks map[string]string
}

// newUplinksValidator returns an uplinksValidator with an empty entry set and an empty links map.
func newUplinksValidator() *uplinksValidator {
	validator := new(uplinksValidator)
	// Set of all entries that are not directories in the archive
	validator.entryFiles = datastructures.MakeSet[string]()
	// Map of all links in the archive pointing to an ancestor entry
	validator.targetParentLinks = make(map[string]string)
	return validator
}

// addTargetLink records the link pathInArchive -> targetLink, but only when the target is an ancestor,
// i.e. when targetLink contains fewer path separators than pathInArchive. Other links are ignored.
func (lv *uplinksValidator) addTargetLink(pathInArchive, targetLink string) {
	separator := string(filepath.Separator)
	targetDepth := strings.Count(targetLink, separator)
	linkDepth := strings.Count(pathInArchive, separator)
	if targetDepth >= linkDepth {
		// The target is not above the link in the hierarchy
		return
	}
	lv.targetParentLinks[pathInArchive] = targetLink
}

// addEntryFile records entryFile in the set of non-directory entries. Directories are skipped.
func (lv *uplinksValidator) addEntryFile(entryFile string, isDir bool) {
	if isDir {
		// Only entries that are not directories are tracked
		return
	}
	lv.entryFiles.Add(entryFile)
}

// ensureNoUplinkDirs goes over all the recorded links that point to an ancestor.
// A target that exists in the entryFiles set is a file and is allowed. Any other target is
// considered a directory, and an error is returned for the first such link encountered.
func (lv *uplinksValidator) ensureNoUplinkDirs() error {
	for linkPath, linkTarget := range lv.targetParentLinks {
		if !lv.entryFiles.Exists(linkTarget) {
			// The target is not a known file, so it is a directory
			return fmt.Errorf(
				"illegal target link path in archive: '%s' -> '%s'. To prevent Zip Slip symlink exploit, a link can't lead to an ancestor directory",
				linkPath, linkTarget)
		}
	}
	return nil
}
Loading

0 comments on commit 5c4675d

Please sign in to comment.