Skip to content

Commit

Permalink
Add package for fetching archives over HTTP
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Prodan <stefan.prodan@gmail.com>
  • Loading branch information
stefanprodan committed Oct 10, 2022
1 parent 60dfe84 commit d147ae2
Show file tree
Hide file tree
Showing 5 changed files with 443 additions and 0 deletions.
124 changes: 124 additions & 0 deletions http/fetch/archive_fetcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
Copyright 2022 The Flux authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package fetch

import (
"bytes"
"crypto/sha256"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"time"

"github.com/hashicorp/go-retryablehttp"

"github.com/fluxcd/pkg/tar"
)

// ArchiveFetcher holds the HTTP client that reties with back off when
// the file server is offline.
type ArchiveFetcher struct {
httpClient *retryablehttp.Client
maxUntarSize int
hostnameOverwrite string
}

// FileNotFoundError is an error type used to signal 404 HTTP status code responses.
var FileNotFoundError = errors.New("file not found")

// NewArchiveFetcher configures the retryable http client used for fetching archives.
func NewArchiveFetcher(retries, maxUntarSize int, hostnameOverwrite string) *ArchiveFetcher {
httpClient := retryablehttp.NewClient()
httpClient.RetryWaitMin = 5 * time.Second
httpClient.RetryWaitMax = 30 * time.Second
httpClient.RetryMax = retries
httpClient.Logger = nil

return &ArchiveFetcher{
httpClient: httpClient,
maxUntarSize: maxUntarSize,
hostnameOverwrite: hostnameOverwrite,
}
}

// Fetch downloads, verifies and extracts the tarball content to the specified directory.
// If the file server responds with 5xx errors, the download operation is retried.
// If the file server responds with 404, the returned error is of type FileNotFoundError.
// If the file server is unavailable for more than 3 minutes, the returned error contains the original status code.
func (r *ArchiveFetcher) Fetch(archiveURL, checksum, dir string) error {
if r.hostnameOverwrite != "" {
u, err := url.Parse(archiveURL)
if err != nil {
return err
}
u.Host = r.hostnameOverwrite
archiveURL = u.String()
}

req, err := retryablehttp.NewRequest(http.MethodGet, archiveURL, nil)
if err != nil {
return fmt.Errorf("failed to create a new request: %w", err)
}

resp, err := r.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to download archive, error: %w", err)
}
defer resp.Body.Close()

if code := resp.StatusCode; code != http.StatusOK {
if code == http.StatusNotFound {
return FileNotFoundError
}
return fmt.Errorf("failed to download archive from %s, status: %s", archiveURL, resp.Status)
}

var buf bytes.Buffer

// verify checksum matches origin
if err := r.verifyChecksum(checksum, &buf, resp.Body); err != nil {
return err
}

// extract
if err = tar.Untar(&buf, dir, tar.WithMaxUntarSize(r.maxUntarSize)); err != nil {
return fmt.Errorf("failed to extract archive, error: %w", err)
}

return nil
}

// verifyChecksum computes the checksum of the tarball and returns an error if the computed value
// does not match the artifact advertised checksum.
func (r *ArchiveFetcher) verifyChecksum(checksum string, buf *bytes.Buffer, reader io.Reader) error {
hasher := sha256.New()

// compute checksum
mw := io.MultiWriter(hasher, buf)
if _, err := io.Copy(mw, reader); err != nil {
return err
}

if newChecksum := fmt.Sprintf("%x", hasher.Sum(nil)); newChecksum != checksum {
return fmt.Errorf("failed to verify archive: computed checksum '%s' doesn't match provided '%s'",
newChecksum, checksum)
}

return nil
}
195 changes: 195 additions & 0 deletions http/fetch/archive_fetcher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
/*
Copyright 2022 The Flux authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package fetch

import (
"archive/tar"
"compress/gzip"
"crypto/sha256"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"testing"

. "github.com/onsi/gomega"

"github.com/fluxcd/pkg/testserver"
)

func TestArchiveFetcher_Fetch(t *testing.T) {
g := NewWithT(t)
tmpDir := t.TempDir()

testServer, err := testserver.NewTempArtifactServer()
if err != nil {
g.Expect(err).NotTo(HaveOccurred(), "failed to create the test server")
}
fmt.Println("Starting the test server")
testServer.Start()

fileName := "testdata/manifests.yaml"
artifactName := "manifests.tgz"
artifactURL := fmt.Sprintf("%s/%s", testServer.URL(), artifactName)
artifactChecksum, err := createArtifact(testServer, "testdata", artifactName)
g.Expect(err).ToNot(HaveOccurred())

tests := []struct {
name string
url string
checksum string
wantErr bool
wantErrType error
}{
{
name: "fetches and verifies the checksum",
url: artifactURL,
checksum: artifactChecksum,
wantErr: false,
},
{
name: "fails to verify the checksum",
url: artifactURL,
checksum: artifactChecksum + "1",
wantErr: true,
},
{
name: "fails with not found error",
url: artifactURL + "1",
checksum: artifactChecksum,
wantErr: true,
wantErrType: FileNotFoundError,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

fetcher := NewArchiveFetcher(1, -1, "")
err = fetcher.Fetch(tt.url, tt.checksum, tmpDir)

if tt.wantErr {
g.Expect(err).To(HaveOccurred())
if tt.wantErrType != nil {
g.Expect(err).To(Equal(tt.wantErrType))
}
} else {
g.Expect(err).ToNot(HaveOccurred())

originContent, err := os.ReadFile(fileName)
g.Expect(err).ToNot(HaveOccurred())

fetchedContent, err := os.ReadFile(filepath.Join(tmpDir, fileName))
g.Expect(err).ToNot(HaveOccurred())

g.Expect(string(originContent)).To(BeIdenticalTo(string(fetchedContent)))
}
})
}

}

func createArtifact(artifactServer *testserver.ArtifactServer, source, destination string) (string, error) {
if f, err := os.Stat(source); os.IsNotExist(err) || !f.IsDir() {
return "", fmt.Errorf("invalid source path: %s", source)
}
f, err := os.Create(filepath.Join(artifactServer.Root(), destination))
if err != nil {
return "", err
}
defer func() {
if err != nil {
os.Remove(f.Name())
}
}()

h := sha256.New()

mw := io.MultiWriter(h, f)
gw := gzip.NewWriter(mw)
tw := tar.NewWriter(gw)

if err = filepath.Walk(source, func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}

// Ignore anything that is not a file (directories, symlinks)
if !fi.Mode().IsRegular() {
return nil
}

// Ignore dotfiles
if strings.HasPrefix(fi.Name(), ".") {
return nil
}

header, err := tar.FileInfoHeader(fi, p)
if err != nil {
return err
}
// The name needs to be modified to maintain directory structure
// as tar.FileInfoHeader only has access to the base name of the file.
// Ref: https://golang.org/src/archive/tar/common.go?#L626
relFilePath := p
if filepath.IsAbs(source) {
relFilePath, err = filepath.Rel(source, p)
if err != nil {
return err
}
}
header.Name = relFilePath

if err := tw.WriteHeader(header); err != nil {
return err
}

f, err := os.Open(p)
if err != nil {
f.Close()
return err
}
if _, err := io.Copy(tw, f); err != nil {
f.Close()
return err
}
return f.Close()
}); err != nil {
return "", err
}

if err := tw.Close(); err != nil {
gw.Close()
f.Close()
return "", err
}
if err := gw.Close(); err != nil {
f.Close()
return "", err
}
if err := f.Close(); err != nil {
return "", err
}

if err := os.Chmod(f.Name(), 0644); err != nil {
return "", err
}

return fmt.Sprintf("%x", h.Sum(nil)), nil
}
24 changes: 24 additions & 0 deletions http/fetch/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module github.com/fluxcd/pkg/http/fetch

go 1.18

replace (
github.com/fluxcd/pkg/tar => ../../tar
github.com/fluxcd/pkg/testserver => ../../testserver
)

require (
github.com/fluxcd/pkg/tar v0.1.0
github.com/fluxcd/pkg/testserver v0.3.0
github.com/hashicorp/go-retryablehttp v0.7.1
github.com/onsi/gomega v1.21.1
)

require (
github.com/cyphar/filepath-securejoin v0.2.3 // indirect
github.com/google/go-cmp v0.5.8 // indirect
github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/text v0.3.7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
25 changes: 25 additions & 0 deletions http/fetch/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ=
github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/onsi/ginkgo/v2 v2.1.6 h1:Fx2POJZfKRQcM1pH49qSZiYeu319wji004qX+GDovrU=
github.com/onsi/gomega v1.21.1 h1:OB/euWYIExnPBohllTicTHmGTrMaqJ67nIu80j0/uEM=
github.com/onsi/gomega v1.21.1/go.mod h1:iYAIXgPSaDHak0LCMA+AWBpIKBr8WZicMxnE8luStNc=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Loading

0 comments on commit d147ae2

Please sign in to comment.