From b949d94d73460874cf4a388ab9f0ec9e0d9d827d Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Mon, 10 Oct 2022 13:39:04 +0300 Subject: [PATCH] Add package for fetching archives over HTTP Signed-off-by: Stefan Prodan --- http/fetch/archive_fetcher.go | 124 ++++++++++++++++++ http/fetch/archive_fetcher_test.go | 195 +++++++++++++++++++++++++++++ http/fetch/go.mod | 24 ++++ http/fetch/go.sum | 22 ++++ http/fetch/testdata/manifests.yaml | 75 +++++++++++ 5 files changed, 440 insertions(+) create mode 100644 http/fetch/archive_fetcher.go create mode 100644 http/fetch/archive_fetcher_test.go create mode 100644 http/fetch/go.mod create mode 100644 http/fetch/go.sum create mode 100644 http/fetch/testdata/manifests.yaml diff --git a/http/fetch/archive_fetcher.go b/http/fetch/archive_fetcher.go new file mode 100644 index 000000000..8cc4811a1 --- /dev/null +++ b/http/fetch/archive_fetcher.go @@ -0,0 +1,124 @@ +/* +Copyright 2022 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fetch + +import ( + "bytes" + "crypto/sha256" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "github.com/hashicorp/go-retryablehttp" + + "github.com/fluxcd/pkg/tar" +) + +// ArchiveFetcher holds the HTTP client that reties with back off when +// the file server is offline. +type ArchiveFetcher struct { + httpClient *retryablehttp.Client + maxUntarSize int + hostnameOverwrite string +} + +// FileNotFoundError is an error type used to signal 404 HTTP status code responses. +var FileNotFoundError = errors.New("file not found") + +// NewArchiveFetcher configures the retryable http client used for fetching archives. +func NewArchiveFetcher(retries, maxUntarSize int, hostnameOverwrite string) *ArchiveFetcher { + httpClient := retryablehttp.NewClient() + httpClient.RetryWaitMin = 5 * time.Second + httpClient.RetryWaitMax = 30 * time.Second + httpClient.RetryMax = retries + httpClient.Logger = nil + + return &ArchiveFetcher{ + httpClient: httpClient, + maxUntarSize: maxUntarSize, + hostnameOverwrite: hostnameOverwrite, + } +} + +// Fetch downloads, verifies and extracts the tarball content to the specified directory. +// If the file server responds with 5xx errors, the download operation is retried. +// If the file server responds with 404, the returned error is of type FileNotFoundError. +// If the file server is unavailable for more than 3 minutes, the returned error contains the original status code. +func (r *ArchiveFetcher) Fetch(archiveURL, checksum, dir string) error { + if r.hostnameOverwrite != "" { + u, err := url.Parse(archiveURL) + if err != nil { + return err + } + u.Host = r.hostnameOverwrite + archiveURL = u.String() + } + + req, err := retryablehttp.NewRequest(http.MethodGet, archiveURL, nil) + if err != nil { + return fmt.Errorf("failed to create a new request: %w", err) + } + + resp, err := r.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to download archive, error: %w", err) + } + defer resp.Body.Close() + + if code := resp.StatusCode; code != http.StatusOK { + if code == http.StatusNotFound { + return FileNotFoundError + } + return fmt.Errorf("failed to download archive from %s, status: %s", archiveURL, resp.Status) + } + + var buf bytes.Buffer + + // verify checksum matches origin + if err := r.verifyChecksum(checksum, &buf, resp.Body); err != nil { + return err + } + + // extract + if err = tar.Untar(&buf, dir, tar.WithMaxUntarSize(r.maxUntarSize)); err != nil { + return fmt.Errorf("failed to extract archive, error: %w", err) + } + + return nil +} + +// verifyChecksum computes the checksum of the tarball and returns an error if the computed value +// does not match the artifact advertised checksum. +func (r *ArchiveFetcher) verifyChecksum(checksum string, buf *bytes.Buffer, reader io.Reader) error { + hasher := sha256.New() + + // compute checksum + mw := io.MultiWriter(hasher, buf) + if _, err := io.Copy(mw, reader); err != nil { + return err + } + + if newChecksum := fmt.Sprintf("%x", hasher.Sum(nil)); newChecksum != checksum { + return fmt.Errorf("failed to verify archive: computed checksum '%s' doesn't match provided '%s'", + newChecksum, checksum) + } + + return nil +} diff --git a/http/fetch/archive_fetcher_test.go b/http/fetch/archive_fetcher_test.go new file mode 100644 index 000000000..57d01d940 --- /dev/null +++ b/http/fetch/archive_fetcher_test.go @@ -0,0 +1,195 @@ +/* +Copyright 2022 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fetch + +import ( + "archive/tar" + "compress/gzip" + "crypto/sha256" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "testing" + + . "github.com/onsi/gomega" + + "github.com/fluxcd/pkg/testserver" +) + +func TestArchiveFetcher_Fetch(t *testing.T) { + g := NewWithT(t) + tmpDir := t.TempDir() + + testServer, err := testserver.NewTempArtifactServer() + if err != nil { + g.Expect(err).NotTo(HaveOccurred(), "failed to create the test server") + } + fmt.Println("Starting the test server") + testServer.Start() + + fileName := "testdata/manifests.yaml" + artifactName := "manifests.tgz" + artifactURL := fmt.Sprintf("%s/%s", testServer.URL(), artifactName) + artifactChecksum, err := createArtifact(testServer, "testdata", artifactName) + g.Expect(err).ToNot(HaveOccurred()) + + tests := []struct { + name string + url string + checksum string + wantErr bool + wantErrType error + }{ + { + name: "fetches and verifies the checksum", + url: artifactURL, + checksum: artifactChecksum, + wantErr: false, + }, + { + name: "fails to verify the checksum", + url: artifactURL, + checksum: artifactChecksum + "1", + wantErr: true, + }, + { + name: "fails with not found error", + url: artifactURL + "1", + checksum: artifactChecksum, + wantErr: true, + wantErrType: FileNotFoundError, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + fetcher := NewArchiveFetcher(1, -1, "") + err = fetcher.Fetch(tt.url, tt.checksum, tmpDir) + + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + if tt.wantErrType != nil { + g.Expect(err).To(Equal(tt.wantErrType)) + } + } else { + g.Expect(err).ToNot(HaveOccurred()) + + originContent, err := os.ReadFile(fileName) + g.Expect(err).ToNot(HaveOccurred()) + + fetchedContent, err := os.ReadFile(filepath.Join(tmpDir, fileName)) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(string(originContent)).To(BeIdenticalTo(string(fetchedContent))) + } + }) + } + +} + +func createArtifact(artifactServer *testserver.ArtifactServer, source, destination string) (string, error) { + if f, err := os.Stat(source); os.IsNotExist(err) || !f.IsDir() { + return "", fmt.Errorf("invalid source path: %s", source) + } + f, err := os.Create(filepath.Join(artifactServer.Root(), destination)) + if err != nil { + return "", err + } + defer func() { + if err != nil { + os.Remove(f.Name()) + } + }() + + h := sha256.New() + + mw := io.MultiWriter(h, f) + gw := gzip.NewWriter(mw) + tw := tar.NewWriter(gw) + + if err = filepath.Walk(source, func(p string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + + // Ignore anything that is not a file (directories, symlinks) + if !fi.Mode().IsRegular() { + return nil + } + + // Ignore dotfiles + if strings.HasPrefix(fi.Name(), ".") { + return nil + } + + header, err := tar.FileInfoHeader(fi, p) + if err != nil { + return err + } + // The name needs to be modified to maintain directory structure + // as tar.FileInfoHeader only has access to the base name of the file. + // Ref: https://golang.org/src/archive/tar/common.go?#L626 + relFilePath := p + if filepath.IsAbs(source) { + relFilePath, err = filepath.Rel(source, p) + if err != nil { + return err + } + } + header.Name = relFilePath + + if err := tw.WriteHeader(header); err != nil { + return err + } + + f, err := os.Open(p) + if err != nil { + f.Close() + return err + } + if _, err := io.Copy(tw, f); err != nil { + f.Close() + return err + } + return f.Close() + }); err != nil { + return "", err + } + + if err := tw.Close(); err != nil { + gw.Close() + f.Close() + return "", err + } + if err := gw.Close(); err != nil { + f.Close() + return "", err + } + if err := f.Close(); err != nil { + return "", err + } + + if err := os.Chmod(f.Name(), 0644); err != nil { + return "", err + } + + return fmt.Sprintf("%x", h.Sum(nil)), nil +} diff --git a/http/fetch/go.mod b/http/fetch/go.mod new file mode 100644 index 000000000..55342440a --- /dev/null +++ b/http/fetch/go.mod @@ -0,0 +1,24 @@ +module github.com/fluxcd/pkg/http/fetch + +go 1.18 + +replace ( + github.com/fluxcd/pkg/tar => ../../tar + github.com/fluxcd/pkg/testserver => ../../testserver +) + +require ( + github.com/fluxcd/pkg/tar v0.1.0 + github.com/fluxcd/pkg/testserver v0.3.0 + github.com/hashicorp/go-retryablehttp v0.7.1 +) + +require ( + github.com/cyphar/filepath-securejoin v0.2.3 // indirect + github.com/google/go-cmp v0.5.8 // indirect + github.com/hashicorp/go-cleanhttp v0.5.1 // indirect + github.com/onsi/gomega v1.21.1 // indirect + golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect + golang.org/x/text v0.3.7 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/http/fetch/go.sum b/http/fetch/go.sum new file mode 100644 index 000000000..fa7d3fcc4 --- /dev/null +++ b/http/fetch/go.sum @@ -0,0 +1,22 @@ +github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI= +github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= +github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ= +github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= +github.com/onsi/gomega v1.21.1 h1:OB/euWYIExnPBohllTicTHmGTrMaqJ67nIu80j0/uEM= +github.com/onsi/gomega v1.21.1/go.mod h1:iYAIXgPSaDHak0LCMA+AWBpIKBr8WZicMxnE8luStNc= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/http/fetch/testdata/manifests.yaml b/http/fetch/testdata/manifests.yaml new file mode 100644 index 000000000..dbe12b316 --- /dev/null +++ b/http/fetch/testdata/manifests.yaml @@ -0,0 +1,75 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clustertests.testing.fluxcd.io +spec: + group: testing.fluxcd.io + names: + kind: ClusterTest + listKind: ClusterTestList + plural: clustertests + singular: clustertest + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.type + name: TYPE + type: string + name: v1 + schema: + openAPIV3Schema: + description: Test is the Schema for the testing API + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + description: TestSpec defines the desired state of a test run + properties: + type: + description: Type of test + type: string + enum: + - unit + - integration + valuesFrom: + description: config reference + type: string + type: object + status: + default: + observedGeneration: -1 + properties: + observedGeneration: + description: ObservedGeneration is the last observed generation. + format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] +--- +apiVersion: v1 +kind: Namespace +metadata: + name: test +--- +apiVersion: testing.fluxcd.io/v1 +kind: ClusterTest +metadata: + name: test + namespace: test +spec: + type: integration + valuesFrom: test-config