From a601f15ccfc7609899a78c027b3c7d7ac54248c0 Mon Sep 17 00:00:00 2001 From: Avi Deitcher Date: Tue, 28 Feb 2023 12:27:38 +0200 Subject: [PATCH] support for scanning license files in golang packages Signed-off-by: Avi Deitcher --- cmd/syft/cli/options/packages.go | 17 +++++ go.mod | 1 + go.sum | 2 + internal/config/application.go | 2 + internal/licenses/list.go | 58 +++++++++++++++++ internal/licenses/parser.go | 65 +++++++++++++++++++ syft/pkg/cataloger/golang/package.go | 50 ++++++++++++++ .../cataloger/golang/parse_go_binary_test.go | 5 ++ 8 files changed, 200 insertions(+) create mode 100644 internal/licenses/list.go create mode 100644 internal/licenses/parser.go diff --git a/cmd/syft/cli/options/packages.go b/cmd/syft/cli/options/packages.go index ec0331e4ff7c..67ad5fe12a94 100644 --- a/cmd/syft/cli/options/packages.go +++ b/cmd/syft/cli/options/packages.go @@ -10,6 +10,7 @@ import ( "github.com/anchore/syft/syft/formats" "github.com/anchore/syft/syft/formats/table" "github.com/anchore/syft/syft/pkg/cataloger" + "github.com/anchore/syft/syft/pkg/cataloger/golang" "github.com/anchore/syft/syft/source" ) @@ -22,6 +23,8 @@ type PackagesOptions struct { Exclude []string Catalogers []string Name string + GoFetchPackages bool + GoProxy string } var _ Interface = (*PackagesOptions)(nil) @@ -51,6 +54,12 @@ func (o *PackagesOptions) AddFlags(cmd *cobra.Command, v *viper.Viper) error { cmd.Flags().StringVarP(&o.Name, "name", "", "", "set the name of the target being analyzed") + cmd.Flags().BoolVarP(&o.GoFetchPackages, "go-fetch", "", false, + "enable fetching of Go packages from the internet for license analysis, otherwise will look only in local") + + cmd.Flags().StringVarP(&o.GoProxy, "go-proxy", "", golang.DefaultGoProxy, + "proxy to use when fetching Go packages from the internet for license analysis; used only if --go-fetch is set") + return bindPackageConfigOptions(cmd.Flags(), v) } @@ -89,5 +98,13 @@ func bindPackageConfigOptions(flags *pflag.FlagSet, v 
*viper.Viper) error { return err } + if err := v.BindPFlag("go-fetch", flags.Lookup("go-fetch")); err != nil { + return err + } + + if err := v.BindPFlag("go-proxy", flags.Lookup("go-proxy")); err != nil { + return err + } + return nil } diff --git a/go.mod b/go.mod index 4e1ffb29524d..955a494bc856 100644 --- a/go.mod +++ b/go.mod @@ -55,6 +55,7 @@ require ( github.com/anchore/stereoscope v0.0.0-20230222185948-fab1c9638abc github.com/docker/docker v23.0.1+incompatible github.com/google/go-containerregistry v0.13.0 + github.com/google/licensecheck v0.3.1 github.com/invopop/jsonschema v0.7.0 github.com/knqyf263/go-rpmdb v0.0.0-20221030135625-4082a22221ce github.com/opencontainers/go-digest v1.0.0 diff --git a/go.sum b/go.sum index 43bfd0581ce4..70864ec9faf7 100644 --- a/go.sum +++ b/go.sum @@ -263,6 +263,8 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-containerregistry v0.13.0 h1:y1C7Z3e149OJbOPDBxLYR8ITPz8dTKqQwjErKVHJC8k= github.com/google/go-containerregistry v0.13.0/go.mod h1:J9FQ+eSS4a1aC2GNZxvNpbWhgp0487v+cgiilB4FqDo= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/licensecheck v0.3.1 h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs= +github.com/google/licensecheck v0.3.1/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= diff --git a/internal/config/application.go b/internal/config/application.go index b5a74b6380fe..1e53bcca65e1 100644 --- a/internal/config/application.go +++ b/internal/config/application.go @@ -57,6 +57,8 @@ type Application struct { Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` Name string `yaml:"name" json:"name" 
mapstructure:"name"`
 	Parallelism int    `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
+	GoFetch     bool   `yaml:"go-fetch" json:"go-fetch" mapstructure:"go-fetch"` // bound to the --go-fetch flag
+	GoProxy     string `yaml:"go-proxy" json:"go-proxy" mapstructure:"go-proxy"` // bound to the --go-proxy flag
 }
 
 func (cfg Application) ToCatalogerConfig() cataloger.Config {
diff --git a/internal/licenses/list.go b/internal/licenses/list.go
new file mode 100644
index 000000000000..cd2ecf59525b
--- /dev/null
+++ b/internal/licenses/list.go
@@ -0,0 +1,58 @@
+package licenses
+
+// all of these taken from https://github.com/golang/pkgsite/blob/8996ff632abee854aef1b764ca0501f262f8f523/internal/licenses/licenses.go#L338
+// which unfortunately is not exported. But fortunately is under BSD-style license.
+
+var (
+	FileNames = []string{ // base names that are treated as license files
+		"COPYING",
+		"COPYING.md",
+		"COPYING.markdown",
+		"COPYING.txt",
+		"LICENCE",
+		"LICENCE.md",
+		"LICENCE.markdown",
+		"LICENCE.txt",
+		"LICENSE",
+		"LICENSE.md",
+		"LICENSE.markdown",
+		"LICENSE.txt",
+		"LICENSE-2.0.txt",
+		"LICENCE-2.0.txt",
+		"LICENSE-APACHE",
+		"LICENCE-APACHE",
+		"LICENSE-APACHE-2.0.txt",
+		"LICENCE-APACHE-2.0.txt",
+		"LICENSE-MIT",
+		"LICENCE-MIT",
+		"LICENSE.MIT",
+		"LICENCE.MIT",
+		"LICENSE.code",
+		"LICENCE.code",
+		"LICENSE.docs",
+		"LICENCE.docs",
+		"LICENSE.rst",
+		"LICENCE.rst",
+		"MIT-LICENSE",
+		"MIT-LICENCE",
+		"MIT-LICENSE.md",
+		"MIT-LICENCE.md",
+		"MIT-LICENSE.markdown",
+		"MIT-LICENCE.markdown",
+		"MIT-LICENSE.txt",
+		"MIT-LICENCE.txt",
+		"MIT_LICENSE",
+		"MIT_LICENCE",
+		"UNLICENSE",
+		"UNLICENCE",
+	}
+)
+
+var fileNames map[string]bool // set form of FileNames, filled by init, used for base-name lookups
+
+func init() {
+	fileNames = make(map[string]bool)
+	for _, name := range FileNames {
+		fileNames[name] = true
+	}
+}
diff --git a/internal/licenses/parser.go b/internal/licenses/parser.go
new file mode 100644
index 000000000000..2c53e6145c91
--- /dev/null
+++ b/internal/licenses/parser.go
@@ -0,0 +1,65 @@
+package licenses
+
+import (
+	"io"
+	"io/fs"
+	"path/filepath"
+	"strings"
+
+	"github.com/google/licensecheck"
+)
+
+const (
+	coverageThreshold  = 75        // coverage percentage below which a file is also reported as unknown
+	unknownLicenseType = "UNKNOWN" // reported for files whose coverage is below coverageThreshold
+)
+
+// ScanLicenses scans an fs.FS for licenses. It first finds files whose base name is in
+// FileNames, and then uses github.com/google/licensecheck to scan the contents.
+// Files below a "vendor" directory are skipped; walk and read errors are ignored.
+// It returns the matched license IDs, plus unknownLicenseType for each scanned file
+// whose coverage is under coverageThreshold percent.
+func ScanLicenses(fsys fs.FS) []string {
+	var licenses []string
+	_ = fs.WalkDir(fsys, ".", func(p string, d fs.DirEntry, err error) error {
+		// best effort: skip unreadable entries and directories rather than aborting the walk
+		if err != nil || d.IsDir() {
+			return nil
+		}
+		filename := filepath.Base(p)
+		// ignore any that are not a known license file name
+		if _, ok := fileNames[filename]; !ok {
+			return nil
+		}
+		// make sure it is not in a vendored path; decided per file, so that one vendored
+		// license does not cause every later file in the walk to be skipped
+		parts := strings.Split(filepath.Dir(p), string(filepath.Separator))
+		for _, part := range parts {
+			if part == "vendor" {
+				return nil
+			}
+		}
+		// read the file contents
+		rc, err := fsys.Open(p)
+		if err != nil {
+			return nil
+		}
+		defer rc.Close() // runs when this callback returns, i.e. once per file
+		contents, err := io.ReadAll(rc)
+		if err != nil {
+			return nil
+		}
+		cov := licensecheck.Scan(contents)
+
+		// a low-coverage file is reported as unknown, but any licenses that did match
+		// are still recorded alongside it
+		if cov.Percent < float64(coverageThreshold) {
+			licenses = append(licenses, unknownLicenseType)
+		}
+		for _, m := range cov.Match {
+			licenses = append(licenses, m.ID)
+		}
+		return nil
+	})
+	return licenses
+}
diff --git a/syft/pkg/cataloger/golang/package.go b/syft/pkg/cataloger/golang/package.go
index 93f762a5d9af..048bf4272e3a 100644
--- a/syft/pkg/cataloger/golang/package.go
+++ b/syft/pkg/cataloger/golang/package.go
@@ -1,15 +1,26 @@
 package golang
 
 import (
+	"archive/zip"
+	"bytes"
+	"fmt"
+	"io"
+	"io/fs"
+	"net/http"
+	"os"
+	"path/filepath"
 	"regexp"
 	"runtime/debug"
 	"strings"
 
 	"github.com/anchore/packageurl-go"
+	"github.com/anchore/syft/internal/licenses"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/source"
 )
 
+const DefaultGoProxy = "https://proxy.golang.org" // default value of the --go-proxy flag
+
 func newGoBinaryPackage(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings
map[string]string, locations ...source.Location) pkg.Package {
 	if dep.Replace != nil {
 		dep = dep.Replace
@@ -18,6 +29,7 @@ func newGoBinaryPackage(dep *debug.Module, mainModule, goVersion, architecture s
 	p := pkg.Package{
 		Name:     dep.Path,
 		Version:  dep.Version,
+		Licenses: goLicenses(dep.Path, dep.Version),
 		PURL:     packageURL(dep.Path, dep.Version),
 		Language: pkg.Go,
 		Type:     pkg.GoModulePkg,
@@ -67,3 +79,41 @@ func packageURL(moduleName, moduleVersion string) string {
 		subpath,
 	).ToString()
 }
+
+// goLicenses returns what licenses.ScanLicenses finds in the module source, or nil if the module cannot be retrieved.
+func goLicenses(moduleName, moduleVersion string) []string {
+	fsys, err := getModule(moduleName, moduleVersion, DefaultGoProxy)
+	if err != nil {
+		return nil
+	}
+	return licenses.ScanLicenses(fsys)
+}
+
+// getModule returns the module source as an fs.FS: the directory under $GOPATH/pkg/mod when
+// it exists, otherwise the module zip downloaded from proxy and held fully in memory.
+func getModule(module, version, proxy string) (fs.FS, error) {
+	// first see if we have it locally
+	goPath := os.Getenv("GOPATH")
+	if goPath != "" {
+		modPath := filepath.Join(goPath, "pkg", "mod", fmt.Sprintf("%s@%s", module, version))
+		if fi, err := os.Stat(modPath); err == nil && fi != nil && fi.IsDir() {
+			return os.DirFS(modPath), nil
+		}
+	}
+
+	// we could not get it locally, so get the module zip from the proxy (nothing here checks --go-fetch)
+	resp, err := http.Get(fmt.Sprintf("%s/%s/@v/%s.zip", proxy, module, version))
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
+	}
+	b, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	// size the zip by the bytes actually read: resp.ContentLength is -1 when the length is unknown
+	return zip.NewReader(bytes.NewReader(b), int64(len(b)))
+}
diff --git a/syft/pkg/cataloger/golang/parse_go_binary_test.go b/syft/pkg/cataloger/golang/parse_go_binary_test.go
index acfd435c59a0..72f8e97c2aba 100644
--- a/syft/pkg/cataloger/golang/parse_go_binary_test.go
+++ b/syft/pkg/cataloger/golang/parse_go_binary_test.go
@@ -223,6 +223,7 @@ func TestBuildGoPkgInfo(t *testing.T) {
 					Name:     "github.com/adrg/xdg",
 					Version:  "v0.2.1",
 					PURL:
"pkg:golang/github.com/adrg/xdg@v0.2.1", + Licenses: []string{"MIT"}, Language: pkg.Go, Type: pkg.GoModulePkg, Locations: source.NewLocationSet( @@ -330,6 +331,7 @@ func TestBuildGoPkgInfo(t *testing.T) { Name: "github.com/adrg/xdg", Version: "v0.2.1", PURL: "pkg:golang/github.com/adrg/xdg@v0.2.1", + Licenses: []string{"MIT"}, Language: pkg.Go, Type: pkg.GoModulePkg, Locations: source.NewLocationSet( @@ -352,6 +354,7 @@ func TestBuildGoPkgInfo(t *testing.T) { Name: "github.com/anchore/client-go", Version: "v0.0.0-20210222170800-9c70f9b80bcf", PURL: "pkg:golang/github.com/anchore/client-go@v0.0.0-20210222170800-9c70f9b80bcf", + Licenses: []string{"Apache-2.0"}, Language: pkg.Go, Type: pkg.GoModulePkg, Locations: source.NewLocationSet( @@ -407,6 +410,7 @@ func TestBuildGoPkgInfo(t *testing.T) { Name: "golang.org/x/sys", Version: "v0.0.0-20211006194710-c8a6f5223071", PURL: "pkg:golang/golang.org/x/sys@v0.0.0-20211006194710-c8a6f5223071", + Licenses: []string{"BSD-3-Clause"}, Language: pkg.Go, Type: pkg.GoModulePkg, Locations: source.NewLocationSet( @@ -428,6 +432,7 @@ func TestBuildGoPkgInfo(t *testing.T) { Name: "golang.org/x/term", Version: "v0.0.0-20210916214954-140adaaadfaf", PURL: "pkg:golang/golang.org/x/term@v0.0.0-20210916214954-140adaaadfaf", + Licenses: []string{"BSD-3-Clause"}, Language: pkg.Go, Type: pkg.GoModulePkg, Locations: source.NewLocationSet(