Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding the ability to retrieve remote licenses for yarn.lock #2338

Merged
merged 7 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ python:
# when given an arbitrary constraint will be used (even if that version may not be available/published).
guess-unpinned-requirements: false

javascript:
search-remote-licenses: false
npm-base-url: "https://registry.npmjs.org"

file-contents:
cataloger:
# enable/disable cataloging of file contents
Expand Down
5 changes: 5 additions & 0 deletions cmd/syft/cli/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger"
golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang"
javaCataloger "github.com/anchore/syft/syft/pkg/cataloger/java"
javascriptCataloger "github.com/anchore/syft/syft/pkg/cataloger/javascript"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/source"
Expand All @@ -26,6 +27,7 @@ type Catalog struct {
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
Java java `yaml:"java" json:"java" mapstructure:"java"`
Javascript javascript `yaml:"javascript" json:"javascript" mapstructure:"javascript"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
Expand Down Expand Up @@ -145,6 +147,9 @@ func (cfg Catalog) ToCatalogerConfig() cataloger.Config {
IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives,
},
cfg.Java.MaxParentRecursiveDepth),
Javascript: javascriptCataloger.DefaultCatalogerConfig().
WithSearchRemoteLicenses(cfg.Javascript.SearchRemoteLicenses).
WithNpmBaseURL(cfg.Javascript.NpmBaseURL),
Python: pythonCataloger.CatalogerConfig{
GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements,
},
Expand Down
6 changes: 6 additions & 0 deletions cmd/syft/cli/options/javascript.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package options

// javascript holds the options read from the "javascript" section of the
// application configuration.
type javascript struct {
	// SearchRemoteLicenses is handed to the javascript cataloger configuration
	// through WithSearchRemoteLicenses.
	SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`

	// NpmBaseURL is handed to the javascript cataloger configuration through
	// WithNpmBaseURL.
	// NOTE(review): an empty value appears to keep the cataloger's built-in
	// registry URL — confirm against WithNpmBaseURL.
	NpmBaseURL string `json:"npm-base-url" yaml:"npm-base-url" mapstructure:"npm-base-url"`
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
java.NewGradleLockfileCataloger(),
java.NewPomCataloger(),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewLockCataloger(cfg.Javascript),
nix.NewStoreCataloger(),
php.NewComposerLockCataloger(),
gentoo.NewPortageCataloger(),
Expand Down Expand Up @@ -124,7 +124,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
java.NewGradleLockfileCataloger(),
java.NewPomCataloger(),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewLockCataloger(cfg.Javascript),
javascript.NewPackageCataloger(),
kernel.NewLinuxKernelCataloger(cfg.LinuxKernel),
nix.NewStoreCataloger(),
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/java"
"github.com/anchore/syft/syft/pkg/cataloger/javascript"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
"github.com/anchore/syft/syft/pkg/cataloger/python"
)
Expand All @@ -15,6 +16,7 @@ type Config struct {
LinuxKernel kernel.LinuxKernelCatalogerConfig
Python python.CatalogerConfig
Java java.ArchiveCatalogerConfig
Javascript javascript.CatalogerConfig
Catalogers []string
Parallelism int
ExcludeBinaryOverlapByOwnership bool
Expand All @@ -27,6 +29,7 @@ func DefaultConfig() Config {
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
Java: java.DefaultArchiveCatalogerConfig(),
Javascript: javascript.DefaultCatalogerConfig(),
ExcludeBinaryOverlapByOwnership: true,
}
}
Expand Down
5 changes: 3 additions & 2 deletions syft/pkg/cataloger/javascript/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ func NewPackageCataloger() pkg.Cataloger {
}

// NewLockCataloger returns a new cataloger object for NPM (and NPM-adjacent, such as yarn) lock files.
func NewLockCataloger() pkg.Cataloger {
func NewLockCataloger(cfg CatalogerConfig) pkg.Cataloger {
yarnLockAdapter := newGenericYarnLockAdapter(cfg)
return generic.NewCataloger("javascript-lock-cataloger").
WithParserByGlobs(parsePackageLock, "**/package-lock.json").
WithParserByGlobs(parseYarnLock, "**/yarn.lock").
WithParserByGlobs(yarnLockAdapter.parseYarnLock, "**/yarn.lock").
WithParserByGlobs(parsePnpmLock, "**/pnpm-lock.yaml")
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/javascript/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func Test_JavascriptCataloger(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, "test-fixtures/pkg-lock").
Expects(expectedPkgs, nil).
TestCataloger(t, NewLockCataloger())
TestCataloger(t, NewLockCataloger(CatalogerConfig{}))

}

Expand Down Expand Up @@ -183,7 +183,7 @@ func Test_LockCataloger_Globs(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewLockCataloger())
TestCataloger(t, NewLockCataloger(CatalogerConfig{}))
})
}
}
27 changes: 27 additions & 0 deletions syft/pkg/cataloger/javascript/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package javascript

// npmBaseURL is the registry endpoint used when no override is configured.
const npmBaseURL = "https://registry.npmjs.org"

// CatalogerConfig carries the settings consumed by the javascript lock-file
// cataloger. Fields are unexported; build a value with DefaultCatalogerConfig
// and adjust it through the With* methods.
type CatalogerConfig struct {
	searchRemoteLicenses bool
	npmBaseURL           string
}

// DefaultCatalogerConfig returns a configuration with remote license lookups
// switched off and the registry pointing at the package-level npmBaseURL.
func DefaultCatalogerConfig() CatalogerConfig {
	var defaults CatalogerConfig
	defaults.searchRemoteLicenses = false
	defaults.npmBaseURL = npmBaseURL
	return defaults
}

// WithSearchRemoteLicenses returns a copy of the configuration whose remote
// license lookup flag is set to input. The receiver is left untouched.
func (c CatalogerConfig) WithSearchRemoteLicenses(input bool) CatalogerConfig {
	updated := c
	updated.searchRemoteLicenses = input
	return updated
}

// WithNpmBaseURL returns a copy of the configuration that uses input as the
// registry base URL. An empty input is ignored, so the previously configured
// URL is kept.
func (c CatalogerConfig) WithNpmBaseURL(input string) CatalogerConfig {
	if input == "" {
		return c
	}
	c.npmBaseURL = input
	return c
}
77 changes: 76 additions & 1 deletion syft/pkg/cataloger/javascript/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ package javascript

import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"time"

"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
Expand Down Expand Up @@ -106,13 +110,27 @@ func newPnpmPackage(resolver file.Resolver, location file.Location, name, versio
)
}

func newYarnLockPackage(resolver file.Resolver, location file.Location, name, version string) pkg.Package {
func newYarnLockPackage(cfg CatalogerConfig, resolver file.Resolver, location file.Location, name, version string) pkg.Package {
var licenseSet pkg.LicenseSet

if cfg.searchRemoteLicenses {
license, err := getLicenseFromNpmRegistry(cfg.npmBaseURL, name, version)
if err == nil && license != "" {
licenses := pkg.NewLicensesFromValues(license)
licenseSet = pkg.NewLicenseSet(licenses...)
}
if err != nil {
log.Warnf("unable to extract licenses from javascript yarn.lock for package %s:%s: %+v", name, version, err)
}
}

return finalizeLockPkg(
resolver,
location,
pkg.Package{
Name: name,
Version: version,
Licenses: licenseSet,
Locations: file.NewLocationSet(location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
PURL: packageURL(name, version),
Language: pkg.JavaScript,
Expand All @@ -121,6 +139,63 @@ func newYarnLockPackage(resolver file.Resolver, location file.Location, name, ve
)
}

// formatNpmRegistryURL joins baseURL, packageName and version into the URL of
// the registry document for that single package version, e.g.
// "https://registry.npmjs.org/<packageName>/<version>".
//
// It returns an error when baseURL cannot be parsed as a URL; the error names
// the package as "name:version" and wraps the underlying parse failure.
func formatNpmRegistryURL(baseURL, packageName, version string) (requestURL string, err error) {
	requestURL, err = url.JoinPath(baseURL, packageName, version)
	if err != nil {
		// name and version are separated by ':' so the message matches its own
		// "pkg:version" label instead of running the two values together.
		return requestURL, fmt.Errorf("unable to format npm request for pkg:version %s:%s; %w", packageName, version, err)
	}
	return requestURL, nil
}

// getLicenseFromNpmRegistry requests the registry document for one package
// version from the npm registry rooted at basURL and returns the value of its
// top-level "license" field.
//
// An empty string with a nil error means the registry answered with 200 OK
// but the document carries no (or an empty) "license" string. A URL that
// cannot be built, a transport failure, a non-200 response, or a body that
// cannot be decoded is reported as an error.
func getLicenseFromNpmRegistry(basURL, packageName, version string) (string, error) {
	// maxResponseBytes bounds how much of the response is read, so a
	// misbehaving (possibly user-configured) registry cannot make the
	// cataloger buffer an arbitrarily large body.
	const maxResponseBytes = 10 << 20 // 10 MiB

	// "https://registry.npmjs.org/%s/%s", packageName, version
	requestURL, err := formatNpmRegistryURL(basURL, packageName, version)
	if err != nil {
		// formatNpmRegistryURL already wraps the failure with the package
		// name and version; wrapping again would only repeat that text.
		return "", err
	}
	log.Tracef("trying to fetch remote package %s", requestURL)

	npmRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return "", fmt.Errorf("unable to format remote request: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(npmRequest)
	if err != nil {
		return "", fmt.Errorf("unable to get package from npm registry: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// Error responses also carry a JSON body; without this check such a body
	// could decode cleanly and be mistaken for "package has no license".
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unable to get package from npm registry: unexpected status %q for %s", resp.Status, requestURL)
	}

	// Read "license" from the response
	var metadata struct {
		License string `json:"license"`
	}

	if err := json.NewDecoder(io.LimitReader(resp.Body, maxResponseBytes)).Decode(&metadata); err != nil {
		return "", fmt.Errorf("unable to parse license from npm registry: %w", err)
	}

	log.Tracef("Retrieved License: %s", metadata.License)

	return metadata.License, nil
}

func finalizeLockPkg(resolver file.Resolver, location file.Location, p pkg.Package) pkg.Package {
licenseCandidate := addLicenses(p.Name, resolver, location)
p.Licenses.Add(pkg.NewLicensesFromLocation(location, licenseCandidate...)...)
Expand Down
21 changes: 14 additions & 7 deletions syft/pkg/cataloger/javascript/parse_yarn_lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// integrity check
var _ generic.Parser = parseYarnLock

var (
// packageNameExp matches the name of the dependency in yarn.lock
// including scope/namespace prefix if found.
Expand Down Expand Up @@ -43,7 +40,17 @@ const (
noVersion = ""
)

func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// genericYarnLockAdapter pairs a CatalogerConfig with the yarn.lock parser so
// the configuration is available to the parser through its method receiver.
type genericYarnLockAdapter struct {
	cfg CatalogerConfig
}

// newGenericYarnLockAdapter returns an adapter that holds a copy of cfg.
func newGenericYarnLockAdapter(cfg CatalogerConfig) genericYarnLockAdapter {
	var adapter genericYarnLockAdapter
	adapter.cfg = cfg
	return adapter
}

func (a genericYarnLockAdapter) parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// in the case we find yarn.lock files in the node_modules directories, skip those
// as the whole purpose of the lock file is for the specific dependencies of the project
if pathContainsNodeModulesDirectory(reader.Path()) {
Expand All @@ -62,15 +69,15 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
if packageName := findPackageName(line); packageName != noPackage {
// When we find a new package, check if we have unsaved identifiers
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, currentPackage, currentVersion))
pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion))
parsedPackages.Add(currentPackage + "@" + currentVersion)
}

currentPackage = packageName
} else if version := findPackageVersion(line); version != noVersion {
currentVersion = version
} else if packageName, version := findPackageAndVersion(line); packageName != noPackage && version != noVersion && !parsedPackages.Has(packageName+"@"+version) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, packageName, version))
pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, packageName, version))
parsedPackages.Add(packageName + "@" + version)

// Cleanup to indicate no unsaved identifiers
Expand All @@ -81,7 +88,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L

// check if we have valid unsaved data after end-of-file has reached
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, currentPackage, currentVersion))
pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion))
parsedPackages.Add(currentPackage + "@" + currentVersion)
}

Expand Down
Loading
Loading