diff --git a/README.md b/README.md index d75f248bee4..4b053bf9a15 100644 --- a/README.md +++ b/README.md @@ -109,13 +109,13 @@ nix-shell -p syft To generate an SBOM for a container image: -``` +```bash syft ``` The above output includes only software that is visible in the container (i.e., the squashed representation of the image). To include software from all image layers in the SBOM, regardless of its presence in the final image, provide `--scope all-layers`: -``` +```bash syft --scope all-layers ``` @@ -123,7 +123,7 @@ syft --scope all-layers Syft can generate an SBOM from a variety of sources: -``` +```bash # catalog a container image archive (from the result of `docker image save ...`, `podman save ...`, or `skopeo copy` commands) syft path/to/image.tar @@ -155,69 +155,114 @@ If docker is not present, then the Podman daemon is attempted next, followed by This default behavior can be overridden with the `default-image-pull-source` configuration option (See [Configuration](https://github.com/anchore/syft#configuration) for more details). -### Default Cataloger Configuration by scan type - -Syft uses different default sets of catalogers depending on what it is scanning: a container image or a directory on disk. The default catalogers for an image scan assumes that package installation steps have already been completed. For example, Syft will identify Python packages that have egg or wheel metadata files under a site-packages directory, since this indicates software actually installed on an image. - -However, if you are scanning a directory, Syft doesn't assume that all relevant software is installed, and will use catalogers that can identify declared dependencies that may not yet be installed on the final system: for example, dependencies listed in a Python requirements.txt. - -You can override the list of enabled/disabled catalogers by using the "catalogers" keyword in the [Syft configuration file](https://github.com/anchore/syft#configuration). 
- -##### Image Scanning: -- alpmdb -- apkdb -- binary -- dotnet-deps -- dotnet-portable-executable -- dpkgdb -- go-module-binary -- graalvm-native-image -- java -- javascript-package -- linux-kernel -- nix-store -- php-composer-installed -- portage -- python-package -- rpm-db -- ruby-gemspec -- sbom - -##### Directory Scanning: -- alpmdb -- apkdb -- binary -- cocoapods -- conan -- dartlang-lock -- dotnet-deps -- dotnet-portable-executable -- dpkgdb -- elixir-mix-lock -- erlang-rebar-lock -- go-mod-file -- go-module-binary -- graalvm-native-image -- haskell -- java -- java-gradle-lockfile -- java-pom -- javascript-lock -- linux-kernel -- nix-store -- php-composer-lock -- portage -- python-index -- python-package -- rpm-db -- rpm-file -- ruby-gemfile -- rust-cargo-lock -- sbom -- swift-package-manager - -##### Non Default: -- cargo-auditable-binary + +### File selection + +By default, Syft will catalog file details and digests for files that are owned by discovered packages. You can change this behavior by using the `SYFT_FILE_METADATA_SELECTION` environment variable or the `file.metadata.selection` configuration option. The options are: + +- `all`: capture all files from the search space +- `owned-by-package`: capture only files owned by packages (default) +- `none`: disable capturing any file information + + +### Package cataloger selection + +#### Concepts + +> [!IMPORTANT] +> Syft uses a different set of catalogers by default when scanning files directly than it does when scanning images + +The catalogers for an image scan assume that package installation steps have already been completed. For example, Syft will identify Python packages that have egg or wheel metadata files under a `site-packages` directory, since this is how the canonical tooling `pip` installs python packages. + +The catalogers for a directory scan will look for installed software as well as declared dependencies that are not necessarily installed. 
For example, dependencies listed in a Python `requirements.txt`. + +This default set of catalogers being dynamic is critical as this allows Syft to be used in a variety of contexts while still generating accurate SBOMs. +Overriding the set of default catalogers is not recommended for most purposes; however, it is possible if needed. + +Catalogers can be referenced in two different ways: +- *by name*: the exact cataloger name (e.g. `java-pom-cataloger` or `java-archive-cataloger`) +- *by tag*: a tag that is associated with a cataloger (e.g. `java`) + +Syft can take lists of references on the CLI or in the application configuration to define which catalogers to use. + +You can **set** the list of catalogers explicitly to use with the `--override-default-catalogers` CLI flag, accepting a comma-separated list of cataloger names or tags. + +You can also **add** to, **remove** from, or **sub-select** catalogers to use within the default set of catalogers by using the `--select-catalogers` CLI flag. + - To **sub-select** catalogers simply provide a tag (e.g. `--select-catalogers TAG`). Catalogers will always be selected from the default set of catalogers (e.g. `--select-catalogers java,go` will select all the `java` catalogers in the default set and all the `go` catalogers in the default set). + - To **add** a cataloger prefix the cataloger name with `+` (e.g. `--select-catalogers +NAME`). Added catalogers will _always be added_ regardless of removals, filtering, or other defaults. + - To **remove** a cataloger prefix the cataloger name or tag with `-` (e.g. `--select-catalogers -NAME_OR_TAG`). Catalogers are removed from the set of default catalogers after processing any sub-selections. 
+ +These rules and the dynamic default cataloger sets approximate to the following logic: + +``` +image_catalogers = all_catalogers AND catalogers_tagged("image") + +directory_catalogers = all_catalogers AND catalogers_tagged("directory") + +default_catalogers = image_catalogers OR directory_catalogers + +sub_selected_catalogers = default_catalogers INTERSECT catalogers_tagged(TAG) [ UNION sub_selected_catalogers ... ] + +base_catalogers = default_catalogers OR sub_selected_catalogers + +final_set = (base_catalogers SUBTRACT removed_catalogers) UNION added_catalogers +``` + + +#### Examples + +Only scan for python related packages with catalogers appropriate for the source type (image or directory): +```bash +syft --select-catalogers "python" +# results in the following catalogers being used: +# - python-installed-package-cataloger +``` + +Same command, but the set of catalogers changes based on what is being analyzed (in this case a directory): +```bash +syft --select-catalogers "python" +# results in the following catalogers being used: +# - python-installed-package-cataloger +# - python-package-cataloger +``` + +Use the default set of catalogers and add a cataloger to the set: +```bash +syft ... --select-catalogers "+sbom-cataloger" +``` + +Use the default set of catalogers but remove any catalogers that deal with RPMs: +```bash +syft ... --select-catalogers "-rpm" +``` + +Only scan with catalogers that: +- are tagged with "go" +- always use the sbom-cataloger +- are appropriate for the source type (image or directory) + +```bash +syft --select-catalogers "go,+sbom-cataloger" +# results in the following catalogers being used: +# - go-module-binary-cataloger +# - sbom-cataloger +``` + +Scan with all catalogers that deal with binary analysis, regardless of the source type: +```bash +syft ... 
--override-default-catalogers "binary" +# results in the following catalogers being used: +# - binary-cataloger +# - cargo-auditable-binary-cataloger +# - dotnet-portable-executable-cataloger +# - go-module-binary-cataloger +``` + +Only scan with the specific `go-module-binary-cataloger` and `go-module-file-cataloger` catalogers: +```bash +syft ... --override-default-catalogers "go-module-binary-cataloger,go-module-file-cataloger" +``` + ### Excluding file paths @@ -468,21 +513,18 @@ output: "syft-table" # same as -q ; SYFT_QUIET env var quiet: false -# same as --file; write output report to a file (default is to write to stdout) -file: "" - # enable/disable checking for application updates on startup # same as SYFT_CHECK_FOR_APP_UPDATE env var check-for-app-update: true -# allows users to specify which image source should be used to generate the sbom -# valid values are: registry, docker, podman -default-image-pull-source: "" +# maximum number of workers used to process the list of package catalogers in parallel +parallelism: 1 -# a list of globs to exclude from scanning. 
same as --exclude ; for example: +# a list of globs to exclude from scanning, for example: # exclude: # - "/etc/**" # - "./out/**/*.json" +# same as --exclude ; SYFT_EXCLUDE env var exclude: [] # allows users to exclude synthetic binary packages from the sbom @@ -493,6 +535,10 @@ exclude-binary-overlap-by-ownership: true # same as --platform; SYFT_PLATFORM env var platform: "" +# the search space to look for file and package data (options: all-layers, squashed) +# SYFT_SCOPE env var +scope: "squashed" + # set the list of package catalogers to use when generating the SBOM # default = empty (cataloger set determined automatically by the source type [image or file/directory]) # catalogers: @@ -533,7 +579,6 @@ platform: "" # - spm-cataloger catalogers: - # all format configuration format: @@ -586,9 +631,36 @@ format: pretty: false +file: + + metadata: + # select which files should be captured by the file-metadata cataloger and included in the SBOM. + # Options include: + # - "all-files": capture all files from the search space + # - "owned-files": capture only files owned by packages + # - "unowned-files": capture only files not owned by packages + # - "no-files", "": do not capture any files + # SYFT_FILE_METADATA_SELECTION env var + selection: "owned-files" + + # the file digest algorithms to use when cataloging files (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512") + # SYFT_FILE_METADATA_DIGESTS env var + digests: ["sha256"] + + # capture the contents of select files in the SBOM + content: + # skip searching a file entirely if it is above the given size (default = 1MB; unit = bytes) + # SYFT_FILE_CONTENT_SKIP_FILES_ABOVE_SIZE env var + skip-files-above-size: 1048576 + + # file globs for the cataloger to match on + # SYFT_FILE_CONTENT_GLOBS env var + globs: [] + + # cataloging packages is exposed through the packages and power-user subcommands package: - + # search within archives that do contain a file index to search against (zip) # note: for now 
this only applies to the java package cataloger # SYFT_PACKAGE_SEARCH_INDEXED_ARCHIVES env var @@ -600,14 +672,10 @@ package: # SYFT_PACKAGE_SEARCH_UNINDEXED_ARCHIVES env var search-unindexed-archives: false - cataloger: - # enable/disable cataloging of packages - # SYFT_PACKAGE_CATALOGER_ENABLED env var - enabled: true + # allows users to exclude synthetic binary packages from the sbom + # these packages are removed if an overlap with a non-synthetic package is found + exclude-binary-overlap-by-ownership: true - # the search space to look for packages (options: all-layers, squashed) - # same as -s ; SYFT_PACKAGE_CATALOGER_SCOPE env var - scope: "squashed" golang: # search for go package licences in the GOPATH of the system running Syft, note that this is outside the @@ -657,45 +725,11 @@ python: guess-unpinned-requirements: false javascript: - search-remote-licenses: false - npm-base-url: "https://registry.npmjs.org" - -file-contents: - cataloger: - # enable/disable cataloging of file contents - # SYFT_FILE_CONTENTS_CATALOGER_ENABLED env var - enabled: true - - # the search space to look for file contents (options: all-layers, squashed) - # SYFT_FILE_CONTENTS_CATALOGER_SCOPE env var - scope: "squashed" + search-remote-licenses: false + npm-base-url: "https://registry.npmjs.org" - # skip searching a file entirely if it is above the given size (default = 1MB; unit = bytes) - # SYFT_FILE_CONTENTS_SKIP_FILES_ABOVE_SIZE env var - skip-files-above-size: 1048576 - # file globs for the cataloger to match on - # SYFT_FILE_CONTENTS_GLOBS env var - globs: [] - -file-metadata: - cataloger: - # enable/disable cataloging of file metadata - # SYFT_FILE_METADATA_CATALOGER_ENABLED env var - enabled: true - - # the search space to look for file metadata (options: all-layers, squashed) - # SYFT_FILE_METADATA_CATALOGER_SCOPE env var - scope: "squashed" - - # the file digest algorithms to use when cataloging files (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512") - # 
SYFT_FILE_METADATA_DIGESTS env var - digests: ["sha256"] - -# maximum number of workers used to process the list of package catalogers in parallel -parallelism: 1 - -# options that apply to all scan sources +# configuration for the source that the SBOM is generated from (e.g. a file, directory, or container image) source: # alias name for the source # SYFT_SOURCE_NAME env var; --source-name flag @@ -710,6 +744,14 @@ source: # the file digest algorithms to use on the scanned file (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512") digests: ["sha256"] + image: + + # allows users to specify which image source should be used to generate the sbom + # valid values are: registry, docker, podman + # SYFT_SOURCE_IMAGE_DEFAULT_PULL_SOURCE env var + default-pull-source: "" + + # options when pulling directly from a registry via the "registry:" or "containerd:" scheme registry: # skip TLS verification when communicating with the registry diff --git a/cmd/syft/cli/commands/attest.go b/cmd/syft/cli/commands/attest.go index 37bbbe56a64..b1ec9fc6790 100644 --- a/cmd/syft/cli/commands/attest.go +++ b/cmd/syft/cli/commands/attest.go @@ -1,6 +1,7 @@ package commands import ( + "context" "fmt" "io" "os" @@ -66,7 +67,7 @@ func Attest(app clio.Application) *cobra.Command { restoreStdout := ui.CaptureStdoutToTraceLog() defer restoreStdout() - return runAttest(id, &opts, args[0]) + return runAttest(cmd.Context(), id, &opts, args[0]) }, }, &opts) } @@ -98,7 +99,7 @@ func defaultAttestOutputOptions() options.Output { } //nolint:funlen -func runAttest(id clio.Identification, opts *attestOptions, userInput string) error { +func runAttest(ctx context.Context, id clio.Identification, opts *attestOptions, userInput string) error { // TODO: what other validation here besides binary name? 
if !commandExists(cosignBinName) { return fmt.Errorf("'syft attest' requires cosign to be installed, however it does not appear to be on PATH") @@ -111,7 +112,7 @@ func runAttest(id clio.Identification, opts *attestOptions, userInput string) er } defer os.Remove(f.Name()) - s, err := generateSBOMForAttestation(id, &opts.Catalog, userInput) + s, err := generateSBOMForAttestation(ctx, id, &opts.Catalog, userInput) if err != nil { return fmt.Errorf("unable to build SBOM: %w", err) } @@ -245,7 +246,7 @@ func predicateType(outputName string) string { } } -func generateSBOMForAttestation(id clio.Identification, opts *options.Catalog, userInput string) (*sbom.SBOM, error) { +func generateSBOMForAttestation(ctx context.Context, id clio.Identification, opts *options.Catalog, userInput string) (*sbom.SBOM, error) { src, err := getSource(opts, userInput, onlyContainerImages) if err != nil { @@ -260,7 +261,7 @@ func generateSBOMForAttestation(id clio.Identification, opts *options.Catalog, u } }() - s, err := generateSBOM(id, src, opts) + s, err := generateSBOM(ctx, id, src, opts) if err != nil { return nil, err } diff --git a/cmd/syft/cli/commands/attest_test.go b/cmd/syft/cli/commands/attest_test.go index 31ab90e8952..6391f6311ca 100644 --- a/cmd/syft/cli/commands/attest_test.go +++ b/cmd/syft/cli/commands/attest_test.go @@ -2,6 +2,7 @@ package commands import ( "bytes" + "context" "fmt" "os/exec" "regexp" @@ -258,7 +259,7 @@ func Test_buildSBOMForAttestation(t *testing.T) { if tt.wantErr == nil { tt.wantErr = require.NoError } - _, err := generateSBOMForAttestation(tt.args.id, tt.args.opts, tt.args.userInput) + _, err := generateSBOMForAttestation(context.Background(), tt.args.id, tt.args.opts, tt.args.userInput) tt.wantErr(t, err) if err != nil { return diff --git a/cmd/syft/cli/commands/packages.go b/cmd/syft/cli/commands/packages.go index 1c81dd3e5a1..18d29cf07ba 100644 --- a/cmd/syft/cli/commands/packages.go +++ b/cmd/syft/cli/commands/packages.go @@ -23,7 +23,7 @@ func 
Packages(app clio.Application, scanCmd *cobra.Command) *cobra.Command { restoreStdout := ui.CaptureStdoutToTraceLog() defer restoreStdout() - return runScan(id, opts, args[0]) + return runScan(cmd.Context(), id, opts, args[0]) }, }, opts) diff --git a/cmd/syft/cli/commands/packages_test.go b/cmd/syft/cli/commands/packages_test.go new file mode 100644 index 00000000000..35d5241a217 --- /dev/null +++ b/cmd/syft/cli/commands/packages_test.go @@ -0,0 +1,300 @@ +package commands + +import ( + "errors" + "fmt" + "testing" + + "github.com/hashicorp/go-multierror" + "github.com/stretchr/testify/assert" + + "github.com/anchore/syft/internal/task" +) + +func Test_filterExpressionErrors_expressionErrorsHelp(t *testing.T) { + tests := []struct { + name string + err error + wantExpErrs []task.ErrInvalidExpression + wantErr assert.ErrorAssertionFunc + wantHelp string + }{ + { + name: "no errors", + err: nil, + wantExpErrs: nil, + wantErr: assert.NoError, + wantHelp: "", + }, + { + name: "single non-expression error is retained", + err: errors.New("foo"), + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.Equal(t, "foo", err.Error()) + }, + wantHelp: "", + }, + { + name: "multiple non-expression sibling errors are retained", + err: func() error { + var err error + err = multierror.Append(err, errors.New("foo")) + err = multierror.Append(err, errors.New("bar")) + return err + }(), + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + // note: this is the default formatting from the hashicorp multierror object + expected := `2 errors occurred: + * foo + * bar + +` + return assert.Equal(t, expected, err.Error()) + }, + wantHelp: "", + }, + { + name: "has multiple expression errors (with sibling errors)", + err: func() error { + var err error + err = multierror.Append(err, errors.New("foo")) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}) + 
err = multierror.Append(err, errors.New("bar")) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "bar", Operation: task.SubSelectOperation, Err: task.ErrNamesNotAllowed}) + err = multierror.Append(err, errors.New("last")) + return err + }(), + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + expected := `5 errors occurred: + * foo + * invalid expression: "foo": tags are not allowed with this operation (must use exact names) + * bar + * invalid expression: "bar": names are not allowed with this operation (must use tags) + * last + +` + return assert.Equal(t, expected, err.Error()) + }, + wantExpErrs: []task.ErrInvalidExpression{ + {Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}, + {Expression: "bar", Operation: task.SubSelectOperation, Err: task.ErrNamesNotAllowed}, + }, + wantHelp: `Suggestions: + + ❖ Given expression "--select-catalogers foo" + However, tags are not allowed with this operation (must use exact names). + Adding groups of catalogers may result in surprising behavior (create inaccurate SBOMs). + If you are certain this is what you want to do, use "--override-default-catalogers foo" instead. + + ❖ Given expression "--select-catalogers bar" + However, names are not allowed with this operation (must use tags). + It seems like you are intending to add a cataloger in addition to the default set. + ... Did you mean "--select-catalogers +bar" instead? 
+`, + }, + { + name: "has multiple expression errors (with error chains and sibling errors)", + err: func() error { + var err error + err = multierror.Append(err, fmt.Errorf("foo: %w", fmt.Errorf("bar: %w", errors.New("last")))) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "bar", Operation: task.SubSelectOperation, Err: task.ErrNamesNotAllowed}) + err = multierror.Append(err, errors.New("bottom")) + + return fmt.Errorf("top: %w", fmt.Errorf("middle: %w", err)) + }(), + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + expected := `top: middle: 4 errors occurred: + * foo: bar: last + * invalid expression: "foo": tags are not allowed with this operation (must use exact names) + * invalid expression: "bar": names are not allowed with this operation (must use tags) + * bottom + +` + return assert.Equal(t, expected, err.Error()) + }, + wantExpErrs: []task.ErrInvalidExpression{ + {Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}, + {Expression: "bar", Operation: task.SubSelectOperation, Err: task.ErrNamesNotAllowed}, + }, + wantHelp: `Suggestions: + + ❖ Given expression "--select-catalogers foo" + However, tags are not allowed with this operation (must use exact names). + Adding groups of catalogers may result in surprising behavior (create inaccurate SBOMs). + If you are certain this is what you want to do, use "--override-default-catalogers foo" instead. + + ❖ Given expression "--select-catalogers bar" + However, names are not allowed with this operation (must use tags). + It seems like you are intending to add a cataloger in addition to the default set. + ... Did you mean "--select-catalogers +bar" instead? 
+`, + }, + { + name: "has multiple expression errors (with error chains and sibling errors)", + err: func() error { + var err error + err = multierror.Append(err, fmt.Errorf("foo: %w", fmt.Errorf("bar: %w", errors.New("last")))) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "bar", Operation: task.SubSelectOperation, Err: task.ErrNamesNotAllowed}) + err = multierror.Append(err, errors.New("bottom")) + + // note we wrap the top error in a chain + return fmt.Errorf("top: %w", fmt.Errorf("middle: %w", err)) + }(), + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + expected := `top: middle: 4 errors occurred: + * foo: bar: last + * invalid expression: "foo": tags are not allowed with this operation (must use exact names) + * invalid expression: "bar": names are not allowed with this operation (must use tags) + * bottom + +` + return assert.Equal(t, expected, err.Error()) + }, + wantExpErrs: []task.ErrInvalidExpression{ + {Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}, + {Expression: "bar", Operation: task.SubSelectOperation, Err: task.ErrNamesNotAllowed}, + }, + wantHelp: `Suggestions: + + ❖ Given expression "--select-catalogers foo" + However, tags are not allowed with this operation (must use exact names). + Adding groups of catalogers may result in surprising behavior (create inaccurate SBOMs). + If you are certain this is what you want to do, use "--override-default-catalogers foo" instead. + + ❖ Given expression "--select-catalogers bar" + However, names are not allowed with this operation (must use tags). + It seems like you are intending to add a cataloger in addition to the default set. + ... Did you mean "--select-catalogers +bar" instead? 
+`, + }, + { + name: "preserve for any errors within ErrInvalidExpression types", + err: func() error { + var err error + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}) + err = multierror.Append(err, task.ErrInvalidExpression{Expression: "bar", Operation: task.SubSelectOperation, Err: errors.New("explanation")}) // this is what makes this test different... + + return err + }(), + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + // note: the errors are removed and the help text shows the enriched error help + expected := `2 errors occurred: + * invalid expression: "foo": tags are not allowed with this operation (must use exact names) + * invalid expression: "bar": explanation + +` + return assert.Equal(t, expected, err.Error()) + }, + wantExpErrs: []task.ErrInvalidExpression{ + {Expression: "foo", Operation: task.AddOperation, Err: task.ErrTagsNotAllowed}, + {Expression: "bar", Operation: task.SubSelectOperation, Err: errors.New("explanation")}, + }, + wantHelp: `Suggestions: + + ❖ Given expression "--select-catalogers foo" + However, tags are not allowed with this operation (must use exact names). + Adding groups of catalogers may result in surprising behavior (create inaccurate SBOMs). + If you are certain this is what you want to do, use "--override-default-catalogers foo" instead. 
+ +`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotExpErrs := filterExpressionErrors(tt.err) + tt.wantErr(t, tt.err) // ensure the error still remains + assert.Equal(t, tt.wantExpErrs, gotExpErrs) + + gotHelp := expressionErrorsHelp(gotExpErrs) + assert.Equal(t, tt.wantHelp, gotHelp) + }) + } +} + +func Test_expressionSuggestions(t *testing.T) { + tests := []struct { + name string + expErr task.ErrInvalidExpression + want string + }{ + { + name: "no embedded error", + expErr: task.ErrInvalidExpression{ + Expression: "example", + }, + want: ``, + }, + { + name: "general error", + expErr: task.ErrInvalidExpression{ + Err: errors.New("general error message"), + Expression: "example", + }, + want: ``, + }, + { + name: "ErrUnknownNameOrTag with add operation", + expErr: task.ErrInvalidExpression{ + Err: task.ErrUnknownNameOrTag, + Operation: task.AddOperation, + Expression: "+example", + }, + want: ``, + }, + { + name: "ErrUnknownNameOrTag with subselect operation", + expErr: task.ErrInvalidExpression{ + Err: task.ErrUnknownNameOrTag, + Operation: task.SubSelectOperation, + Expression: "example", + }, + want: ``, + }, + { + name: "ErrNamesNotAllowed with subselect operator", + expErr: task.ErrInvalidExpression{ + Err: task.ErrNamesNotAllowed, + Operation: task.SubSelectOperation, + Expression: "example", + }, + want: ` ❖ Given expression "--select-catalogers example" + However, names are not allowed with this operation (must use tags). + It seems like you are intending to add a cataloger in addition to the default set. + ... Did you mean "--select-catalogers +example" instead? +`, + }, + { + name: "ErrTagsNotAllowed with add operation", + expErr: task.ErrInvalidExpression{ + Err: task.ErrTagsNotAllowed, + Operation: task.AddOperation, + Expression: "+example", + }, + want: ` ❖ Given expression "--select-catalogers +example" + However, tags are not allowed with this operation (must use exact names). 
+ Adding groups of catalogers may result in surprising behavior (create inaccurate SBOMs). + If you are certain this is what you want to do, use "--override-default-catalogers example" instead. +`, + }, + { + name: "ErrAllNotAllowed with subselect operation", + expErr: task.ErrInvalidExpression{ + Err: task.ErrAllNotAllowed, + Operation: task.SubSelectOperation, + Expression: "example", + }, + want: ` ❖ Given expression "--select-catalogers example" + However, you cannot use the 'all' operand in this context. + It seems like you are intending to use all catalogers (which is not recommended). + ... Did you mean "--override-default-catalogers example" instead? +`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, expressionSuggetions(tt.expErr)) + }) + } +} diff --git a/cmd/syft/cli/commands/root.go b/cmd/syft/cli/commands/root.go index 51c477d4e8f..bfc508bf33e 100644 --- a/cmd/syft/cli/commands/root.go +++ b/cmd/syft/cli/commands/root.go @@ -25,7 +25,7 @@ func Root(app clio.Application, packagesCmd *cobra.Command) *cobra.Command { restoreStdout := ui.CaptureStdoutToTraceLog() defer restoreStdout() - return runScan(id, opts, args[0]) + return runScan(cmd.Context(), id, opts, args[0]) }, }, opts) } diff --git a/cmd/syft/cli/commands/scan.go b/cmd/syft/cli/commands/scan.go index 31c78120cec..143cf2b303f 100644 --- a/cmd/syft/cli/commands/scan.go +++ b/cmd/syft/cli/commands/scan.go @@ -1,21 +1,27 @@ package commands import ( + "context" + "errors" "fmt" + "os" + "reflect" + "strings" "github.com/hashicorp/go-multierror" "github.com/spf13/cobra" + "gopkg.in/yaml.v3" "github.com/anchore/clio" "github.com/anchore/stereoscope/pkg/image" - "github.com/anchore/syft/cmd/syft/cli/eventloop" "github.com/anchore/syft/cmd/syft/cli/options" "github.com/anchore/syft/cmd/syft/internal/ui" "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/file" 
"github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/internal/task" + "github.com/anchore/syft/syft" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) @@ -89,11 +95,57 @@ func Scan(app clio.Application) *cobra.Command { restoreStdout := ui.CaptureStdoutToTraceLog() defer restoreStdout() - return runScan(id, opts, args[0]) + return runScan(cmd.Context(), id, opts, args[0]) }, }, opts) } +func (o *scanOptions) PostLoad() error { + return o.validateLegacyOptionsNotUsed() +} + +func (o *scanOptions) validateLegacyOptionsNotUsed() error { + if o.Config.ConfigFile == "" { + return nil + } + + // check for legacy config file shapes that are no longer valid + type legacyConfig struct { + BasePath *string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` + DefaultImagePullSource *string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` + ExcludeBinaryOverlapByOwnership *bool `yaml:"exclude-binary-overlap-by-ownership" json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` + File any `yaml:"file" json:"file" mapstructure:"file"` + } + + by, err := os.ReadFile(o.Config.ConfigFile) + if err != nil { + return fmt.Errorf("unable to read config file during validations %q: %w", o.Config.ConfigFile, err) + } + + var legacy legacyConfig + if err := yaml.Unmarshal(by, &legacy); err != nil { + return fmt.Errorf("unable to parse config file during validations %q: %w", o.Config.ConfigFile, err) + } + + if legacy.DefaultImagePullSource != nil { + return fmt.Errorf("the config file option 'default-image-pull-source' has been removed, please use 'source.image.default-pull-source' instead") + } + + if legacy.ExcludeBinaryOverlapByOwnership != nil { + return fmt.Errorf("the config file option 'exclude-binary-overlap-by-ownership' has been removed, please use 
'relationships.exclude-binary-packages-with-file-ownership-overlap' instead") + } + + if legacy.BasePath != nil { + return fmt.Errorf("the config file option 'base-path' has been removed, please use 'source.base-path' instead") + } + + if legacy.File != nil && reflect.TypeOf(legacy.File).Kind() == reflect.String { + return fmt.Errorf("the config file option 'file' has been removed, please use 'outputs' instead") + } + + return nil +} + func validateScanArgs(cmd *cobra.Command, args []string) error { return validateArgs(cmd, args, "an image/directory argument is required") } @@ -111,7 +163,7 @@ func validateArgs(cmd *cobra.Command, args []string, error string) error { } // nolint:funlen -func runScan(id clio.Identification, opts *scanOptions, userInput string) error { +func runScan(ctx context.Context, id clio.Identification, opts *scanOptions, userInput string) error { writer, err := opts.SBOMWriter() if err != nil { return err @@ -131,7 +183,7 @@ func runScan(id clio.Identification, opts *scanOptions, userInput string) error } }() - s, err := generateSBOM(id, src, &opts.Catalog) + s, err := generateSBOM(ctx, id, src, &opts.Catalog) if err != nil { return err } @@ -151,7 +203,7 @@ func getSource(opts *options.Catalog, userInput string, filters ...func(*source. detection, err := source.Detect( userInput, source.DetectConfig{ - DefaultImageSource: opts.DefaultImagePullSource, + DefaultImageSource: opts.Source.Image.DefaultPullSource, }, ) if err != nil { @@ -175,7 +227,7 @@ func getSource(opts *options.Catalog, userInput string, filters ...func(*source. hashers, err := file.Hashers(opts.Source.File.Digests...) if err != nil { - return nil, fmt.Errorf("invalid hash: %w", err) + return nil, fmt.Errorf("invalid hash algorithm: %w", err) } src, err := detection.NewSource( @@ -190,7 +242,7 @@ func getSource(opts *options.Catalog, userInput string, filters ...func(*source. 
Paths: opts.Exclusions, }, DigestAlgorithms: hashers, - BasePath: opts.BasePath, + BasePath: opts.Source.BasePath, }, ) @@ -204,52 +256,192 @@ func getSource(opts *options.Catalog, userInput string, filters ...func(*source. return src, nil } -func generateSBOM(id clio.Identification, src source.Source, opts *options.Catalog) (*sbom.SBOM, error) { - tasks, err := eventloop.Tasks(opts) +func generateSBOM(ctx context.Context, id clio.Identification, src source.Source, opts *options.Catalog) (*sbom.SBOM, error) { + s, err := syft.CreateSBOM(ctx, src, opts.ToSBOMConfig(id)) if err != nil { + expErrs := filterExpressionErrors(err) + notifyExpressionErrors(expErrs) return nil, err } + return s, nil +} - s := sbom.SBOM{ - Source: src.Describe(), - Descriptor: sbom.Descriptor{ - Name: id.Name, - Version: id.Version, - Configuration: opts, - }, +func filterExpressionErrors(err error) []task.ErrInvalidExpression { + if err == nil { + return nil } - err = buildRelationships(&s, src, tasks) + expErrs := processErrors(err) - return &s, err + return expErrs } -func buildRelationships(s *sbom.SBOM, src source.Source, tasks []eventloop.Task) error { - var errs error - - var relationships []<-chan artifact.Relationship - for _, task := range tasks { - c := make(chan artifact.Relationship) - relationships = append(relationships, c) - go func(task eventloop.Task) { - err := eventloop.RunTask(task, &s.Artifacts, src, c) - if err != nil { - errs = multierror.Append(errs, err) +// processErrors traverses error chains and multierror lists and returns all ErrInvalidExpression errors found +func processErrors(err error) []task.ErrInvalidExpression { + var result []task.ErrInvalidExpression + + var processError func(...error) + processError = func(errs ...error) { + for _, e := range errs { + // note: using errors.As will result in surprising behavior (since that will traverse the error chain, + // potentially skipping over nodes in a list of errors) + if cerr, ok := 
// indentMsg prefixes every line of msg with a space and guarantees the result
// ends with a newline. An empty message is returned unchanged so that callers
// do not emit a stray blank line.
func indentMsg(msg string) string {
	if msg == "" {
		return ""
	}

	var b strings.Builder
	for _, line := range strings.Split(msg, "\n") {
		b.WriteString(" ")
		b.WriteString(line)
		b.WriteString("\n")
	}

	return b.String()
}
"--override-default-catalogers" + } + return fmt.Sprintf("%s %s", flag, expErr.Expression) +} + +func getExplanation(expErr task.ErrInvalidExpression) string { + err := expErr.Err + if errors.Is(err, task.ErrUnknownNameOrTag) { + noun := "" + switch expErr.Operation { + case task.AddOperation: + noun = "name" + case task.SubSelectOperation: + noun = "tag" + default: + noun = "name or tag" + } + + return fmt.Sprintf("However, %q is not a recognized cataloger %s.", trimOperation(expErr.Expression), noun) + } + + if errors.Is(err, task.ErrNamesNotAllowed) { + if expErr.Operation == task.SubSelectOperation { + return "However, " + err.Error() + ".\nIt seems like you are intending to add a cataloger in addition to the default set." // nolint:goconst + } + return "However, " + err.Error() + "." // nolint:goconst + } + + if errors.Is(err, task.ErrTagsNotAllowed) { + return "However, " + err.Error() + ".\nAdding groups of catalogers may result in surprising behavior (create inaccurate SBOMs)." // nolint:goconst + } + + if errors.Is(err, task.ErrAllNotAllowed) { + return "However, you " + err.Error() + ".\nIt seems like you are intending to use all catalogers (which is not recommended)." + } + + if err != nil { + return "However, this is not valid: " + err.Error() + } + + return "" +} + +func getHintPhrase(expErr task.ErrInvalidExpression) string { + if errors.Is(expErr.Err, task.ErrUnknownNameOrTag) { + return "" + } + + switch expErr.Operation { + case task.AddOperation: + if errors.Is(expErr.Err, task.ErrTagsNotAllowed) { + return fmt.Sprintf("If you are certain this is what you want to do, use %q instead.", "--override-default-catalogers "+trimOperation(expErr.Expression)) + } + + case task.SubSelectOperation: + didYouMean := "... Did you mean %q instead?" 
// trimOperation strips any leading add (+) or subtract (-) operation
// characters from a selection expression, leaving the bare cataloger name/tag.
func trimOperation(x string) string {
	return strings.TrimLeftFunc(x, func(r rune) bool {
		return r == '+' || r == '-'
	})
}
t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = assert.NoError + } + o := &scanOptions{ + Config: options.Config{ConfigFile: tt.cfg}, + } + tt.wantErr(t, o.validateLegacyOptionsNotUsed()) + }) + } +} + +func assertErrorContains(contains string) assert.ErrorAssertionFunc { + return func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorContains(t, err, contains, i...) + } +} diff --git a/cmd/syft/cli/commands/test-fixtures/scan-configs/no-legacy-options.yaml b/cmd/syft/cli/commands/test-fixtures/scan-configs/no-legacy-options.yaml new file mode 100644 index 00000000000..51af565a245 --- /dev/null +++ b/cmd/syft/cli/commands/test-fixtures/scan-configs/no-legacy-options.yaml @@ -0,0 +1,2 @@ +# simple, valid syft config with no legacy fields used +parallelism: 5 diff --git a/cmd/syft/cli/commands/test-fixtures/scan-configs/with-base-path.yaml b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-base-path.yaml new file mode 100644 index 00000000000..5dc5db6bcaf --- /dev/null +++ b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-base-path.yaml @@ -0,0 +1 @@ +base-path: "something" \ No newline at end of file diff --git a/cmd/syft/cli/commands/test-fixtures/scan-configs/with-default-pull-source.yaml b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-default-pull-source.yaml new file mode 100644 index 00000000000..8a9e836342e --- /dev/null +++ b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-default-pull-source.yaml @@ -0,0 +1 @@ +default-image-pull-source: "something" \ No newline at end of file diff --git a/cmd/syft/cli/commands/test-fixtures/scan-configs/with-exclude-binary-overlap-by-ownership.yaml b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-exclude-binary-overlap-by-ownership.yaml new file mode 100644 index 00000000000..e0864a8785b --- /dev/null +++ b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-exclude-binary-overlap-by-ownership.yaml @@ -0,0 +1 @@ 
+exclude-binary-overlap-by-ownership: true \ No newline at end of file diff --git a/cmd/syft/cli/commands/test-fixtures/scan-configs/with-file-section.yaml b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-file-section.yaml new file mode 100644 index 00000000000..603e3aeca27 --- /dev/null +++ b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-file-section.yaml @@ -0,0 +1,2 @@ +file: + selection: all \ No newline at end of file diff --git a/cmd/syft/cli/commands/test-fixtures/scan-configs/with-file-string.yaml b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-file-string.yaml new file mode 100644 index 00000000000..804eed613da --- /dev/null +++ b/cmd/syft/cli/commands/test-fixtures/scan-configs/with-file-string.yaml @@ -0,0 +1 @@ +file: path \ No newline at end of file diff --git a/cmd/syft/cli/eventloop/tasks.go b/cmd/syft/cli/eventloop/tasks.go deleted file mode 100644 index 8cfb68503ff..00000000000 --- a/cmd/syft/cli/eventloop/tasks.go +++ /dev/null @@ -1,151 +0,0 @@ -package eventloop - -import ( - "github.com/anchore/syft/cmd/syft/cli/options" - "github.com/anchore/syft/internal/file" - "github.com/anchore/syft/syft" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file/cataloger/filecontent" - "github.com/anchore/syft/syft/file/cataloger/filedigest" - "github.com/anchore/syft/syft/file/cataloger/filemetadata" - "github.com/anchore/syft/syft/sbom" - "github.com/anchore/syft/syft/source" -) - -type Task func(*sbom.Artifacts, source.Source) ([]artifact.Relationship, error) - -func Tasks(opts *options.Catalog) ([]Task, error) { - var tasks []Task - - generators := []func(opts *options.Catalog) (Task, error){ - generateCatalogPackagesTask, - generateCatalogFileMetadataTask, - generateCatalogFileDigestsTask, - generateCatalogContentsTask, - } - - for _, generator := range generators { - task, err := generator(opts) - if err != nil { - return nil, err - } - - if task != nil { - tasks = append(tasks, task) - } - } - - return 
tasks, nil -} - -func generateCatalogPackagesTask(opts *options.Catalog) (Task, error) { - if !opts.Package.Cataloger.Enabled { - return nil, nil - } - - task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { - packageCatalog, relationships, theDistro, err := syft.CatalogPackages(src, opts.ToCatalogerConfig()) - - results.Packages = packageCatalog - results.LinuxDistribution = theDistro - - return relationships, err - } - - return task, nil -} - -func generateCatalogFileMetadataTask(opts *options.Catalog) (Task, error) { - if !opts.FileMetadata.Cataloger.Enabled { - return nil, nil - } - - metadataCataloger := filemetadata.NewCataloger() - - task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(opts.FileMetadata.Cataloger.GetScope()) - if err != nil { - return nil, err - } - - result, err := metadataCataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileMetadata = result - return nil, nil - } - - return task, nil -} - -func generateCatalogFileDigestsTask(opts *options.Catalog) (Task, error) { - if !opts.FileMetadata.Cataloger.Enabled { - return nil, nil - } - - hashes, err := file.Hashers(opts.FileMetadata.Digests...) 
- if err != nil { - return nil, err - } - - digestsCataloger := filedigest.NewCataloger(hashes) - - task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(opts.FileMetadata.Cataloger.GetScope()) - if err != nil { - return nil, err - } - - result, err := digestsCataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileDigests = result - return nil, nil - } - - return task, nil -} - -func generateCatalogContentsTask(opts *options.Catalog) (Task, error) { - if !opts.FileContents.Cataloger.Enabled { - return nil, nil - } - - contentsCataloger, err := filecontent.NewCataloger(opts.FileContents.Globs, opts.FileContents.SkipFilesAboveSize) //nolint:staticcheck - if err != nil { - return nil, err - } - - task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(opts.FileContents.Cataloger.GetScope()) - if err != nil { - return nil, err - } - - result, err := contentsCataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileContents = result - return nil, nil - } - - return task, nil -} - -func RunTask(t Task, a *sbom.Artifacts, src source.Source, c chan<- artifact.Relationship) error { - defer close(c) - - relationships, err := t(a, src) - if err != nil { - return err - } - - for _, relationship := range relationships { - c <- relationship - } - - return nil -} diff --git a/cmd/syft/cli/options/catalog.go b/cmd/syft/cli/options/catalog.go index 5f9d06c770a..3d8baafe27c 100644 --- a/cmd/syft/cli/options/catalog.go +++ b/cmd/syft/cli/options/catalog.go @@ -6,58 +6,141 @@ import ( "strings" "github.com/iancoleman/strcase" - "github.com/mitchellh/go-homedir" - "github.com/scylladb/go-set/strset" "github.com/anchore/clio" "github.com/anchore/fangs" + intFile "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft" 
"github.com/anchore/syft/syft/cataloging" - "github.com/anchore/syft/syft/pkg/cataloger" - binaryCataloger "github.com/anchore/syft/syft/pkg/cataloger/binary" - golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang" - javaCataloger "github.com/anchore/syft/syft/pkg/cataloger/java" - javascriptCataloger "github.com/anchore/syft/syft/pkg/cataloger/javascript" + "github.com/anchore/syft/syft/cataloging/filecataloging" + "github.com/anchore/syft/syft/cataloging/pkgcataloging" + "github.com/anchore/syft/syft/file/cataloger/filecontent" + "github.com/anchore/syft/syft/pkg/cataloger/binary" + "github.com/anchore/syft/syft/pkg/cataloger/golang" + "github.com/anchore/syft/syft/pkg/cataloger/java" + "github.com/anchore/syft/syft/pkg/cataloger/javascript" "github.com/anchore/syft/syft/pkg/cataloger/kernel" - pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python" + "github.com/anchore/syft/syft/pkg/cataloger/python" "github.com/anchore/syft/syft/source" ) type Catalog struct { - Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"` - Package pkg `yaml:"package" json:"package" mapstructure:"package"` - Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"` - Java java `yaml:"java" json:"java" mapstructure:"java"` - Javascript javascript `yaml:"javascript" json:"javascript" mapstructure:"javascript"` - LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"` - Python python `yaml:"python" json:"python" mapstructure:"python"` - FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` - FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"` - Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"` - Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"` - Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` - Name string `yaml:"name" 
json:"name" mapstructure:"name"` - Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"` - Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel - DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source - BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths - ExcludeBinaryOverlapByOwnership bool `yaml:"exclude-binary-overlap-by-ownership" json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` // exclude synthetic binary packages owned by os package files + // high-level cataloger configuration + Catalogers []string `yaml:"-" json:"catalogers" mapstructure:"catalogers"` // deprecated and not shown in yaml output + DefaultCatalogers []string `yaml:"default-catalogers" json:"default-catalogers" mapstructure:"default-catalogers"` + SelectCatalogers []string `yaml:"select-catalogers" json:"select-catalogers" mapstructure:"select-catalogers"` + Package packageConfig `yaml:"package" json:"package" mapstructure:"package"` + File fileConfig `yaml:"file" json:"file" mapstructure:"file"` + Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` + Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel + Relationships relationshipsConfig `yaml:"relationships" json:"relationships" mapstructure:"relationships"` + + // ecosystem-specific cataloger configuration + Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"` + Java javaConfig `yaml:"java" json:"java" mapstructure:"java"` + JavaScript javaScriptConfig `yaml:"javascript" json:"javascript" mapstructure:"javascript"` + LinuxKernel linuxKernelConfig `yaml:"linux-kernel" json:"linux-kernel" 
mapstructure:"linux-kernel"` + Python pythonConfig `yaml:"python" json:"python" mapstructure:"python"` + + // configuration for the source (the subject being analyzed) + Registry registryConfig `yaml:"registry" json:"registry" mapstructure:"registry"` + Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` + Name string `yaml:"name" json:"name" mapstructure:"name"` // deprecated + Source sourceConfig `yaml:"source" json:"source" mapstructure:"source"` + Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"` } var _ interface { clio.FlagAdder clio.PostLoader + fangs.FieldDescriber } = (*Catalog)(nil) func DefaultCatalog() Catalog { return Catalog{ - Package: defaultPkg(), - LinuxKernel: defaultLinuxKernel(), - FileMetadata: defaultFileMetadata(), - FileContents: defaultFileContents(), - Source: defaultSourceCfg(), - Parallelism: 1, - ExcludeBinaryOverlapByOwnership: true, + Scope: source.SquashedScope.String(), + Package: defaultPackageConfig(), + LinuxKernel: defaultLinuxKernelConfig(), + File: defaultFileConfig(), + Relationships: defaultRelationshipsConfig(), + Source: defaultSourceConfig(), + Parallelism: 1, + } +} + +func (cfg Catalog) ToSBOMConfig(id clio.Identification) *syft.CreateSBOMConfig { + return syft.DefaultCreateSBOMConfig(). + WithTool(id.Name, id.Version). + WithParallelism(cfg.Parallelism). + WithRelationshipsConfig(cfg.ToRelationshipsConfig()). + WithSearchConfig(cfg.ToSearchConfig()). + WithPackagesConfig(cfg.ToPackagesConfig()). + WithFilesConfig(cfg.ToFilesConfig()). + WithCatalogerSelection( + pkgcataloging.NewSelectionRequest(). + WithDefaults(cfg.DefaultCatalogers...). 
+ WithExpression(cfg.SelectCatalogers...), + ) +} + +func (cfg Catalog) ToSearchConfig() cataloging.SearchConfig { + return cataloging.SearchConfig{ + Scope: source.ParseScope(cfg.Scope), + } +} + +func (cfg Catalog) ToRelationshipsConfig() cataloging.RelationshipsConfig { + return cataloging.RelationshipsConfig{ + PackageFileOwnership: cfg.Relationships.PackageFileOwnership, + PackageFileOwnershipOverlap: cfg.Relationships.PackageFileOwnershipOverlap, + // note: this option was surfaced in the syft application configuration before this relationships section was added + ExcludeBinaryPackagesWithFileOwnershipOverlap: cfg.Package.ExcludeBinaryOverlapByOwnership, + } +} + +func (cfg Catalog) ToFilesConfig() filecataloging.Config { + hashers, err := intFile.Hashers(cfg.File.Metadata.Digests...) + if err != nil { + log.WithFields("error", err).Warn("unable to configure file hashers") + } + + return filecataloging.Config{ + Selection: cfg.File.Metadata.Selection, + Hashers: hashers, + Content: filecontent.Config{ + Globs: cfg.File.Content.Globs, + SkipFilesAboveSize: cfg.File.Content.SkipFilesAboveSize, + }, + } +} + +func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { + archiveSearch := cataloging.ArchiveSearchConfig{ + IncludeIndexedArchives: cfg.Package.SearchIndexedArchives, + IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives, + } + return pkgcataloging.Config{ + Binary: binary.DefaultCatalogerConfig(), + Golang: golang.DefaultCatalogerConfig(). + WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses). + WithLocalModCacheDir(cfg.Golang.LocalModCacheDir). + WithSearchRemoteLicenses(cfg.Golang.SearchRemoteLicenses). + WithProxy(cfg.Golang.Proxy). + WithNoProxy(cfg.Golang.NoProxy), + JavaScript: javascript.DefaultCatalogerConfig(). + WithSearchRemoteLicenses(cfg.JavaScript.SearchRemoteLicenses). 
+ WithNpmBaseURL(cfg.JavaScript.NpmBaseURL), + LinuxKernel: kernel.LinuxKernelCatalogerConfig{ + CatalogModules: cfg.LinuxKernel.CatalogModules, + }, + Python: python.CatalogerConfig{ + GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements, + }, + JavaArchive: java.DefaultArchiveCatalogerConfig(). + WithUseNetwork(cfg.Java.UseNetwork). + WithMavenBaseURL(cfg.Java.MavenURL). + WithArchiveTraversal(archiveSearch, cfg.Java.MaxParentRecursiveDepth), } } @@ -66,7 +149,7 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) { for _, scope := range source.AllScopes { validScopeValues = append(validScopeValues, strcase.ToDelimited(string(scope), '-')) } - flags.StringVarP(&cfg.Package.Cataloger.Scope, "scope", "s", + flags.StringVarP(&cfg.Scope, "scope", "s", fmt.Sprintf("selection of layers to catalog, options=%v", validScopeValues)) flags.StringVarP(&cfg.Platform, "platform", "", @@ -78,6 +161,20 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) { flags.StringArrayVarP(&cfg.Catalogers, "catalogers", "", "enable one or more package catalogers") + if pfp, ok := flags.(fangs.PFlagSetProvider); ok { + if err := pfp.PFlagSet().MarkDeprecated("catalogers", "use: override-default-catalogers and select-catalogers"); err != nil { + panic(err) + } + } else { + panic("unable to mark flags as deprecated") + } + + flags.StringArrayVarP(&cfg.DefaultCatalogers, "override-default-catalogers", "", + "set the base set of catalogers to use (defaults to 'image' or 'directory' depending on the scan source)") + + flags.StringArrayVarP(&cfg.SelectCatalogers, "select-catalogers", "", + "add, remove, and filter the catalogers to be used") + flags.StringVarP(&cfg.Source.Name, "name", "", "set the name of the target being analyzed") @@ -92,25 +189,15 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) { flags.StringVarP(&cfg.Source.Version, "source-version", "", "set the version of the target being analyzed") - flags.StringVarP(&cfg.BasePath, "base-path", "", + 
flags.StringVarP(&cfg.Source.BasePath, "base-path", "", "base directory for scanning, no links will be followed above this directory, and all paths will be reported relative to this directory") } -func (cfg *Catalog) PostLoad() error { - // parse options on this struct - var catalogers []string - for _, c := range cfg.Catalogers { - for _, f := range strings.Split(c, ",") { - catalogers = append(catalogers, strings.TrimSpace(f)) - } - } - sort.Strings(catalogers) - cfg.Catalogers = catalogers - - if err := checkDefaultSourceValues(cfg.DefaultImagePullSource); err != nil { - return err - } +func (cfg *Catalog) DescribeFields(descriptions fangs.FieldDescriptionSet) { + descriptions.Add(&cfg.Parallelism, "number of cataloger workers to run in parallel") +} +func (cfg *Catalog) PostLoad() error { if cfg.Name != "" { log.Warnf("name parameter is deprecated. please use: source-name. name will be removed in a future version") if cfg.Source.Name == "" { @@ -118,66 +205,36 @@ func (cfg *Catalog) PostLoad() error { } } - return nil -} + usingLegacyCatalogers := len(cfg.Catalogers) > 0 + usingNewCatalogers := len(cfg.DefaultCatalogers) > 0 || len(cfg.SelectCatalogers) > 0 -func (cfg Catalog) ToCatalogerConfig() cataloger.Config { - return cataloger.Config{ - Search: cataloger.SearchConfig{ - IncludeIndexedArchives: cfg.Package.SearchIndexedArchives, - IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives, - Scope: cfg.Package.Cataloger.GetScope(), - }, - Catalogers: cfg.Catalogers, - Parallelism: cfg.Parallelism, - Golang: golangCataloger.DefaultCatalogerConfig(). - WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses). - WithLocalModCacheDir(cfg.Golang.LocalModCacheDir). - WithSearchRemoteLicenses(cfg.Golang.SearchRemoteLicenses). - WithProxy(cfg.Golang.Proxy). 
- WithNoProxy(cfg.Golang.NoProxy), - LinuxKernel: kernel.LinuxKernelCatalogerConfig{ - CatalogModules: cfg.LinuxKernel.CatalogModules, - }, - Java: javaCataloger.DefaultArchiveCatalogerConfig(). - WithUseNetwork(cfg.Java.UseNetwork). - WithMavenBaseURL(cfg.Java.MavenURL). - WithArchiveTraversal( - cataloging.ArchiveSearchConfig{ - IncludeIndexedArchives: cfg.Package.SearchIndexedArchives, - IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives, - }, - cfg.Java.MaxParentRecursiveDepth), - Javascript: javascriptCataloger.DefaultCatalogerConfig(). - WithSearchRemoteLicenses(cfg.Javascript.SearchRemoteLicenses). - WithNpmBaseURL(cfg.Javascript.NpmBaseURL), - Binary: binaryCataloger.DefaultCatalogerConfig(), - Python: pythonCataloger.CatalogerConfig{ - GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements, - }, - ExcludeBinaryOverlapByOwnership: cfg.ExcludeBinaryOverlapByOwnership, + if usingLegacyCatalogers && usingNewCatalogers { + return fmt.Errorf("cannot use both 'catalogers' and 'select-catalogers'/'default-catalogers' flags") } -} -var validDefaultSourceValues = []string{"registry", "docker", "podman", ""} + flatten := func(l []string) []string { + var out []string + for _, v := range l { + for _, s := range strings.Split(v, ",") { + out = append(out, strings.TrimSpace(s)) + } + } + sort.Strings(out) -func checkDefaultSourceValues(source string) error { - validValues := strset.New(validDefaultSourceValues...) 
- if !validValues.Has(source) { - validValuesString := strings.Join(validDefaultSourceValues, ", ") - return fmt.Errorf("%s is not a valid default source; please use one of the following: %s''", source, validValuesString) + return out } - return nil -} + cfg.Catalogers = flatten(cfg.Catalogers) + cfg.DefaultCatalogers = flatten(cfg.DefaultCatalogers) + cfg.SelectCatalogers = flatten(cfg.SelectCatalogers) -func expandFilePath(file string) (string, error) { - if file != "" { - expandedPath, err := homedir.Expand(file) - if err != nil { - return "", fmt.Errorf("unable to expand file path=%q: %w", file, err) - } - file = expandedPath + // for backwards compatibility + cfg.DefaultCatalogers = append(cfg.DefaultCatalogers, cfg.Catalogers...) + + s := source.ParseScope(cfg.Scope) + if s == source.UnknownScope { + return fmt.Errorf("bad scope value %q", cfg.Scope) } - return file, nil + + return nil } diff --git a/cmd/syft/cli/options/catalog_test.go b/cmd/syft/cli/options/catalog_test.go new file mode 100644 index 00000000000..f9a54249ae3 --- /dev/null +++ b/cmd/syft/cli/options/catalog_test.go @@ -0,0 +1,72 @@ +package options + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCatalog_PostLoad(t *testing.T) { + + tests := []struct { + name string + options Catalog + assert func(t *testing.T, options Catalog) + wantErr assert.ErrorAssertionFunc + }{ + { + name: "mutually exclusive cataloger flags (cat / def-cat)", + options: Catalog{ + Catalogers: []string{"foo,bar", "42"}, + DefaultCatalogers: []string{"some,thing"}, + Scope: "squashed", + }, + wantErr: assert.Error, + }, + { + name: "mutually exclusive cataloger flags (cat / sel-cat)", + options: Catalog{ + Catalogers: []string{"foo,bar", "42"}, + SelectCatalogers: []string{"some,thing"}, + Scope: "squashed", + }, + wantErr: assert.Error, + }, + { + name: "allow old cataloger flags", + options: Catalog{ + Catalogers: []string{"foo,bar"}, + Scope: "squashed", + }, + assert: func(t 
*testing.T, options Catalog) { + assert.Equal(t, []string{"bar", "foo"}, options.DefaultCatalogers) // note: sorted order + assert.Equal(t, []string{"bar", "foo"}, options.Catalogers) // note: sorted order + }, + }, + { + name: "allow new cataloger flags", + options: Catalog{ + SelectCatalogers: []string{"foo,bar", "42"}, + DefaultCatalogers: []string{"some,thing"}, + Scope: "squashed", + }, + assert: func(t *testing.T, options Catalog) { + assert.Equal(t, []string{"42", "bar", "foo"}, options.SelectCatalogers) // note: sorted order + assert.Equal(t, []string{"some", "thing"}, options.DefaultCatalogers) // note: sorted order + assert.Empty(t, options.Catalogers) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = assert.NoError + } + tt.wantErr(t, tt.options.PostLoad(), fmt.Sprintf("PostLoad()")) + if tt.assert != nil { + tt.assert(t, tt.options) + } + }) + } +} diff --git a/cmd/syft/cli/options/config.go b/cmd/syft/cli/options/config.go index 85aeb69ddd3..25158ea75b2 100644 --- a/cmd/syft/cli/options/config.go +++ b/cmd/syft/cli/options/config.go @@ -1,6 +1,12 @@ package options +import "github.com/anchore/fangs" + // Config holds a reference to the specific config file that was used to load application configuration type Config struct { ConfigFile string `yaml:"config" json:"config" mapstructure:"config"` } + +func (cfg *Config) DescribeFields(descriptions fangs.FieldDescriptionSet) { + descriptions.Add(&cfg.ConfigFile, "the configuration file that was used to load application configuration") +} diff --git a/cmd/syft/cli/options/file.go b/cmd/syft/cli/options/file.go new file mode 100644 index 00000000000..b0eb8a1fc96 --- /dev/null +++ b/cmd/syft/cli/options/file.go @@ -0,0 +1,43 @@ +package options + +import ( + "fmt" + + intFile "github.com/anchore/syft/internal/file" + "github.com/anchore/syft/syft/file" +) + +type fileConfig struct { + Metadata fileMetadata `yaml:"metadata" json:"metadata" 
mapstructure:"metadata"` + Content fileContent `yaml:"content" json:"content" mapstructure:"content"` +} + +type fileMetadata struct { + Selection file.Selection `yaml:"selection" json:"selection" mapstructure:"selection"` + Digests []string `yaml:"digests" json:"digests" mapstructure:"digests"` +} + +type fileContent struct { + SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` + Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` +} + +func defaultFileConfig() fileConfig { + return fileConfig{ + Metadata: fileMetadata{ + Selection: file.FilesOwnedByPackageSelection, + Digests: []string{"sha1", "sha256"}, + }, + Content: fileContent{ + SkipFilesAboveSize: 250 * intFile.KB, + }, + } +} + +func (c *fileConfig) PostLoad() error { + switch c.Metadata.Selection { + case file.NoFilesSelection, file.FilesOwnedByPackageSelection, file.AllFilesSelection: + return nil + } + return fmt.Errorf("invalid file metadata selection: %q", c.Metadata.Selection) +} diff --git a/cmd/syft/cli/options/file_contents.go b/cmd/syft/cli/options/file_contents.go deleted file mode 100644 index 6dba465f5da..00000000000 --- a/cmd/syft/cli/options/file_contents.go +++ /dev/null @@ -1,21 +0,0 @@ -package options - -import ( - "github.com/anchore/syft/internal/file" - "github.com/anchore/syft/syft/source" -) - -type fileContents struct { - Cataloger scope `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` - SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` - Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` -} - -func defaultFileContents() fileContents { - return fileContents{ - Cataloger: scope{ - Scope: source.SquashedScope.String(), - }, - SkipFilesAboveSize: 1 * file.MB, - } -} diff --git a/cmd/syft/cli/options/file_metadata.go b/cmd/syft/cli/options/file_metadata.go deleted file mode 100644 index 
eb2335a24da..00000000000 --- a/cmd/syft/cli/options/file_metadata.go +++ /dev/null @@ -1,19 +0,0 @@ -package options - -import ( - "github.com/anchore/syft/syft/source" -) - -type fileMetadata struct { - Cataloger scope `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` - Digests []string `yaml:"digests" json:"digests" mapstructure:"digests"` -} - -func defaultFileMetadata() fileMetadata { - return fileMetadata{ - Cataloger: scope{ - Scope: source.SquashedScope.String(), - }, - Digests: []string{"sha256"}, - } -} diff --git a/cmd/syft/cli/options/golang.go b/cmd/syft/cli/options/golang.go index ff99f414fd3..c8884602b88 100644 --- a/cmd/syft/cli/options/golang.go +++ b/cmd/syft/cli/options/golang.go @@ -1,6 +1,6 @@ package options -type golang struct { +type golangConfig struct { SearchLocalModCacheLicenses bool `json:"search-local-mod-cache-licenses" yaml:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"` LocalModCacheDir string `json:"local-mod-cache-dir" yaml:"local-mod-cache-dir" mapstructure:"local-mod-cache-dir"` SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"` diff --git a/cmd/syft/cli/options/java.go b/cmd/syft/cli/options/java.go index 7b7b9fd2506..342954068ee 100644 --- a/cmd/syft/cli/options/java.go +++ b/cmd/syft/cli/options/java.go @@ -1,6 +1,6 @@ package options -type java struct { +type javaConfig struct { UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"` MavenURL string `yaml:"maven-url" json:"maven-url" mapstructure:"maven-url"` MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` diff --git a/cmd/syft/cli/options/javascript.go b/cmd/syft/cli/options/javascript.go index f3f6383fa30..353b190036c 100644 --- a/cmd/syft/cli/options/javascript.go +++ b/cmd/syft/cli/options/javascript.go @@ -1,6 +1,6 @@ package options 
-type javascript struct { +type javaScriptConfig struct { SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"` NpmBaseURL string `json:"npm-base-url" yaml:"npm-base-url" mapstructure:"npm-base-url"` } diff --git a/cmd/syft/cli/options/linux_kernel.go b/cmd/syft/cli/options/linux_kernel.go index c56466abf81..ac6adff2adc 100644 --- a/cmd/syft/cli/options/linux_kernel.go +++ b/cmd/syft/cli/options/linux_kernel.go @@ -1,11 +1,11 @@ package options -type linuxKernel struct { +type linuxKernelConfig struct { CatalogModules bool `json:"catalog-modules" yaml:"catalog-modules" mapstructure:"catalog-modules"` } -func defaultLinuxKernel() linuxKernel { - return linuxKernel{ +func defaultLinuxKernelConfig() linuxKernelConfig { + return linuxKernelConfig{ CatalogModules: true, } } diff --git a/cmd/syft/cli/options/output.go b/cmd/syft/cli/options/output.go index 82431c6a632..961698d1cc8 100644 --- a/cmd/syft/cli/options/output.go +++ b/cmd/syft/cli/options/output.go @@ -96,7 +96,7 @@ func (o Output) SBOMWriter() (sbom.Writer, error) { } } - return makeSBOMWriter(o.Outputs, o.File, encoders) + return makeSBOMWriter(o.Outputs, o.LegacyFile, encoders) } func (o Output) OutputNameSet() *strset.Set { diff --git a/cmd/syft/cli/options/output_file.go b/cmd/syft/cli/options/output_file.go index b23fb91e13a..cf8e5632b96 100644 --- a/cmd/syft/cli/options/output_file.go +++ b/cmd/syft/cli/options/output_file.go @@ -1,6 +1,10 @@ package options import ( + "fmt" + + "github.com/mitchellh/go-homedir" + "github.com/anchore/clio" "github.com/anchore/fangs" "github.com/anchore/syft/syft/sbom" @@ -13,13 +17,13 @@ var _ interface { // Deprecated: OutputFile supports the --file to write the SBOM output to type OutputFile struct { - Enabled bool `yaml:"-" json:"-" mapstructure:"-"` - File string `yaml:"file" json:"file" mapstructure:"file"` + Enabled bool `yaml:"-" json:"-" mapstructure:"-"` + LegacyFile string `yaml:"-" 
json:"-" mapstructure:"legacyFile"` } func (o *OutputFile) AddFlags(flags clio.FlagSet) { if o.Enabled { - flags.StringVarP(&o.File, "file", "", + flags.StringVarP(&o.LegacyFile, "file", "", "file to write the default report output to (default is STDOUT)") if pfp, ok := flags.(fangs.PFlagSetProvider); ok { @@ -33,12 +37,12 @@ func (o *OutputFile) PostLoad() error { if !o.Enabled { return nil } - if o.File != "" { - file, err := expandFilePath(o.File) + if o.LegacyFile != "" { + file, err := expandFilePath(o.LegacyFile) if err != nil { return err } - o.File = file + o.LegacyFile = file } return nil } @@ -47,10 +51,21 @@ func (o *OutputFile) SBOMWriter(f sbom.FormatEncoder) (sbom.Writer, error) { if !o.Enabled { return nil, nil } - writer, err := newSBOMMultiWriter(newSBOMWriterDescription(f, o.File)) + writer, err := newSBOMMultiWriter(newSBOMWriterDescription(f, o.LegacyFile)) if err != nil { return nil, err } return writer, nil } + +func expandFilePath(file string) (string, error) { + if file != "" { + expandedPath, err := homedir.Expand(file) + if err != nil { + return "", fmt.Errorf("unable to expand file path=%q: %w", file, err) + } + file = expandedPath + } + return file, nil +} diff --git a/cmd/syft/cli/options/pkg.go b/cmd/syft/cli/options/pkg.go index 329dad9ed88..e384e568079 100644 --- a/cmd/syft/cli/options/pkg.go +++ b/cmd/syft/cli/options/pkg.go @@ -1,23 +1,18 @@ package options -import ( - "github.com/anchore/syft/syft/pkg/cataloger" -) +import "github.com/anchore/syft/syft/cataloging" -type pkg struct { - Cataloger scope `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` - SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"` - SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"` +type packageConfig struct { + SearchUnindexedArchives bool `yaml:"search-unindexed-archives" 
json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"` + SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"` + ExcludeBinaryOverlapByOwnership bool `yaml:"exclude-binary-overlap-by-ownership" json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` // exclude synthetic binary packages owned by os package files } -func defaultPkg() pkg { - c := cataloger.DefaultSearchConfig() - return pkg{ - SearchIndexedArchives: c.IncludeIndexedArchives, - SearchUnindexedArchives: c.IncludeUnindexedArchives, - Cataloger: scope{ - Enabled: true, - Scope: c.Scope.String(), - }, +func defaultPackageConfig() packageConfig { + c := cataloging.DefaultArchiveSearchConfig() + return packageConfig{ + SearchIndexedArchives: c.IncludeIndexedArchives, + SearchUnindexedArchives: c.IncludeUnindexedArchives, + ExcludeBinaryOverlapByOwnership: true, } } diff --git a/cmd/syft/cli/options/python.go b/cmd/syft/cli/options/python.go index 0efab8713a2..26b36c99ac2 100644 --- a/cmd/syft/cli/options/python.go +++ b/cmd/syft/cli/options/python.go @@ -1,5 +1,5 @@ package options -type python struct { +type pythonConfig struct { GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` } diff --git a/cmd/syft/cli/options/registry.go b/cmd/syft/cli/options/registry.go index 455ba93d51f..2ea23653c1b 100644 --- a/cmd/syft/cli/options/registry.go +++ b/cmd/syft/cli/options/registry.go @@ -18,16 +18,16 @@ type RegistryCredentials struct { TLSKey string `yaml:"tls-key,omitempty" json:"tls-key,omitempty" mapstructure:"tls-key"` } -type registry struct { +type registryConfig struct { InsecureSkipTLSVerify bool `yaml:"insecure-skip-tls-verify" json:"insecure-skip-tls-verify" mapstructure:"insecure-skip-tls-verify"` InsecureUseHTTP bool `yaml:"insecure-use-http" json:"insecure-use-http" 
mapstructure:"insecure-use-http"` Auth []RegistryCredentials `yaml:"auth" json:"auth" mapstructure:"auth"` CACert string `yaml:"ca-cert" json:"ca-cert" mapstructure:"ca-cert"` } -var _ clio.PostLoader = (*registry)(nil) +var _ clio.PostLoader = (*registryConfig)(nil) -func (cfg *registry) PostLoad() error { +func (cfg *registryConfig) PostLoad() error { // there may be additional credentials provided by env var that should be appended to the set of credentials authority, username, password, token, tlsCert, tlsKey := os.Getenv("SYFT_REGISTRY_AUTH_AUTHORITY"), @@ -62,7 +62,7 @@ func hasNonEmptyCredentials(username, password, token, tlsCert, tlsKey string) b return hasUserPass || hasToken || hasTLSMaterial } -func (cfg *registry) ToOptions() *image.RegistryOptions { +func (cfg *registryConfig) ToOptions() *image.RegistryOptions { var auth = make([]image.RegistryCredentials, len(cfg.Auth)) for i, a := range cfg.Auth { auth[i] = image.RegistryCredentials{ diff --git a/cmd/syft/cli/options/registry_test.go b/cmd/syft/cli/options/registry_test.go index 4979fcbc281..150f2007805 100644 --- a/cmd/syft/cli/options/registry_test.go +++ b/cmd/syft/cli/options/registry_test.go @@ -72,19 +72,19 @@ func TestHasNonEmptyCredentials(t *testing.T) { func Test_registry_ToOptions(t *testing.T) { tests := []struct { name string - input registry + input registryConfig expected image.RegistryOptions }{ { name: "no registry options", - input: registry{}, + input: registryConfig{}, expected: image.RegistryOptions{ Credentials: []image.RegistryCredentials{}, }, }, { name: "set InsecureSkipTLSVerify", - input: registry{ + input: registryConfig{ InsecureSkipTLSVerify: true, }, expected: image.RegistryOptions{ @@ -94,7 +94,7 @@ func Test_registry_ToOptions(t *testing.T) { }, { name: "set InsecureUseHTTP", - input: registry{ + input: registryConfig{ InsecureUseHTTP: true, }, expected: image.RegistryOptions{ @@ -104,7 +104,7 @@ func Test_registry_ToOptions(t *testing.T) { }, { name: "set all bool 
options", - input: registry{ + input: registryConfig{ InsecureSkipTLSVerify: true, InsecureUseHTTP: true, }, @@ -116,7 +116,7 @@ func Test_registry_ToOptions(t *testing.T) { }, { name: "provide all tls configuration", - input: registry{ + input: registryConfig{ CACert: "ca.crt", InsecureSkipTLSVerify: true, Auth: []RegistryCredentials{ diff --git a/cmd/syft/cli/options/relationships.go b/cmd/syft/cli/options/relationships.go new file mode 100644 index 00000000000..5bc491b85e0 --- /dev/null +++ b/cmd/syft/cli/options/relationships.go @@ -0,0 +1,22 @@ +package options + +import "github.com/anchore/fangs" + +var _ fangs.FieldDescriber = (*relationshipsConfig)(nil) + +type relationshipsConfig struct { + PackageFileOwnership bool `mapstructure:"package-file-ownership" json:"package-file-ownership" yaml:"package-file-ownership"` + PackageFileOwnershipOverlap bool `mapstructure:"package-file-ownership-overlap" json:"package-file-ownership-overlap" yaml:"package-file-ownership-overlap"` +} + +func defaultRelationshipsConfig() relationshipsConfig { + return relationshipsConfig{ + PackageFileOwnership: true, + PackageFileOwnershipOverlap: true, + } +} + +func (r *relationshipsConfig) DescribeFields(descriptions fangs.FieldDescriptionSet) { + descriptions.Add(&r.PackageFileOwnership, "include package-to-file relationships that indicate which files are owned by which packages.") + descriptions.Add(&r.PackageFileOwnershipOverlap, "include package-to-package relationships that indicate one package is owned by another due to files claimed to be owned by one package are also evidence of another package's existence.") +} diff --git a/cmd/syft/cli/options/scope.go b/cmd/syft/cli/options/scope.go deleted file mode 100644 index ae6efcffea7..00000000000 --- a/cmd/syft/cli/options/scope.go +++ /dev/null @@ -1,27 +0,0 @@ -package options - -import ( - "fmt" - - "github.com/anchore/clio" - "github.com/anchore/syft/syft/source" -) - -type scope struct { - Enabled bool `yaml:"enabled" 
json:"enabled" mapstructure:"enabled"` - Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` -} - -var _ clio.PostLoader = (*scope)(nil) - -func (opt *scope) PostLoad() error { - s := opt.GetScope() - if s == source.UnknownScope { - return fmt.Errorf("bad scope value %v", opt.Scope) - } - return nil -} - -func (opt scope) GetScope() source.Scope { - return source.ParseScope(opt.Scope) -} diff --git a/cmd/syft/cli/options/source.go b/cmd/syft/cli/options/source.go index 41e30199645..609b4132e8c 100644 --- a/cmd/syft/cli/options/source.go +++ b/cmd/syft/cli/options/source.go @@ -1,19 +1,51 @@ package options -type sourceCfg struct { - Name string `json:"name" yaml:"name" mapstructure:"name"` - Version string `json:"version" yaml:"version" mapstructure:"version"` - File fileSource `json:"file" yaml:"file" mapstructure:"file"` +import ( + "fmt" + "strings" + + "github.com/scylladb/go-set/strset" +) + +type sourceConfig struct { + Name string `json:"name" yaml:"name" mapstructure:"name"` + Version string `json:"version" yaml:"version" mapstructure:"version"` + BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths + File fileSource `json:"file" yaml:"file" mapstructure:"file"` + Image imageSource `json:"image" yaml:"image" mapstructure:"image"` } type fileSource struct { Digests []string `json:"digests" yaml:"digests" mapstructure:"digests"` } -func defaultSourceCfg() sourceCfg { - return sourceCfg{ +type imageSource struct { + DefaultPullSource string `json:"default-pull-source" yaml:"default-pull-source" mapstructure:"default-pull-source"` +} + +func defaultSourceConfig() sourceConfig { + return sourceConfig{ File: fileSource{ Digests: []string{"sha256"}, }, + Image: imageSource{ + DefaultPullSource: "", + }, + } +} + +func (c imageSource) PostLoad() error { + return checkDefaultSourceValues(c.DefaultPullSource) +} + +var validDefaultSourceValues = []string{"registry", "docker", "podman", ""} + 
+func checkDefaultSourceValues(source string) error { + validValues := strset.New(validDefaultSourceValues...) + if !validValues.Has(source) { + validValuesString := strings.Join(validDefaultSourceValues, ", ") + return fmt.Errorf("%s is not a valid default source; please use one of the following: %s''", source, validValuesString) } + + return nil } diff --git a/cmd/syft/cli/options/update_check.go b/cmd/syft/cli/options/update_check.go index a05b5242061..0f396a5d9fc 100644 --- a/cmd/syft/cli/options/update_check.go +++ b/cmd/syft/cli/options/update_check.go @@ -1,5 +1,7 @@ package options +import "github.com/anchore/fangs" + type UpdateCheck struct { CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not } @@ -9,3 +11,7 @@ func DefaultUpdateCheck() UpdateCheck { CheckForAppUpdate: true, } } + +func (cfg *UpdateCheck) DescribeFields(descriptions fangs.FieldDescriptionSet) { + descriptions.Add(&cfg.CheckForAppUpdate, "whether to check for an application update on start up or not") +} diff --git a/cmd/syft/cli/ui/__snapshots__/handle_file_digests_cataloger_test.snap b/cmd/syft/cli/ui/__snapshots__/handle_file_digests_cataloger_test.snap deleted file mode 100755 index b4572c26541..00000000000 --- a/cmd/syft/cli/ui/__snapshots__/handle_file_digests_cataloger_test.snap +++ /dev/null @@ -1,8 +0,0 @@ - -[TestHandler_handleFileDigestsCatalogerStarted/cataloging_in_progress - 1] - ⠋ Cataloging file digests ━━━━━━━━━━━━━━━━━━━━ [current] ---- - -[TestHandler_handleFileDigestsCatalogerStarted/cataloging_complete - 1] - ✔ Cataloged file digests ---- diff --git a/cmd/syft/cli/ui/__snapshots__/handle_file_metadata_cataloger_test.snap b/cmd/syft/cli/ui/__snapshots__/handle_file_metadata_cataloger_test.snap deleted file mode 100755 index 200a0dfb4ad..00000000000 --- a/cmd/syft/cli/ui/__snapshots__/handle_file_metadata_cataloger_test.snap +++ /dev/null @@ 
-1,8 +0,0 @@ - -[TestHandler_handleFileMetadataCatalogerStarted/cataloging_in_progress - 1] - ⠋ Cataloging file metadata ━━━━━━━━━━━━━━━━━━━━ [current] ---- - -[TestHandler_handleFileMetadataCatalogerStarted/cataloging_complete - 1] - ✔ Cataloged file metadata ---- diff --git a/cmd/syft/cli/ui/__snapshots__/handle_package_cataloger_test.snap b/cmd/syft/cli/ui/__snapshots__/handle_package_cataloger_test.snap deleted file mode 100755 index 5d5c165e03d..00000000000 --- a/cmd/syft/cli/ui/__snapshots__/handle_package_cataloger_test.snap +++ /dev/null @@ -1,16 +0,0 @@ - -[TestHandler_handlePackageCatalogerStarted/cataloging_in_progress - 1] - ⠋ Cataloging packages [50 packages] ---- - -[TestHandler_handlePackageCatalogerStarted/cataloging_only_files_complete - 1] - ⠋ Cataloging packages [50 packages] ---- - -[TestHandler_handlePackageCatalogerStarted/cataloging_only_packages_complete - 1] - ⠋ Cataloging packages [100 packages] ---- - -[TestHandler_handlePackageCatalogerStarted/cataloging_complete - 1] - ✔ Cataloged packages [100 packages] ---- diff --git a/cmd/syft/cli/ui/__snapshots__/handle_secrets_cataloger_test.snap b/cmd/syft/cli/ui/__snapshots__/handle_secrets_cataloger_test.snap deleted file mode 100755 index 00a123ef7f0..00000000000 --- a/cmd/syft/cli/ui/__snapshots__/handle_secrets_cataloger_test.snap +++ /dev/null @@ -1,8 +0,0 @@ - -[TestHandler_handleSecretsCatalogerStarted/cataloging_in_progress - 1] - ⠋ Cataloging secrets ━━━━━━━━━━━━━━━━━━━━ [64 secrets] ---- - -[TestHandler_handleSecretsCatalogerStarted/cataloging_complete - 1] - ✔ Cataloged secrets [64 secrets] ---- diff --git a/syft/pkg/relationships_by_file_ownership.go b/internal/relationship/by_file_ownership.go similarity index 70% rename from syft/pkg/relationships_by_file_ownership.go rename to internal/relationship/by_file_ownership.go index 8dc5c88e431..3c420217c0d 100644 --- a/syft/pkg/relationships_by_file_ownership.go +++ b/internal/relationship/by_file_ownership.go @@ -1,4 +1,4 @@ -package 
pkg +package relationship import ( "sort" @@ -7,20 +7,23 @@ import ( "github.com/scylladb/go-set/strset" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" ) -// AltRpmDBGlob allows db matches against new locations introduced in fedora:{36,37} +// altRpmDBGlob allows db matches against new locations introduced in fedora:{36,37} // See https://github.com/anchore/syft/issues/1077 for larger context -const AltRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}" +const altRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}" var globsForbiddenFromBeingOwned = []string{ // any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the // RPM DB, so if not ignored that package would own all other packages on the system). - ApkDBGlob, - DpkgDBGlob, - RpmDBGlob, - AltRpmDBGlob, + pkg.ApkDBGlob, + pkg.DpkgDBGlob, + pkg.RpmDBGlob, + altRpmDBGlob, // DEB packages share common copyright info between, this does not mean that sharing these paths implies ownership. "/usr/share/doc/**/copyright", } @@ -29,9 +32,21 @@ type ownershipByFilesMetadata struct { Files []string `json:"files"` } -// RelationshipsByFileOwnership creates a package-to-package relationship based on discovering which packages have +func byFileOwnershipOverlapWorker(accessor sbomsync.Accessor) { + var relationships []artifact.Relationship + + accessor.ReadFromSBOM(func(s *sbom.SBOM) { + relationships = byFileOwnershipOverlap(s.Artifacts.Packages) + }) + + accessor.WriteToSBOM(func(s *sbom.SBOM) { + s.Relationships = append(s.Relationships, relationships...) + }) +} + +// byFileOwnershipOverlap creates a package-to-package relationship based on discovering which packages have // evidence locations that overlap with ownership claim from another package's package manager metadata. 
-func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship { +func byFileOwnershipOverlap(catalog *pkg.Collection) []artifact.Relationship { var relationships = findOwnershipByFilesRelationships(catalog) var edges []artifact.Relationship @@ -39,9 +54,13 @@ func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship { for childID, files := range children { fs := files.List() sort.Strings(fs) + + parent := catalog.Package(parentID) // TODO: this is potentially expensive + child := catalog.Package(childID) // TODO: this is potentially expensive + edges = append(edges, artifact.Relationship{ - From: catalog.byID[parentID], - To: catalog.byID[childID], + From: parent, + To: child, Type: artifact.OwnershipByFileOverlapRelationship, Data: ownershipByFilesMetadata{ Files: fs, @@ -55,7 +74,7 @@ func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship { // findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of // a package is found to be owned by another (from the owner's .Metadata.Files[]). 
-func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[artifact.ID]*strset.Set { +func findOwnershipByFilesRelationships(catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set { var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set) if catalog == nil { @@ -69,7 +88,7 @@ func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[ } // check to see if this is a file owner - pkgFileOwner, ok := candidateOwnerPkg.Metadata.(FileOwner) + pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner) if !ok { continue } diff --git a/syft/pkg/relationships_by_file_ownership_test.go b/internal/relationship/by_file_ownership_test.go similarity index 71% rename from syft/pkg/relationships_by_file_ownership_test.go rename to internal/relationship/by_file_ownership_test.go index c155a8a4fa5..3961502406b 100644 --- a/syft/pkg/relationships_by_file_ownership_test.go +++ b/internal/relationship/by_file_ownership_test.go @@ -1,4 +1,4 @@ -package pkg +package relationship import ( "testing" @@ -7,25 +7,26 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" ) func TestOwnershipByFilesRelationship(t *testing.T) { tests := []struct { name string - setup func(t testing.TB) ([]Package, []artifact.Relationship) + setup func(t testing.TB) ([]pkg.Package, []artifact.Relationship) }{ { name: "owns-by-real-path", - setup: func(t testing.TB) ([]Package, []artifact.Relationship) { - parent := Package{ + setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) { + parent := pkg.Package{ Locations: file.NewLocationSet( file.NewVirtualLocation("/a/path", "/another/path"), file.NewVirtualLocation("/b/path", "/bee/path"), ), - Type: RpmPkg, - Metadata: RpmDBEntry{ - Files: []RpmFileRecord{ + Type: pkg.RpmPkg, + Metadata: pkg.RpmDBEntry{ + Files: []pkg.RpmFileRecord{ {Path: "/owning/path/1"}, {Path: "/owning/path/2"}, {Path: "/d/path"}, @@ 
-34,12 +35,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) { } parent.SetID() - child := Package{ + child := pkg.Package{ Locations: file.NewLocationSet( file.NewVirtualLocation("/c/path", "/another/path"), file.NewVirtualLocation("/d/path", "/another/path"), ), - Type: NpmPkg, + Type: pkg.NpmPkg, } child.SetID() @@ -54,20 +55,20 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, } - return []Package{parent, child}, []artifact.Relationship{relationship} + return []pkg.Package{parent, child}, []artifact.Relationship{relationship} }, }, { name: "owns-by-virtual-path", - setup: func(t testing.TB) ([]Package, []artifact.Relationship) { - parent := Package{ + setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) { + parent := pkg.Package{ Locations: file.NewLocationSet( file.NewVirtualLocation("/a/path", "/some/other/path"), file.NewVirtualLocation("/b/path", "/bee/path"), ), - Type: RpmPkg, - Metadata: RpmDBEntry{ - Files: []RpmFileRecord{ + Type: pkg.RpmPkg, + Metadata: pkg.RpmDBEntry{ + Files: []pkg.RpmFileRecord{ {Path: "/owning/path/1"}, {Path: "/owning/path/2"}, {Path: "/another/path"}, @@ -76,12 +77,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) { } parent.SetID() - child := Package{ + child := pkg.Package{ Locations: file.NewLocationSet( file.NewVirtualLocation("/c/path", "/another/path"), file.NewLocation("/d/path"), ), - Type: NpmPkg, + Type: pkg.NpmPkg, } child.SetID() @@ -95,20 +96,20 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, } - return []Package{parent, child}, []artifact.Relationship{relationship} + return []pkg.Package{parent, child}, []artifact.Relationship{relationship} }, }, { name: "ignore-empty-path", - setup: func(t testing.TB) ([]Package, []artifact.Relationship) { - parent := Package{ + setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) { + parent := pkg.Package{ Locations: file.NewLocationSet( file.NewVirtualLocation("/a/path", "/some/other/path"), 
file.NewVirtualLocation("/b/path", "/bee/path"), ), - Type: RpmPkg, - Metadata: RpmDBEntry{ - Files: []RpmFileRecord{ + Type: pkg.RpmPkg, + Metadata: pkg.RpmDBEntry{ + Files: []pkg.RpmFileRecord{ {Path: "/owning/path/1"}, {Path: "/owning/path/2"}, {Path: ""}, @@ -118,17 +119,17 @@ func TestOwnershipByFilesRelationship(t *testing.T) { parent.SetID() - child := Package{ + child := pkg.Package{ Locations: file.NewLocationSet( file.NewVirtualLocation("/c/path", "/another/path"), file.NewLocation("/d/path"), ), - Type: NpmPkg, + Type: pkg.NpmPkg, } child.SetID() - return []Package{parent, child}, nil + return []pkg.Package{parent, child}, nil }, }, } @@ -136,8 +137,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pkgs, expectedRelations := test.setup(t) - c := NewCollection(pkgs...) - relationships := RelationshipsByFileOwnership(c) + c := pkg.NewCollection(pkgs...) + relationships := byFileOwnershipOverlap(c) assert.Len(t, relationships, len(expectedRelations)) for idx, expectedRelationship := range expectedRelations { diff --git a/syft/pkg/relationships_evident_by.go b/internal/relationship/evident_by.go similarity index 67% rename from syft/pkg/relationships_evident_by.go rename to internal/relationship/evident_by.go index 26512a91767..d241b512c5b 100644 --- a/syft/pkg/relationships_evident_by.go +++ b/internal/relationship/evident_by.go @@ -1,14 +1,15 @@ -package pkg +package relationship import ( "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" ) -func RelationshipsEvidentBy(catalog *Collection) []artifact.Relationship { +func evidentBy(catalog *pkg.Collection) []artifact.Relationship { var edges []artifact.Relationship for _, p := range catalog.Sorted() { for _, l := range p.Locations.ToSlice() { - if v, exists := l.Annotations[EvidenceAnnotationKey]; !exists || v != PrimaryEvidenceAnnotation { + if v, exists := l.Annotations[pkg.EvidenceAnnotationKey]; !exists 
|| v != pkg.PrimaryEvidenceAnnotation { // skip non-primary evidence from being expressed as a relationship. // note: this may be configurable in the future. continue diff --git a/syft/pkg/relationships_evident_by_test.go b/internal/relationship/evident_by_test.go similarity index 74% rename from syft/pkg/relationships_evident_by_test.go rename to internal/relationship/evident_by_test.go index 21e7801bfd5..d65069053e3 100644 --- a/syft/pkg/relationships_evident_by_test.go +++ b/internal/relationship/evident_by_test.go @@ -1,4 +1,4 @@ -package pkg +package relationship import ( "testing" @@ -8,11 +8,12 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" ) func TestRelationshipsEvidentBy(t *testing.T) { - c := NewCollection() + c := pkg.NewCollection() coordA := file.Coordinates{ RealPath: "/somewhere/real", @@ -26,12 +27,12 @@ func TestRelationshipsEvidentBy(t *testing.T) { RealPath: "/somewhere/real", FileSystemID: "abc", } - pkgA := Package{ + pkgA := pkg.Package{ Locations: file.NewLocationSet( // added! - file.NewLocationFromCoordinates(coordA).WithAnnotation(EvidenceAnnotationKey, PrimaryEvidenceAnnotation), + file.NewLocationFromCoordinates(coordA).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), // ignored... - file.NewLocationFromCoordinates(coordC).WithAnnotation(EvidenceAnnotationKey, SupportingEvidenceAnnotation), + file.NewLocationFromCoordinates(coordC).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation), file.NewLocationFromCoordinates(coordD), ), } @@ -42,10 +43,10 @@ func TestRelationshipsEvidentBy(t *testing.T) { RealPath: "/somewhere-else/real", FileSystemID: "def", } - pkgB := Package{ + pkgB := pkg.Package{ Locations: file.NewLocationSet( // added! 
- file.NewLocationFromCoordinates(coordB).WithAnnotation(EvidenceAnnotationKey, PrimaryEvidenceAnnotation), + file.NewLocationFromCoordinates(coordB).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), ), } pkgB.SetID() @@ -53,7 +54,7 @@ func TestRelationshipsEvidentBy(t *testing.T) { tests := []struct { name string - catalog *Collection + catalog *pkg.Collection want []artifact.Relationship }{ { @@ -75,7 +76,7 @@ func TestRelationshipsEvidentBy(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - actual := RelationshipsEvidentBy(tt.catalog) + actual := evidentBy(tt.catalog) require.Len(t, actual, len(tt.want)) for i := range actual { assert.Equal(t, tt.want[i].From.ID(), actual[i].From.ID(), "from mismatch at index %d", i) diff --git a/syft/pkg/cataloger/package_exclusions.go b/internal/relationship/exclude_binaries_by_file_ownership_overlap.go similarity index 64% rename from syft/pkg/cataloger/package_exclusions.go rename to internal/relationship/exclude_binaries_by_file_ownership_overlap.go index c9d67dd61c7..c85b12f7586 100644 --- a/syft/pkg/cataloger/package_exclusions.go +++ b/internal/relationship/exclude_binaries_by_file_ownership_overlap.go @@ -1,10 +1,12 @@ -package cataloger +package relationship import ( "slices" + "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" ) var ( @@ -21,13 +23,24 @@ var ( } ) -// ExcludeBinaryByFileOwnershipOverlap will remove packages from a collection given the following properties are true +func excludeBinariesByFileOwnershipOverlap(accessor sbomsync.Accessor) { + accessor.WriteToSBOM(func(s *sbom.SBOM) { + for _, r := range s.Relationships { + if excludeBinaryByFileOwnershipOverlap(r, s.Artifacts.Packages) { + s.Artifacts.Packages.Delete(r.To.ID()) + s.Relationships = RemoveRelationshipsByID(s.Relationships, r.To.ID()) + } + } + }) +} + +// 
excludeBinaryByFileOwnershipOverlap will remove packages from a collection given the following properties are true // 1) the relationship between packages is OwnershipByFileOverlap // 2) the parent is an "os" package // 3) the child is a synthetic package generated by the binary cataloger // 4) the package names are identical // This was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931 -func ExcludeBinaryByFileOwnershipOverlap(r artifact.Relationship, c *pkg.Collection) bool { +func excludeBinaryByFileOwnershipOverlap(r artifact.Relationship, c *pkg.Collection) bool { if artifact.OwnershipByFileOverlapRelationship != r.Type { return false } diff --git a/syft/pkg/cataloger/package_exclusions_test.go b/internal/relationship/exclude_binaries_by_file_ownership_overlap_test.go similarity index 95% rename from syft/pkg/cataloger/package_exclusions_test.go rename to internal/relationship/exclude_binaries_by_file_ownership_overlap_test.go index b14ee29c6b4..6074e0c162a 100644 --- a/syft/pkg/cataloger/package_exclusions_test.go +++ b/internal/relationship/exclude_binaries_by_file_ownership_overlap_test.go @@ -1,4 +1,4 @@ -package cataloger +package relationship import ( "testing" @@ -67,7 +67,7 @@ func TestExclude(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - if !ExcludeBinaryByFileOwnershipOverlap(test.relationship, test.packages) && test.shouldExclude { + if !excludeBinaryByFileOwnershipOverlap(test.relationship, test.packages) && test.shouldExclude { t.Errorf("expected to exclude relationship %+v", test.relationship) } }) diff --git a/internal/relationship/finalize.go b/internal/relationship/finalize.go new file mode 100644 index 00000000000..bfe6bd66fba --- /dev/null +++ b/internal/relationship/finalize.go @@ -0,0 +1,37 @@ +package relationship + +import ( + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/cataloging" + 
"github.com/anchore/syft/syft/sbom" +) + +func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) { + accessor := builder.(sbomsync.Accessor) + + // add relationships showing packages that are evident by a file which is owned by another package (package-to-package) + if cfg.PackageFileOwnershipOverlap { + byFileOwnershipOverlapWorker(accessor) + } + + // conditionally remove binary packages based on file ownership overlap relationships found + // https://github.com/anchore/syft/issues/931 + if cfg.ExcludeBinaryPackagesWithFileOwnershipOverlap { + excludeBinariesByFileOwnershipOverlap(accessor) + } + + // add source "contains package" relationship (source-to-package) + var sourceRelationships []artifact.Relationship + accessor.ReadFromSBOM(func(s *sbom.SBOM) { + sourceRelationships = toSource(src, s.Artifacts.Packages) + }) + builder.AddRelationships(sourceRelationships...) + + // add evident-by relationships (package-to-file) + var evidentByRelationships []artifact.Relationship + accessor.ReadFromSBOM(func(s *sbom.SBOM) { + evidentByRelationships = evidentBy(s.Artifacts.Packages) + }) + builder.AddRelationships(evidentByRelationships...) +} diff --git a/internal/relationship/remove.go b/internal/relationship/remove.go new file mode 100644 index 00000000000..a1b969d4903 --- /dev/null +++ b/internal/relationship/remove.go @@ -0,0 +1,14 @@ +package relationship + +import "github.com/anchore/syft/syft/artifact" + +// TODO: put under test... 
+func RemoveRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship { + var filtered []artifact.Relationship + for _, r := range relationships { + if r.To.ID() != id && r.From.ID() != id { + filtered = append(filtered, r) + } + } + return filtered +} diff --git a/syft/pkg/relationships.go b/internal/relationship/sort.go similarity index 53% rename from syft/pkg/relationships.go rename to internal/relationship/sort.go index 204dab2d906..88582b4e922 100644 --- a/syft/pkg/relationships.go +++ b/internal/relationship/sort.go @@ -1,31 +1,26 @@ -package pkg +package relationship import ( "sort" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" ) -func NewRelationships(catalog *Collection) []artifact.Relationship { - rels := RelationshipsByFileOwnership(catalog) - rels = append(rels, RelationshipsEvidentBy(catalog)...) - return rels -} - -// SortRelationships takes a set of package-to-package relationships and sorts them in a stable order by name and version. +// Sort takes a set of package-to-package relationships and sorts them in a stable order by name and version. // Note: this does not consider package-to-other, other-to-package, or other-to-other relationships. // TODO: ideally this should be replaced with a more type-agnostic sort function that resides in the artifact package. 
-func SortRelationships(rels []artifact.Relationship) { +func Sort(rels []artifact.Relationship) { sort.SliceStable(rels, func(i, j int) bool { - return relationshipLess(rels[i], rels[j]) + return less(rels[i], rels[j]) }) } -func relationshipLess(i, j artifact.Relationship) bool { - iFrom, ok1 := i.From.(Package) - iTo, ok2 := i.To.(Package) - jFrom, ok3 := j.From.(Package) - jTo, ok4 := j.To.(Package) +func less(i, j artifact.Relationship) bool { + iFrom, ok1 := i.From.(pkg.Package) + iTo, ok2 := i.To.(pkg.Package) + jFrom, ok3 := j.From.(pkg.Package) + jTo, ok4 := j.To.(pkg.Package) if !(ok1 && ok2 && ok3 && ok4) { return false diff --git a/internal/relationship/to_source.go b/internal/relationship/to_source.go new file mode 100644 index 00000000000..f11a8d5e119 --- /dev/null +++ b/internal/relationship/to_source.go @@ -0,0 +1,19 @@ +package relationship + +import ( + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" +) + +func toSource(src artifact.Identifiable, c *pkg.Collection) []artifact.Relationship { + relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method? 
+ for p := range c.Enumerate() { + relationships = append(relationships, artifact.Relationship{ + From: src, + To: p, + Type: artifact.ContainsRelationship, + }) + } + + return relationships +} diff --git a/internal/sbomsync/builder.go b/internal/sbomsync/builder.go new file mode 100644 index 00000000000..f7404fcebc9 --- /dev/null +++ b/internal/sbomsync/builder.go @@ -0,0 +1,83 @@ +package sbomsync + +import ( + "sync" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +var _ interface { + Accessor + Builder +} = (*sbomBuilder)(nil) // integrity check + +// Builder provides a simple facade for simple additions to the SBOM +type Builder interface { + // nodes + + AddPackages(...pkg.Package) + + // edges + + AddRelationships(...artifact.Relationship) + + // other + + SetLinuxDistribution(linux.Release) +} + +// Accessor allows for low-level access to the SBOM +type Accessor interface { + WriteToSBOM(func(*sbom.SBOM)) + ReadFromSBOM(func(*sbom.SBOM)) +} + +type sbomBuilder struct { + sbom *sbom.SBOM + lock *sync.RWMutex +} + +func NewBuilder(s *sbom.SBOM) Builder { + return &sbomBuilder{ + sbom: s, + lock: &sync.RWMutex{}, + } +} + +func (b sbomBuilder) WriteToSBOM(fn func(*sbom.SBOM)) { + b.lock.Lock() + defer b.lock.Unlock() + + fn(b.sbom) +} + +func (b sbomBuilder) ReadFromSBOM(fn func(*sbom.SBOM)) { + b.lock.RLock() + defer b.lock.RUnlock() + + fn(b.sbom) +} + +func (b sbomBuilder) AddPackages(p ...pkg.Package) { + b.lock.Lock() + defer b.lock.Unlock() + + b.sbom.Artifacts.Packages.Add(p...) +} + +func (b sbomBuilder) AddRelationships(relationship ...artifact.Relationship) { + b.lock.Lock() + defer b.lock.Unlock() + + b.sbom.Relationships = append(b.sbom.Relationships, relationship...) 
+} + +func (b sbomBuilder) SetLinuxDistribution(release linux.Release) { + b.lock.Lock() + defer b.lock.Unlock() + + b.sbom.Artifacts.LinuxDistribution = &release +} diff --git a/internal/task/environment_tasks.go b/internal/task/environment_tasks.go new file mode 100644 index 00000000000..ab14faff1bd --- /dev/null +++ b/internal/task/environment_tasks.go @@ -0,0 +1,24 @@ +package task + +import ( + "context" + + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" +) + +// TODO: add tui element here? + +func NewEnvironmentTask() Task { + fn := func(ctx context.Context, resolver file.Resolver, builder sbomsync.Builder) error { + release := linux.IdentifyRelease(resolver) + if release != nil { + builder.SetLinuxDistribution(*release) + } + + return nil + } + + return NewTask("environment-cataloger", fn) +} diff --git a/internal/task/executor.go b/internal/task/executor.go new file mode 100644 index 00000000000..5ba48b58aa0 --- /dev/null +++ b/internal/task/executor.go @@ -0,0 +1,72 @@ +package task + +import ( + "context" + "fmt" + "runtime/debug" + "sync" + + "github.com/hashicorp/go-multierror" + + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/event/monitor" + "github.com/anchore/syft/syft/file" +) + +type Executor struct { + numWorkers int + tasks chan Task +} + +func NewTaskExecutor(tasks []Task, numWorkers int) *Executor { + p := &Executor{ + numWorkers: numWorkers, + tasks: make(chan Task, len(tasks)), + } + + for i := range tasks { + p.tasks <- tasks[i] + } + close(p.tasks) + + return p +} + +func (p *Executor) Execute(ctx context.Context, resolver file.Resolver, s sbomsync.Builder, prog *monitor.CatalogerTaskProgress) error { + var errs error + wg := &sync.WaitGroup{} + for i := 0; i < p.numWorkers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + + for { + tsk, ok := <-p.tasks + if !ok { + return + } + + if err := runTaskSafely(ctx, tsk, resolver, s); err != 
nil { + errs = multierror.Append(errs, fmt.Errorf("failed to run task: %w", err)) + prog.SetError(err) + } + prog.Increment() + } + }() + } + + wg.Wait() + + return errs +} + +func runTaskSafely(ctx context.Context, t Task, resolver file.Resolver, s sbomsync.Builder) (err error) { + // handle individual cataloger panics + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf("%v at:\n%s", e, string(debug.Stack())) + } + }() + + return t.Execute(ctx, resolver, s) +} diff --git a/internal/task/executor_test.go b/internal/task/executor_test.go new file mode 100644 index 00000000000..5da1a0f40cf --- /dev/null +++ b/internal/task/executor_test.go @@ -0,0 +1,27 @@ +package task + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "github.com/wagoodman/go-progress" + + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/event/monitor" + "github.com/anchore/syft/syft/file" +) + +func Test_TaskExecutor_PanicHandling(t *testing.T) { + tsk := NewTask("panicking-cataloger", func(_ context.Context, _ file.Resolver, _ sbomsync.Builder) error { + panic("something bad happened") + }) + ex := NewTaskExecutor([]Task{tsk}, 1) + + err := ex.Execute(context.Background(), nil, nil, &monitor.CatalogerTaskProgress{ + Manual: progress.NewManual(-1), + }) + + require.Error(t, err) + require.Contains(t, err.Error(), "executor_test.go") +} diff --git a/internal/task/expression.go b/internal/task/expression.go new file mode 100644 index 00000000000..e99551c6f3c --- /dev/null +++ b/internal/task/expression.go @@ -0,0 +1,238 @@ +package task + +import ( + "fmt" + "regexp" + "sort" + + "github.com/hashicorp/go-multierror" + "github.com/scylladb/go-set/strset" + + "github.com/anchore/syft/syft/cataloging/pkgcataloging" +) + +var expressionNodePattern = regexp.MustCompile(`^([a-zA-Z0-9][a-zA-Z0-9-+]*)+$`) + +const ( + SetOperation Operation = "set" + AddOperation Operation = "add" + SubSelectOperation Operation = "sub-select" + 
RemoveOperation Operation = "remove" +) + +var ( + ErrEmptyToken = fmt.Errorf("no value given") + ErrInvalidToken = fmt.Errorf("invalid token given: only alphanumeric characters and hyphens are allowed") + ErrInvalidOperator = fmt.Errorf("invalid operator given") + ErrUnknownNameOrTag = fmt.Errorf("unknown name or tag given") + ErrTagsNotAllowed = fmt.Errorf("tags are not allowed with this operation (must use exact names)") + ErrNamesNotAllowed = fmt.Errorf("names are not allowed with this operation (must use tags)") + ErrAllNotAllowed = fmt.Errorf("cannot use the 'all' operand in this context") +) + +// ErrInvalidExpression represents an expression that cannot be parsed or can be parsed but is logically invalid. +type ErrInvalidExpression struct { + Expression string + Operation Operation + Err error +} + +func (e ErrInvalidExpression) Error() string { + return fmt.Sprintf("invalid expression: %q: %s", e.Expression, e.Err.Error()) +} + +func newErrInvalidExpression(exp string, op Operation, err error) ErrInvalidExpression { + return ErrInvalidExpression{ + Expression: exp, + Operation: op, + Err: err, + } +} + +// Expression represents a single operation-operand pair with (all validation errors). +// E.g. "+foo", "-bar", or "something" are all expressions. Some validations are relevant to not only the +// syntax (operation and operator) but other are sensitive to the context of the operand (e.g. if a given operand +// is a tag or a name, validated against the operation). +type Expression struct { + Operation Operation + Operand string + Errors []error +} + +// Operation represents the type of operation to perform on the operand (set, add, remove, sub-select). +type Operation string + +// Expressions represents a list of expressions. +type Expressions []Expression + +// expressionContext represents all information needed to validate an expression (e.g. the set of all tasks and their tags). 
+type expressionContext struct { + Names *strset.Set + Tags *strset.Set +} + +func newExpressionContext(ts []Task) *expressionContext { + ec := &expressionContext{ + Names: strset.New(tasks(ts).Names()...), + Tags: strset.New(tasks(ts).Tags()...), + } + + ec.Tags.Add("all") + + return ec +} + +// newExpression creates a new validated Expression object relative to the task names and tags. +func (ec expressionContext) newExpression(exp string, operation Operation, token string) Expression { + if token == "" { + return Expression{ + Operation: operation, + Operand: token, + Errors: []error{newErrInvalidExpression(exp, operation, ErrEmptyToken)}, + } + } + + if !isValidNode(token) { + return Expression{ + Operation: operation, + Operand: token, + Errors: []error{newErrInvalidExpression(exp, operation, ErrInvalidToken)}, + } + } + + var err error + switch operation { + case SetOperation, RemoveOperation: + // names and tags allowed + if !ec.Tags.Has(token) && !ec.Names.Has(token) { + err = newErrInvalidExpression(exp, operation, ErrUnknownNameOrTag) + } + case AddOperation: + // only names are allowed + if !ec.Names.Has(token) { + if ec.Tags.Has(token) { + err = newErrInvalidExpression(exp, operation, ErrTagsNotAllowed) + } else { + err = newErrInvalidExpression(exp, operation, ErrUnknownNameOrTag) + } + } + case SubSelectOperation: + if token == "all" { + // special case: we cannot sub-select all (this is most likely a misconfiguration and the user intended to use the set operation) + err = newErrInvalidExpression(exp, operation, ErrAllNotAllowed) + } else if !ec.Tags.Has(token) { + // only tags are allowed... 
+ if ec.Names.Has(token) { + err = newErrInvalidExpression(exp, operation, ErrNamesNotAllowed) + } else { + err = newErrInvalidExpression(exp, operation, ErrUnknownNameOrTag) + } + } + } + + var errs []error + if err != nil { + errs = append(errs, err) + } + + return Expression{ + Operation: operation, + Operand: token, + Errors: errs, + } +} + +func newExpressionsFromSelectionRequest(nc *expressionContext, selectionRequest pkgcataloging.SelectionRequest) Expressions { + var all Expressions + + for _, exp := range selectionRequest.DefaultNamesOrTags { + all = append(all, nc.newExpression(exp, SetOperation, exp)) + } + + for _, exp := range selectionRequest.SubSelectTags { + all = append(all, nc.newExpression(exp, SubSelectOperation, exp)) + } + + for _, exp := range selectionRequest.AddNames { + all = append(all, nc.newExpression(exp, AddOperation, exp)) + } + + for _, exp := range selectionRequest.RemoveNamesOrTags { + all = append(all, nc.newExpression(exp, RemoveOperation, exp)) + } + + sort.Sort(all) + return all +} + +func isValidNode(s string) bool { + return expressionNodePattern.Match([]byte(s)) +} + +func (e Expressions) Clone() Expressions { + clone := make(Expressions, len(e)) + copy(clone, e) + return clone +} + +func (e Expression) String() string { + var op string + switch e.Operation { + case AddOperation: + op = "+" + case RemoveOperation: + op = "-" + case SubSelectOperation: + op = "" + case SetOperation: + op = "" + default: + op = "?" 
+ } + return op + e.Operand +} + +func (e Expressions) Len() int { + return len(e) +} + +func (e Expressions) Swap(i, j int) { + e[i], e[j] = e[j], e[i] +} + +// order of operations +var orderOfOps = map[Operation]int{ + SetOperation: 1, + SubSelectOperation: 2, + RemoveOperation: 3, + AddOperation: 4, +} + +func (e Expressions) Less(i, j int) bool { + ooi := orderOfOps[e[i].Operation] + ooj := orderOfOps[e[j].Operation] + + if ooi != ooj { + return ooi < ooj + } + + return i < j +} + +func (e Expressions) Errors() (errs []error) { + for _, n := range e { + if len(n.Errors) > 0 { + errs = append(errs, n.Errors...) + } + } + return errs +} + +func (e Expressions) Validate() error { + errs := e.Errors() + if len(errs) == 0 { + return nil + } + var err error + return multierror.Append(err, e.Errors()...) +} diff --git a/internal/task/expression_test.go b/internal/task/expression_test.go new file mode 100644 index 00000000000..e1a97ab2213 --- /dev/null +++ b/internal/task/expression_test.go @@ -0,0 +1,242 @@ +package task + +import ( + "sort" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/cataloging/pkgcataloging" +) + +func Test_newExpressionsFromSelectionRequest(t *testing.T) { + ts := []Task{ + dummyTask("1", "t1"), + dummyTask("2", "t2"), + dummyTask("3", "t3"), + dummyTask("4", "t4"), + dummyTask("5"), + dummyTask("6"), + } + + nc := newExpressionContext(ts) + + var tests = []struct { + name string + basis []string + expressions []string + expected Expressions + expectedErrors []error + }{ + { + name: "empty input", + basis: []string{}, + expressions: []string{}, + expected: nil, + }, + { + name: "valid single set operation", + basis: []string{"1"}, + expressions: []string{}, + expected: []Expression{ + {Operation: SetOperation, Operand: "1"}, + }, + }, + { + name: "add operation", + basis: []string{}, + expressions: []string{"+4"}, + expected: []Expression{ + {Operation: AddOperation, 
Operand: "4"}, + }, + }, + { + name: "remove operation", + basis: []string{}, + expressions: []string{"-3"}, + expected: []Expression{ + {Operation: RemoveOperation, Operand: "3"}, + }, + }, + { + name: "select operation", + basis: []string{}, + expressions: []string{"t2"}, + expected: []Expression{ + {Operation: SubSelectOperation, Operand: "t2"}, + }, + }, + { + name: "mixed operations order", + basis: []string{"1"}, + expressions: []string{"+4", "-3", "t2"}, + expected: []Expression{ + // note they are sorted by operation + {Operation: SetOperation, Operand: "1"}, + {Operation: SubSelectOperation, Operand: "t2"}, + {Operation: RemoveOperation, Operand: "3"}, + {Operation: AddOperation, Operand: "4"}, + }, + }, + { + name: "invalid token", + basis: []string{"!1"}, + expressions: []string{}, + expected: nil, + expectedErrors: []error{ErrInvalidToken}, + }, + { + name: "use + operator in basis", + basis: []string{"+1"}, + expressions: []string{}, + expected: nil, + expectedErrors: []error{ErrInvalidToken}, + }, + { + name: "use - operator in basis", + basis: []string{"-1"}, + expressions: []string{}, + expected: nil, + expectedErrors: []error{ErrInvalidToken}, + }, + { + name: "invalid name", + basis: []string{}, + expressions: []string{"+t1"}, + expected: nil, + expectedErrors: []error{ErrTagsNotAllowed}, + }, + { + name: "invalid tag", + basis: []string{}, + expressions: []string{"1"}, + expected: nil, + expectedErrors: []error{ErrNamesNotAllowed}, + }, + { + name: "invalid use of all", + basis: []string{}, + expressions: []string{"all"}, + expected: nil, + expectedErrors: []error{ErrAllNotAllowed}, + }, + { + name: "allow all operand", + basis: []string{"all"}, + expressions: []string{}, + expected: []Expression{ + // note they are sorted by operation + {Operation: SetOperation, Operand: "all"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + req := 
pkgcataloging.NewSelectionRequest().WithDefaults(tt.basis...).WithExpression(tt.expressions...) + + result := newExpressionsFromSelectionRequest(nc, req) + if tt.expectedErrors != nil { + errs := result.Errors() + require.Len(t, errs, len(tt.expectedErrors)) + for i, err := range tt.expectedErrors { + var target ErrInvalidExpression + require.ErrorAs(t, errs[i], &target) + assert.Equal(t, err, target.Err) + } + } else { + assert.Empty(t, result.Errors()) + assert.Equal(t, tt.expected, result) + } + }) + } +} + +func Test_expressionNodes_sort(t *testing.T) { + tests := []struct { + name string + subject Expressions + want Expressions + }{ + { + name: "sort operations but keep token order", + subject: []Expression{ + { + Operation: AddOperation, + Operand: "8", + }, + { + Operation: AddOperation, + Operand: "7", + }, + { + Operation: RemoveOperation, + Operand: "6", + }, + { + Operation: RemoveOperation, + Operand: "5", + }, + { + Operation: SetOperation, + Operand: "2", + }, + { + Operation: SetOperation, + Operand: "1", + }, + { + Operation: SubSelectOperation, + Operand: "4", + }, + { + Operation: SubSelectOperation, + Operand: "3", + }, + }, + want: []Expression{ + { + Operation: SetOperation, + Operand: "2", + }, + { + Operation: SetOperation, + Operand: "1", + }, + { + Operation: SubSelectOperation, + Operand: "4", + }, + { + Operation: SubSelectOperation, + Operand: "3", + }, + { + Operation: RemoveOperation, + Operand: "6", + }, + { + Operation: RemoveOperation, + Operand: "5", + }, + { + Operation: AddOperation, + Operand: "8", + }, + { + Operation: AddOperation, + Operand: "7", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := tt.subject.Clone() + sort.Sort(s) + assert.Equal(t, tt.want, s) + }) + } +} diff --git a/internal/task/file_tasks.go b/internal/task/file_tasks.go new file mode 100644 index 00000000000..ec0dd86f2d6 --- /dev/null +++ b/internal/task/file_tasks.go @@ -0,0 +1,135 @@ +package task + +import ( + 
"context" + "crypto" + "fmt" + + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/file/cataloger/filecontent" + "github.com/anchore/syft/syft/file/cataloger/filedigest" + "github.com/anchore/syft/syft/file/cataloger/filemetadata" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +func NewFileDigestCatalogerTask(selection file.Selection, hashers ...crypto.Hash) Task { + if selection == file.NoFilesSelection || len(hashers) == 0 { + return nil + } + + digestsCataloger := filedigest.NewCataloger(hashers) + + fn := func(ctx context.Context, resolver file.Resolver, builder sbomsync.Builder) error { + accessor := builder.(sbomsync.Accessor) + + coordinates, ok := coordinatesForSelection(selection, builder.(sbomsync.Accessor)) + if !ok { + return nil + } + + result, err := digestsCataloger.Catalog(resolver, coordinates...) + if err != nil { + return fmt.Errorf("unable to catalog file digests: %w", err) + } + + accessor.WriteToSBOM(func(sbom *sbom.SBOM) { + sbom.Artifacts.FileDigests = result + }) + + return nil + } + + return NewTask("file-digest-cataloger", fn) +} + +func NewFileMetadataCatalogerTask(selection file.Selection) Task { + if selection == file.NoFilesSelection { + return nil + } + + metadataCataloger := filemetadata.NewCataloger() + + fn := func(ctx context.Context, resolver file.Resolver, builder sbomsync.Builder) error { + accessor := builder.(sbomsync.Accessor) + + coordinates, ok := coordinatesForSelection(selection, builder.(sbomsync.Accessor)) + if !ok { + return nil + } + + result, err := metadataCataloger.Catalog(resolver, coordinates...) 
+ if err != nil { + return err + } + + accessor.WriteToSBOM(func(sbom *sbom.SBOM) { + sbom.Artifacts.FileMetadata = result + }) + + return nil + } + + return NewTask("file-metadata-cataloger", fn) +} + +func NewFileContentCatalogerTask(cfg filecontent.Config) Task { + if len(cfg.Globs) == 0 { + return nil + } + + cat := filecontent.NewCataloger(cfg) + + fn := func(ctx context.Context, resolver file.Resolver, builder sbomsync.Builder) error { + accessor := builder.(sbomsync.Accessor) + + result, err := cat.Catalog(resolver) + if err != nil { + return err + } + + accessor.WriteToSBOM(func(sbom *sbom.SBOM) { + sbom.Artifacts.FileContents = result + }) + + return nil + } + + return NewTask("file-content-cataloger", fn) +} + +// TODO: this should be replaced with a fix that allows passing a coordinate or location iterator to the cataloger +// Today internal to both cataloger this functions differently: a slice of coordinates vs a channel of locations +func coordinatesForSelection(selection file.Selection, accessor sbomsync.Accessor) ([]file.Coordinates, bool) { + if selection == file.AllFilesSelection { + return nil, true + } + + if selection == file.FilesOwnedByPackageSelection { + var coordinates []file.Coordinates + + accessor.ReadFromSBOM(func(sbom *sbom.SBOM) { + for _, r := range sbom.Relationships { + if r.Type != artifact.ContainsRelationship { + continue + } + if _, ok := r.From.(pkg.Package); !ok { + continue + } + if c, ok := r.To.(file.Coordinates); ok { + coordinates = append(coordinates, c) + } + } + }) + + if len(coordinates) == 0 { + return nil, false + } + + return coordinates, true + } + + return nil, false +} diff --git a/internal/task/file_tasks_test.go b/internal/task/file_tasks_test.go new file mode 100644 index 00000000000..b24e7e254a2 --- /dev/null +++ b/internal/task/file_tasks_test.go @@ -0,0 +1,104 @@ +package task + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/anchore/syft/internal/sbomsync" + 
"github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +func Test_coordinatesForSelection(t *testing.T) { + + tests := []struct { + name string + selection file.Selection + sbom *sbom.SBOM + files []file.Coordinates + ok bool + }{ + { + name: "all files", + selection: file.AllFilesSelection, + files: nil, + ok: true, + }, + { + name: "no files", + selection: file.NoFilesSelection, + files: nil, + ok: false, + }, + { + name: "specific files with hits", + selection: file.FilesOwnedByPackageSelection, + sbom: &sbom.SBOM{ + Relationships: []artifact.Relationship{ + { + From: pkg.Package{}, + To: file.Coordinates{ + RealPath: "path", + FileSystemID: "fs", + }, + Type: artifact.ContainsRelationship, + }, + }, + }, + files: []file.Coordinates{ + { + RealPath: "path", + FileSystemID: "fs", + }, + }, + ok: true, + }, + { + name: "specific files no hits (by wrong type)", + selection: file.FilesOwnedByPackageSelection, + sbom: &sbom.SBOM{ + Relationships: []artifact.Relationship{ + { + From: pkg.Package{}, + To: file.Coordinates{ + RealPath: "path", + FileSystemID: "fs", + }, + // wrong type + Type: artifact.DependencyOfRelationship, + }, + }, + }, + files: nil, + ok: false, + }, + { + name: "specific files no hits (by wrong node types)", + selection: file.FilesOwnedByPackageSelection, + sbom: &sbom.SBOM{ + Relationships: []artifact.Relationship{ + { + From: file.Coordinates{}, // wrong type + To: file.Coordinates{ + RealPath: "path", + FileSystemID: "fs", + }, + Type: artifact.ContainsRelationship, + }, + }, + }, + files: nil, + ok: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + files, ok := coordinatesForSelection(tt.selection, sbomsync.NewBuilder(tt.sbom).(sbomsync.Accessor)) + assert.Equal(t, tt.files, files) + assert.Equal(t, tt.ok, ok) + }) + } +} diff --git a/internal/task/package_task_factory.go 
b/internal/task/package_task_factory.go new file mode 100644 index 00000000000..20d6249c419 --- /dev/null +++ b/internal/task/package_task_factory.go @@ -0,0 +1,207 @@ +package task + +import ( + "context" + "fmt" + "sort" + "strings" + "unicode" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/syft/internal/bus" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/cataloging" + "github.com/anchore/syft/syft/cataloging/pkgcataloging" + "github.com/anchore/syft/syft/event/monitor" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" +) + +type packageTaskFactory func(cfg CatalogingFactoryConfig) Task + +type PackageTaskFactories []packageTaskFactory + +type CatalogingFactoryConfig struct { + SearchConfig cataloging.SearchConfig + RelationshipsConfig cataloging.RelationshipsConfig + DataGenerationConfig cataloging.DataGenerationConfig + PackagesConfig pkgcataloging.Config +} + +func DefaultCatalogingFactoryConfig() CatalogingFactoryConfig { + return CatalogingFactoryConfig{ + SearchConfig: cataloging.DefaultSearchConfig(), + RelationshipsConfig: cataloging.DefaultRelationshipsConfig(), + DataGenerationConfig: cataloging.DefaultDataGenerationConfig(), + PackagesConfig: pkgcataloging.DefaultConfig(), + } +} + +func newPackageTaskFactory(catalogerFactory func(CatalogingFactoryConfig) pkg.Cataloger, tags ...string) packageTaskFactory { + return func(cfg CatalogingFactoryConfig) Task { + return NewPackageTask(cfg, catalogerFactory(cfg), tags...) + } +} + +func newSimplePackageTaskFactory(catalogerFactory func() pkg.Cataloger, tags ...string) packageTaskFactory { + return func(cfg CatalogingFactoryConfig) Task { + return NewPackageTask(cfg, catalogerFactory(), tags...) 
+ } +} + +func (f PackageTaskFactories) Tasks(cfg CatalogingFactoryConfig) ([]Task, error) { + var allTasks []Task + taskNames := strset.New() + duplicateTaskNames := strset.New() + var err error + for _, factory := range f { + tsk := factory(cfg) + if tsk == nil { + continue + } + tskName := tsk.Name() + if taskNames.Has(tskName) { + duplicateTaskNames.Add(tskName) + } + + allTasks = append(allTasks, tsk) + taskNames.Add(tskName) + } + if duplicateTaskNames.Size() > 0 { + names := duplicateTaskNames.List() + sort.Strings(names) + err = fmt.Errorf("duplicate cataloger task names: %v", strings.Join(names, ", ")) + } + + return allTasks, err +} + +// NewPackageTask creates a Task function for a generic pkg.Cataloger, honoring the common configuration options. +// +//nolint:funlen +func NewPackageTask(cfg CatalogingFactoryConfig, c pkg.Cataloger, tags ...string) Task { + fn := func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error { + catalogerName := c.Name() + log.WithFields("name", catalogerName).Trace("starting package cataloger") + + info := monitor.GenericTask{ + Title: monitor.Title{ + Default: prettyName(catalogerName), + }, + ID: catalogerName, + ParentID: monitor.PackageCatalogingTaskID, + Context: "", + HideOnSuccess: true, + } + + t := bus.StartCatalogerTask(info, -1, "") + + pkgs, relationships, err := c.Catalog(resolver) + if err != nil { + return fmt.Errorf("unable to catalog packages with %q: %w", c.Name(), err) + } + + log.WithFields("cataloger", c.Name()).Debugf("discovered %d packages", len(pkgs)) + + for i, p := range pkgs { + if cfg.DataGenerationConfig.GenerateCPEs { + // generate CPEs (note: this is excluded from package ID, so is safe to mutate) + // we might have binary classified CPE already with the package so we want to append here + dictionaryCPE, ok := cpe.DictionaryFind(p) + if ok { + log.Tracef("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString()) + p.CPEs = 
append(p.CPEs, dictionaryCPE) + } else { + p.CPEs = append(p.CPEs, cpe.Generate(p)...) + } + } + + // if we were not able to identify the language we have an opportunity + // to try and get this value from the PURL. Worst case we assert that + // we could not identify the language at either stage and set UnknownLanguage + if p.Language == "" { + p.Language = pkg.LanguageFromPURL(p.PURL) + } + + if cfg.RelationshipsConfig.PackageFileOwnership { + // create file-to-package relationships for files owned by the package + owningRelationships, err := packageFileOwnershipRelationships(p, resolver) + if err != nil { + log.Warnf("unable to create any package-file relationships for package name=%q type=%q: %w", p.Name, p.Type, err) + } else { + relationships = append(relationships, owningRelationships...) + } + } + + pkgs[i] = p + } + + sbom.AddPackages(pkgs...) + sbom.AddRelationships(relationships...) + t.Add(int64(len(pkgs))) + + t.SetCompleted() + log.WithFields("name", c.Name()).Trace("package cataloger completed") + + return nil + } + tags = append(tags, pkgcataloging.PackageTag) + + return NewTask(c.Name(), fn, tags...) 
+} + +func prettyName(s string) string { + if s == "" { + return "" + } + + // Convert first character to uppercase + r := []rune(s) + r[0] = unicode.ToUpper(r[0]) + + return strings.ReplaceAll(string(r), "-", " ") +} + +func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { + fileOwner, ok := p.Metadata.(pkg.FileOwner) + if !ok { + return nil, nil + } + + locations := map[artifact.ID]file.Location{} + + for _, path := range fileOwner.OwnedFiles() { + pathRefs, err := resolver.FilesByPath(path) + if err != nil { + return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) + } + + if len(pathRefs) == 0 { + // ideally we want to warn users about missing files from a package, however, it is very common for + // container image authors to delete files that are not needed in order to keep image sizes small. Adding + // a warning here would be needlessly noisy (even for popular base images). + continue + } + + for _, ref := range pathRefs { + if oldRef, ok := locations[ref.Coordinates.ID()]; ok { + log.Debugf("found path duplicate of %s", oldRef.RealPath) + } + locations[ref.Coordinates.ID()] = ref + } + } + + var relationships []artifact.Relationship + for _, location := range locations { + relationships = append(relationships, artifact.Relationship{ + From: p, + To: location.Coordinates, + Type: artifact.ContainsRelationship, + }) + } + return relationships, nil +} diff --git a/internal/task/package_tasks.go b/internal/task/package_tasks.go new file mode 100644 index 00000000000..bea0c018271 --- /dev/null +++ b/internal/task/package_tasks.go @@ -0,0 +1,120 @@ +package task + +import ( + "github.com/anchore/syft/syft/cataloging/pkgcataloging" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/alpine" + "github.com/anchore/syft/syft/pkg/cataloger/arch" + "github.com/anchore/syft/syft/pkg/cataloger/binary" + "github.com/anchore/syft/syft/pkg/cataloger/cpp" + 
"github.com/anchore/syft/syft/pkg/cataloger/dart" + "github.com/anchore/syft/syft/pkg/cataloger/debian" + "github.com/anchore/syft/syft/pkg/cataloger/dotnet" + "github.com/anchore/syft/syft/pkg/cataloger/elixir" + "github.com/anchore/syft/syft/pkg/cataloger/erlang" + "github.com/anchore/syft/syft/pkg/cataloger/gentoo" + "github.com/anchore/syft/syft/pkg/cataloger/githubactions" + "github.com/anchore/syft/syft/pkg/cataloger/golang" + "github.com/anchore/syft/syft/pkg/cataloger/haskell" + "github.com/anchore/syft/syft/pkg/cataloger/java" + "github.com/anchore/syft/syft/pkg/cataloger/javascript" + "github.com/anchore/syft/syft/pkg/cataloger/nix" + "github.com/anchore/syft/syft/pkg/cataloger/php" + "github.com/anchore/syft/syft/pkg/cataloger/python" + "github.com/anchore/syft/syft/pkg/cataloger/r" + "github.com/anchore/syft/syft/pkg/cataloger/redhat" + "github.com/anchore/syft/syft/pkg/cataloger/ruby" + "github.com/anchore/syft/syft/pkg/cataloger/rust" + sbomCataloger "github.com/anchore/syft/syft/pkg/cataloger/sbom" + "github.com/anchore/syft/syft/pkg/cataloger/swift" +) + +//nolint:funlen +func DefaultPackageTaskFactories() PackageTaskFactories { + return []packageTaskFactory{ + // OS package installed catalogers /////////////////////////////////////////////////////////////////////////// + newSimplePackageTaskFactory(arch.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "alpm", "archlinux"), + newSimplePackageTaskFactory(alpine.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "apk", "alpine"), + newSimplePackageTaskFactory(debian.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "dpkg", "debian"), + newSimplePackageTaskFactory(gentoo.NewPortageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, 
pkgcataloging.OSTag, "linux", "portage", "gentoo"), + newSimplePackageTaskFactory(redhat.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"), + + // OS package declared catalogers /////////////////////////////////////////////////////////////////////////// + newSimplePackageTaskFactory(redhat.NewArchiveCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"), + + // language-specific package installed catalogers /////////////////////////////////////////////////////////////////////////// + newSimplePackageTaskFactory(cpp.NewConanInfoCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "cpp", "conan"), + newSimplePackageTaskFactory(javascript.NewPackageCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "javascript", "node"), + newSimplePackageTaskFactory(php.NewComposerInstalledCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "php", "composer"), + newSimplePackageTaskFactory(r.NewPackageCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "r"), + newSimplePackageTaskFactory(ruby.NewInstalledGemSpecCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "ruby", "gem", "gemspec"), + newSimplePackageTaskFactory(rust.NewAuditBinaryCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "rust", "binary"), + + // language-specific package declared catalogers /////////////////////////////////////////////////////////////////////////// + newSimplePackageTaskFactory(cpp.NewConanCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "cpp", "conan"), + newSimplePackageTaskFactory(dart.NewPubspecLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, 
pkgcataloging.LanguageTag, "dart"), + newSimplePackageTaskFactory(dotnet.NewDotnetDepsCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#"), + newSimplePackageTaskFactory(elixir.NewMixLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "elixir"), + newSimplePackageTaskFactory(erlang.NewRebarLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "erlang"), + newSimplePackageTaskFactory(haskell.NewHackageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "haskell", "hackage", "cabal"), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return golang.NewGoModuleFileCataloger(cfg.PackagesConfig.Golang) + }, + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "go", "golang", "gomod", + ), + newSimplePackageTaskFactory(java.NewGradleLockfileCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "java", "gradle"), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return java.NewPomCataloger(cfg.PackagesConfig.JavaArchive) + }, + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "java", "maven", + ), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return javascript.NewLockCataloger(cfg.PackagesConfig.JavaScript) + }, + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "javascript", "node", "npm", + ), + newSimplePackageTaskFactory(php.NewComposerLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "php", "composer"), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return python.NewPackageCataloger(cfg.PackagesConfig.Python) + }, + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, 
pkgcataloging.LanguageTag, "python", + ), + newSimplePackageTaskFactory(ruby.NewGemFileLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem"), + newSimplePackageTaskFactory(ruby.NewGemSpecCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem", "gemspec"), + newSimplePackageTaskFactory(rust.NewCargoLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "rust", "cargo"), + newSimplePackageTaskFactory(swift.NewCocoapodsCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "swift", "cocoapods"), + newSimplePackageTaskFactory(swift.NewSwiftPackageManagerCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "swift", "spm"), + + // language-specific package for both image and directory scans (but not necessarily declared) //////////////////////////////////////// + newSimplePackageTaskFactory(dotnet.NewDotnetPortableExecutableCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "dotnet", "c#", "binary"), + newSimplePackageTaskFactory(python.NewInstalledPackageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "python"), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return golang.NewGoModuleBinaryCataloger(cfg.PackagesConfig.Golang) + }, + pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "go", "golang", "gomod", "binary", + ), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return java.NewArchiveCataloger(cfg.PackagesConfig.JavaArchive) + }, + pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "java", "maven", + ), + 
newSimplePackageTaskFactory(java.NewNativeImageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "java"), + newSimplePackageTaskFactory(nix.NewStoreCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "nix"), + + // other package catalogers /////////////////////////////////////////////////////////////////////////// + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return binary.NewCataloger(cfg.PackagesConfig.Binary) + }, + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary"), + newSimplePackageTaskFactory(githubactions.NewActionUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"), + newSimplePackageTaskFactory(githubactions.NewWorkflowUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"), + newSimplePackageTaskFactory(sbomCataloger.NewCataloger, pkgcataloging.ImageTag, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "sbom"), // note: not evidence of installed packages + } +} diff --git a/internal/task/relationship_tasks.go b/internal/task/relationship_tasks.go new file mode 100644 index 00000000000..6f4b6a16d07 --- /dev/null +++ b/internal/task/relationship_tasks.go @@ -0,0 +1,35 @@ +package task + +import ( + "context" + + "github.com/anchore/syft/internal/relationship" + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/cataloging" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/source" +) + +var _ artifact.Identifiable = (*sourceIdentifierAdapter)(nil) + +type sourceIdentifierAdapter struct { + desc source.Description +} + +func (s sourceIdentifierAdapter) ID() artifact.ID { + return artifact.ID(s.desc.ID) +} + +func 
NewRelationshipsTask(cfg cataloging.RelationshipsConfig, src source.Description) Task {
+	// the task delegates to relationship.Finalize, wrapping the source description so it
+	// can be referenced as an artifact.Identifiable
+	fn := func(ctx context.Context, resolver file.Resolver, builder sbomsync.Builder) error {
+		relationship.Finalize(
+			builder,
+			cfg,
+			&sourceIdentifierAdapter{desc: src})
+
+		return nil
+	}
+
+	return NewTask("relationships-cataloger", fn)
+}
diff --git a/internal/task/selection.go b/internal/task/selection.go
new file mode 100644
index 00000000000..bd0cacdfbeb
--- /dev/null
+++ b/internal/task/selection.go
@@ -0,0 +1,176 @@
+package task
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/scylladb/go-set/strset"
+
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
+)
+
+// Selection represents the user's request for a subset of tasks to run and the resulting set of task names that were
+// selected. Additionally, all tokens that were matched on to reach the returned conclusion are also provided.
+type Selection struct {
+	Request      pkgcataloging.SelectionRequest
+	Result       *strset.Set
+	TokensByTask map[string]TokenSelection
+}
+
+// TokenSelection represents the tokens that were matched on to either include or exclude a given task (based on expression evaluation).
+type TokenSelection struct {
+	SelectedOn   *strset.Set
+	DeselectedOn *strset.Set
+}
+
+func newTokenSelection(selected, deselected []string) TokenSelection {
+	return TokenSelection{
+		SelectedOn:   strset.New(selected...),
+		DeselectedOn: strset.New(deselected...),
+	}
+}
+
+// merge folds the selected/deselected tokens from the given selections into this one.
+func (ts *TokenSelection) merge(other ...TokenSelection) {
+	for _, o := range other {
+		if ts.SelectedOn != nil {
+			ts.SelectedOn.Add(o.SelectedOn.List()...)
+		}
+		if ts.DeselectedOn != nil {
+			ts.DeselectedOn.Add(o.DeselectedOn.List()...)
+		}
+	}
+}
+
+func newSelection() Selection {
+	return Selection{
+		Result:       strset.New(),
+		TokensByTask: make(map[string]TokenSelection),
+	}
+}
+
+// Select parses the given expressions as two sets: expressions that represent a "set" operation, and expressions that
+// represent all other operations. The parsed expressions are then evaluated against the given tasks to return
+// a subset (or the same) set of tasks.
+func Select(allTasks []Task, selectionRequest pkgcataloging.SelectionRequest) ([]Task, Selection, error) {
+	nodes := newExpressionsFromSelectionRequest(newExpressionContext(allTasks), selectionRequest)
+
+	finalTasks, selection := selectByExpressions(allTasks, nodes)
+
+	selection.Request = selectionRequest
+
+	return finalTasks, selection, nodes.Validate()
+}
+
+// selectByExpressions selects the set of tasks to run based on the given expression(s).
+func selectByExpressions(ts tasks, nodes Expressions) (tasks, Selection) {
+	if len(nodes) == 0 {
+		return ts, newSelection()
+	}
+
+	finalSet := newSet()
+	selectionSet := newSet()
+	addSet := newSet()
+	removeSet := newSet()
+
+	allSelections := make(map[string]TokenSelection)
+
+	// work against a copy so that sorting does not mutate the caller's expressions
+	nodes = nodes.Clone()
+	sort.Sort(nodes)
+
+	for i, node := range nodes {
+		if len(node.Errors) > 0 {
+			continue
+		}
+		selectedTasks, selections := evaluateExpression(ts, node)
+
+		for name, ss := range selections {
+			if selection, exists := allSelections[name]; exists {
+				ss.merge(selection)
+			}
+			allSelections[name] = ss
+		}
+
+		if len(selectedTasks) == 0 {
+			// fix: message previously read "no cataloger tasks selected found for given selection" (editing leftover)
+			log.WithFields("selection", fmt.Sprintf("%q", node.String())).Warn("no cataloger tasks selected for given selection (this might be a misconfiguration)")
+		}
+
+		switch node.Operation {
+		case SetOperation:
+			finalSet.Add(selectedTasks...)
+		case AddOperation, "":
+			addSet.Add(selectedTasks...)
+		case RemoveOperation:
+			removeSet.Add(selectedTasks...)
+		case SubSelectOperation:
+			selectionSet.Add(selectedTasks...)
+		default:
+			nodes[i].Errors = append(nodes[i].Errors, ErrInvalidOperator)
+		}
+	}
+
+	// apply sub-selections first (intersection), then removals, then additions — in that order
+	if len(selectionSet.tasks) > 0 {
+		finalSet.Intersect(selectionSet.Tasks()...)
+	}
+	finalSet.Remove(removeSet.Tasks()...)
+	finalSet.Add(addSet.Tasks()...)
+
+	finalTasks := finalSet.Tasks()
+
+	return finalTasks, Selection{
+		Result:       strset.New(finalTasks.Names()...),
+		TokensByTask: allSelections,
+	}
+}
+
+// evaluateExpression returns the set of tasks that match the given expression (as well as all tokens that were matched
+// on to reach the returned conclusion).
+func evaluateExpression(ts tasks, node Expression) ([]Task, map[string]TokenSelection) {
+	selection := make(map[string]TokenSelection)
+	var finalTasks []Task
+
+	for _, t := range ts {
+		if !isSelected(t, node.Operand) {
+			continue
+		}
+
+		s := newTokenSelection(nil, nil)
+
+		// record whether this token was used to include or exclude the task
+		switch node.Operation {
+		case SetOperation, SubSelectOperation, AddOperation:
+			s.SelectedOn.Add(node.Operand)
+		case RemoveOperation:
+			s.DeselectedOn.Add(node.Operand)
+		}
+
+		finalTasks = append(finalTasks, t)
+
+		if og, exists := selection[t.Name()]; exists {
+			s.merge(og)
+		}
+
+		selection[t.Name()] = s
+	}
+	return finalTasks, selection
+}
+
+// isSelected returns true if the given task matches the given token. If the token is "all" then the task is always selected.
+func isSelected(td Task, token string) bool {
+	if token == "all" {
+		return true
+	}
+
+	if ts, ok := td.(Selector); ok {
+		// use the selector to verify all tags
+		if ts.HasAllSelectors(token) {
+			return true
+		}
+	}
+
+	// only do exact name matching
+	if td.Name() == token {
+		return true
+	}
+
+	return false
+}
diff --git a/internal/task/selection_test.go b/internal/task/selection_test.go
new file mode 100644
index 00000000000..d834480df8d
--- /dev/null
+++ b/internal/task/selection_test.go
@@ -0,0 +1,369 @@
+package task
+
+import (
+	"context"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/scylladb/go-set/strset"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/anchore/syft/internal/sbomsync"
+	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
+	"github.com/anchore/syft/syft/file"
+)
+
+// dummyTask creates a named, tagged task whose execution panics (these tests only exercise
+// selection, never execution).
+func dummyTask(name string, tags ...string) Task {
+	return NewTask(name, func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error {
+		panic("not implemented")
+	}, tags...)
+} + +// note: this test fixture does not need to be kept up to date here, but makes a great test subject +func createDummyTasks() tasks { + return []Task{ + // OS package installed catalogers + dummyTask("alpm-db-cataloger", "directory", "installed", "image", "os", "alpm", "archlinux"), + dummyTask("apk-db-cataloger", "directory", "installed", "image", "os", "apk", "alpine"), + dummyTask("dpkg-db-cataloger", "directory", "installed", "image", "os", "dpkg", "debian"), + dummyTask("portage-cataloger", "directory", "installed", "image", "os", "portage", "gentoo"), + dummyTask("rpm-db-cataloger", "directory", "installed", "image", "os", "rpm", "redhat"), + + // OS package declared catalogers + dummyTask("rpm-archive-cataloger", "declared", "directory", "os", "rpm", "redhat"), + + // language-specific package installed catalogers + dummyTask("conan-info-cataloger", "installed", "image", "language", "cpp", "conan"), + dummyTask("javascript-package-cataloger", "installed", "image", "language", "javascript", "node"), + dummyTask("php-composer-installed-cataloger", "installed", "image", "language", "php", "composer"), + dummyTask("ruby-installed-gemspec-cataloger", "installed", "image", "language", "ruby", "gem", "gemspec"), + dummyTask("rust-cargo-lock-cataloger", "installed", "image", "language", "rust", "binary"), + + // language-specific package declared catalogers + dummyTask("conan-cataloger", "declared", "directory", "language", "cpp", "conan"), + dummyTask("dart-pubspec-lock-cataloger", "declared", "directory", "language", "dart"), + dummyTask("dotnet-deps-cataloger", "declared", "directory", "language", "dotnet", "c#"), + dummyTask("elixir-mix-lock-cataloger", "declared", "directory", "language", "elixir"), + dummyTask("erlang-rebar-lock-cataloger", "declared", "directory", "language", "erlang"), + dummyTask("javascript-lock-cataloger", "declared", "directory", "language", "javascript", "node", "npm"), + + // language-specific package for both image and directory 
scans (but not necessarily declared) + dummyTask("dotnet-portable-executable-cataloger", "directory", "installed", "image", "language", "dotnet", "c#"), + dummyTask("python-installed-package-cataloger", "directory", "installed", "image", "language", "python"), + dummyTask("go-module-binary-cataloger", "directory", "installed", "image", "language", "go", "golang", "gomod", "binary"), + dummyTask("java-archive-cataloger", "directory", "installed", "image", "language", "java", "maven"), + dummyTask("graalvm-native-image-cataloger", "directory", "installed", "image", "language", "java"), + + // other package catalogers + dummyTask("binary-cataloger", "declared", "directory", "image", "binary"), + dummyTask("github-actions-usage-cataloger", "declared", "directory", "github", "github-actions"), + dummyTask("github-action-workflow-usage-cataloger", "declared", "directory", "github", "github-actions"), + dummyTask("sbom-cataloger", "declared", "directory", "image", "sbom"), + } +} + +func TestSelect(t *testing.T) { + + tests := []struct { + name string + allTasks []Task + basis []string + expressions []string + wantNames []string + wantTokens map[string]TokenSelection + wantRequest pkgcataloging.SelectionRequest + wantErr assert.ErrorAssertionFunc + }{ + { + name: "empty input", + allTasks: []Task{}, + basis: []string{}, + expressions: []string{}, + wantNames: []string{}, + wantTokens: map[string]TokenSelection{}, + wantRequest: pkgcataloging.SelectionRequest{}, + }, + { + name: "use default tasks", + allTasks: createDummyTasks(), + basis: []string{ + "image", + }, + expressions: []string{}, + wantNames: []string{ + "alpm-db-cataloger", + "apk-db-cataloger", + "dpkg-db-cataloger", + "portage-cataloger", + "rpm-db-cataloger", + "conan-info-cataloger", + "javascript-package-cataloger", + "php-composer-installed-cataloger", + "ruby-installed-gemspec-cataloger", + "rust-cargo-lock-cataloger", + "dotnet-portable-executable-cataloger", + "python-installed-package-cataloger", + 
"go-module-binary-cataloger", + "java-archive-cataloger", + "graalvm-native-image-cataloger", + "binary-cataloger", + "sbom-cataloger", + }, + wantTokens: map[string]TokenSelection{ + "alpm-db-cataloger": newTokenSelection([]string{"image"}, nil), + "apk-db-cataloger": newTokenSelection([]string{"image"}, nil), + "dpkg-db-cataloger": newTokenSelection([]string{"image"}, nil), + "portage-cataloger": newTokenSelection([]string{"image"}, nil), + "rpm-db-cataloger": newTokenSelection([]string{"image"}, nil), + "conan-info-cataloger": newTokenSelection([]string{"image"}, nil), + "javascript-package-cataloger": newTokenSelection([]string{"image"}, nil), + "php-composer-installed-cataloger": newTokenSelection([]string{"image"}, nil), + "ruby-installed-gemspec-cataloger": newTokenSelection([]string{"image"}, nil), + "rust-cargo-lock-cataloger": newTokenSelection([]string{"image"}, nil), + "dotnet-portable-executable-cataloger": newTokenSelection([]string{"image"}, nil), + "python-installed-package-cataloger": newTokenSelection([]string{"image"}, nil), + "go-module-binary-cataloger": newTokenSelection([]string{"image"}, nil), + "java-archive-cataloger": newTokenSelection([]string{"image"}, nil), + "graalvm-native-image-cataloger": newTokenSelection([]string{"image"}, nil), + "binary-cataloger": newTokenSelection([]string{"image"}, nil), + "sbom-cataloger": newTokenSelection([]string{"image"}, nil), + }, + wantRequest: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + }, + { + name: "select, add, and remove tasks", + allTasks: createDummyTasks(), + basis: []string{ + "image", + }, + expressions: []string{ + "+github-actions-usage-cataloger", + "-dpkg", + "os", + }, + wantNames: []string{ + "alpm-db-cataloger", + "apk-db-cataloger", + "portage-cataloger", + "rpm-db-cataloger", + "github-actions-usage-cataloger", + }, + wantTokens: map[string]TokenSelection{ + // selected + "alpm-db-cataloger": newTokenSelection([]string{"image", "os"}, nil), + 
"apk-db-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "dpkg-db-cataloger": newTokenSelection([]string{"image", "os"}, []string{"dpkg"}), + "portage-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "rpm-db-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "github-actions-usage-cataloger": newTokenSelection([]string{"github-actions-usage-cataloger"}, nil), + + // ultimately not selected + "rpm-archive-cataloger": newTokenSelection([]string{"os"}, nil), + "conan-info-cataloger": newTokenSelection([]string{"image"}, nil), + "javascript-package-cataloger": newTokenSelection([]string{"image"}, nil), + "php-composer-installed-cataloger": newTokenSelection([]string{"image"}, nil), + "ruby-installed-gemspec-cataloger": newTokenSelection([]string{"image"}, nil), + "rust-cargo-lock-cataloger": newTokenSelection([]string{"image"}, nil), + "dotnet-portable-executable-cataloger": newTokenSelection([]string{"image"}, nil), + "python-installed-package-cataloger": newTokenSelection([]string{"image"}, nil), + "go-module-binary-cataloger": newTokenSelection([]string{"image"}, nil), + "java-archive-cataloger": newTokenSelection([]string{"image"}, nil), + "graalvm-native-image-cataloger": newTokenSelection([]string{"image"}, nil), + "binary-cataloger": newTokenSelection([]string{"image"}, nil), + "sbom-cataloger": newTokenSelection([]string{"image"}, nil), + }, + wantRequest: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + SubSelectTags: []string{"os"}, + RemoveNamesOrTags: []string{"dpkg"}, + AddNames: []string{"github-actions-usage-cataloger"}, + }, + }, + { + name: "allow for partial selections", + allTasks: createDummyTasks(), + basis: []string{ + "image", + }, + expressions: []string{ + // valid... + "+github-actions-usage-cataloger", + "-dpkg", + "os", + // invalid... 
+ "+python", + "rust-cargo-lock-cataloger", + }, + wantNames: []string{ + "alpm-db-cataloger", + "apk-db-cataloger", + "portage-cataloger", + "rpm-db-cataloger", + "github-actions-usage-cataloger", + }, + wantTokens: map[string]TokenSelection{ + // selected + "alpm-db-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "apk-db-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "dpkg-db-cataloger": newTokenSelection([]string{"image", "os"}, []string{"dpkg"}), + "portage-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "rpm-db-cataloger": newTokenSelection([]string{"image", "os"}, nil), + "github-actions-usage-cataloger": newTokenSelection([]string{"github-actions-usage-cataloger"}, nil), + + // ultimately not selected + "rpm-archive-cataloger": newTokenSelection([]string{"os"}, nil), + "conan-info-cataloger": newTokenSelection([]string{"image"}, nil), + "javascript-package-cataloger": newTokenSelection([]string{"image"}, nil), + "php-composer-installed-cataloger": newTokenSelection([]string{"image"}, nil), + "ruby-installed-gemspec-cataloger": newTokenSelection([]string{"image"}, nil), + "rust-cargo-lock-cataloger": newTokenSelection([]string{"image"}, nil), + "dotnet-portable-executable-cataloger": newTokenSelection([]string{"image"}, nil), + "python-installed-package-cataloger": newTokenSelection([]string{"image"}, nil), // note: there is no python token used for selection + "go-module-binary-cataloger": newTokenSelection([]string{"image"}, nil), + "java-archive-cataloger": newTokenSelection([]string{"image"}, nil), + "graalvm-native-image-cataloger": newTokenSelection([]string{"image"}, nil), + "binary-cataloger": newTokenSelection([]string{"image"}, nil), + "sbom-cataloger": newTokenSelection([]string{"image"}, nil), + }, + wantRequest: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + SubSelectTags: []string{"os", "rust-cargo-lock-cataloger"}, + RemoveNamesOrTags: []string{"dpkg"}, + AddNames: 
[]string{"github-actions-usage-cataloger", "python"}, + }, + wantErr: assert.Error, // !important! + }, + { + name: "select all tasks", + allTasks: createDummyTasks(), + basis: []string{ + "all", + }, + expressions: []string{}, + wantNames: []string{ + "alpm-db-cataloger", + "apk-db-cataloger", + "dpkg-db-cataloger", + "portage-cataloger", + "rpm-db-cataloger", + "rpm-archive-cataloger", + "conan-info-cataloger", + "javascript-package-cataloger", + "php-composer-installed-cataloger", + "ruby-installed-gemspec-cataloger", + "rust-cargo-lock-cataloger", + "conan-cataloger", + "dart-pubspec-lock-cataloger", + "dotnet-deps-cataloger", + "elixir-mix-lock-cataloger", + "erlang-rebar-lock-cataloger", + "javascript-lock-cataloger", + "dotnet-portable-executable-cataloger", + "python-installed-package-cataloger", + "go-module-binary-cataloger", + "java-archive-cataloger", + "graalvm-native-image-cataloger", + "binary-cataloger", + "github-actions-usage-cataloger", + "github-action-workflow-usage-cataloger", + "sbom-cataloger", + }, + wantTokens: map[string]TokenSelection{ + "alpm-db-cataloger": newTokenSelection([]string{"all"}, nil), + "apk-db-cataloger": newTokenSelection([]string{"all"}, nil), + "dpkg-db-cataloger": newTokenSelection([]string{"all"}, nil), + "portage-cataloger": newTokenSelection([]string{"all"}, nil), + "rpm-db-cataloger": newTokenSelection([]string{"all"}, nil), + "rpm-archive-cataloger": newTokenSelection([]string{"all"}, nil), + "conan-info-cataloger": newTokenSelection([]string{"all"}, nil), + "javascript-package-cataloger": newTokenSelection([]string{"all"}, nil), + "php-composer-installed-cataloger": newTokenSelection([]string{"all"}, nil), + "ruby-installed-gemspec-cataloger": newTokenSelection([]string{"all"}, nil), + "rust-cargo-lock-cataloger": newTokenSelection([]string{"all"}, nil), + "conan-cataloger": newTokenSelection([]string{"all"}, nil), + "dart-pubspec-lock-cataloger": newTokenSelection([]string{"all"}, nil), + 
"dotnet-deps-cataloger": newTokenSelection([]string{"all"}, nil), + "elixir-mix-lock-cataloger": newTokenSelection([]string{"all"}, nil), + "erlang-rebar-lock-cataloger": newTokenSelection([]string{"all"}, nil), + "javascript-lock-cataloger": newTokenSelection([]string{"all"}, nil), + "dotnet-portable-executable-cataloger": newTokenSelection([]string{"all"}, nil), + "python-installed-package-cataloger": newTokenSelection([]string{"all"}, nil), + "go-module-binary-cataloger": newTokenSelection([]string{"all"}, nil), + "java-archive-cataloger": newTokenSelection([]string{"all"}, nil), + "graalvm-native-image-cataloger": newTokenSelection([]string{"all"}, nil), + "binary-cataloger": newTokenSelection([]string{"all"}, nil), + "github-actions-usage-cataloger": newTokenSelection([]string{"all"}, nil), + "github-action-workflow-usage-cataloger": newTokenSelection([]string{"all"}, nil), + "sbom-cataloger": newTokenSelection([]string{"all"}, nil), + }, + wantRequest: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"all"}, + }, + }, + { + name: "set default with multiple tags", + allTasks: createDummyTasks(), + basis: []string{ + "gemspec", + "python", + }, + expressions: []string{}, + wantNames: []string{ + "ruby-installed-gemspec-cataloger", + "python-installed-package-cataloger", + }, + wantTokens: map[string]TokenSelection{ + "ruby-installed-gemspec-cataloger": newTokenSelection([]string{"gemspec"}, nil), + "python-installed-package-cataloger": newTokenSelection([]string{"python"}, nil), + }, + wantRequest: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"gemspec", "python"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = assert.NoError + } + + req := pkgcataloging.NewSelectionRequest().WithDefaults(tt.basis...).WithExpression(tt.expressions...) 
+ + got, gotEvidence, err := Select(tt.allTasks, req) + tt.wantErr(t, err) + if err != nil { + // dev note: this is useful for debugging when needed... + //for _, e := range gotEvidence.Request.Expressions { + // t.Logf("expression (errors %q): %#v", e.Errors, e) + //} + + // note: we DON'T bail early in validations... this is because we should always return the full set of + // of selected tasks and surrounding evidence. + } + + gotNames := make([]string, 0) + for _, g := range got { + gotNames = append(gotNames, g.Name()) + } + + assert.Equal(t, tt.wantNames, gotNames) + + // names in selection should match all tasks returned + require.Len(t, tt.wantNames, gotEvidence.Result.Size(), "selected tasks should match all tasks returned (but does not)") + assert.ElementsMatch(t, tt.wantNames, gotEvidence.Result.List(), "selected tasks should match all tasks returned (but does not)") + + setCompare := cmp.Comparer(func(x, y *strset.Set) bool { + return x.IsEqual(y) + }) + + if d := cmp.Diff(tt.wantTokens, gotEvidence.TokensByTask, setCompare); d != "" { + t.Errorf("unexpected tokens by task (-want +got):\n%s", d) + } + assert.Equal(t, tt.wantRequest, gotEvidence.Request) + + }) + } +} diff --git a/internal/task/set.go b/internal/task/set.go new file mode 100644 index 00000000000..6d2b91dafe4 --- /dev/null +++ b/internal/task/set.go @@ -0,0 +1,70 @@ +package task + +type set struct { + order []string + tasks map[string]Task +} + +func newSet(tasks ...Task) *set { + s := &set{ + order: []string{}, + tasks: make(map[string]Task), + } + + s.Add(tasks...) 
+
+	return s
+}
+
+// Len returns the number of tasks in the set.
+func (ts *set) Len() int {
+	return len(ts.tasks)
+}
+
+// Add appends tasks not already present (keyed by name), preserving insertion order.
+func (ts *set) Add(tasks ...Task) {
+	for _, t := range tasks {
+		taskName := t.Name()
+		if _, exists := ts.tasks[taskName]; exists {
+			continue
+		}
+		ts.tasks[taskName] = t
+		ts.order = append(ts.order, taskName)
+	}
+}
+
+// Remove deletes the given tasks (by name) from the set, keeping the relative order of the remainder.
+func (ts *set) Remove(tasks ...Task) {
+	for _, t := range tasks {
+		taskName := t.Name()
+		if _, exists := ts.tasks[taskName]; !exists {
+			continue
+		}
+
+		delete(ts.tasks, taskName)
+		for i, t := range ts.order {
+			if t == taskName {
+				ts.order = append(ts.order[:i], ts.order[i+1:]...)
+				break
+			}
+		}
+	}
+}
+
+// Intersect keeps only the tasks also present in the given list, retaining this set's
+// original task values and ordering.
+func (ts *set) Intersect(tasks ...Task) {
+	other := newSet(tasks...)
+	result := newSet()
+	for _, taskName := range ts.order {
+		// we make a new set to prevent the original set from being modified while we are iterating over "order"
+		if _, exists := other.tasks[taskName]; exists {
+			// note: keep the original task and ordering
+			result.Add(ts.tasks[taskName])
+		}
+	}
+	*ts = *result
+}
+
+// Tasks returns the tasks in insertion order (nil for an empty set).
+func (ts set) Tasks() tasks {
+	var result []Task
+	for _, name := range ts.order {
+		result = append(result, ts.tasks[name])
+	}
+	return result
+}
diff --git a/internal/task/set_test.go b/internal/task/set_test.go
new file mode 100644
index 00000000000..d5bfea8a170
--- /dev/null
+++ b/internal/task/set_test.go
@@ -0,0 +1,154 @@
+package task
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/anchore/syft/internal/sbomsync"
+	"github.com/anchore/syft/syft/file"
+)
+
+var _ Task = (*mockTask)(nil)
+
+// mockTask is a minimal named Task; Execute is never called by these tests.
+type mockTask struct {
+	name string
+}
+
+func (m mockTask) Execute(_ context.Context, _ file.Resolver, _ sbomsync.Builder) error {
+	panic("implement me")
+}
+
+func (m mockTask) Name() string {
+	return m.name
+}
+
+func Test_set_Add(t *testing.T) {
+	tests := []struct {
+		name         string
+		initialTasks []Task
+		newTasks     []Task
+		expected     []string
+	}{
+		{
+			name:         "add unique tasks",
+			initialTasks: []Task{mockTask{"task2"},
mockTask{"task1"}}, + newTasks: []Task{mockTask{"task3"}}, + expected: []string{ + "task2", // note order is honored + "task1", + "task3", + }, + }, + { + name: "add duplicate tasks", + initialTasks: []Task{mockTask{"task1"}, mockTask{"task2"}}, + newTasks: []Task{mockTask{"task1"}}, + expected: []string{ + "task1", + "task2", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := newSet(tt.initialTasks...) + s.Add(tt.newTasks...) + got := s.Tasks() + var gotNames []string + for _, tsk := range got { + gotNames = append(gotNames, tsk.Name()) + } + assert.Equal(t, tt.expected, gotNames) + }) + } +} + +func Test_set_Remove(t *testing.T) { + tests := []struct { + name string + initialTasks []Task + tasksToRemove []Task + expectedOrder []string + }{ + { + name: "remove existing tasks", + initialTasks: []Task{mockTask{"task1"}, mockTask{"task2"}, mockTask{"task3"}}, + tasksToRemove: []Task{mockTask{"task2"}}, + expectedOrder: []string{"task1", "task3"}, + }, + { + name: "remove non-existing tasks", + initialTasks: []Task{mockTask{"task1"}, mockTask{"task2"}}, + tasksToRemove: []Task{mockTask{"task3"}}, + expectedOrder: []string{"task1", "task2"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := newSet(tt.initialTasks...) + s.Remove(tt.tasksToRemove...) 
+ assert.Equal(t, tt.expectedOrder, s.order) + }) + } +} + +func Test_set_Intersect(t *testing.T) { + tests := []struct { + name string + initialTasks []Task + intersectTasks []Task + expectedOrder []string + }{ + { + name: "intersect with overlapping tasks", + initialTasks: []Task{mockTask{"task1"}, mockTask{"task2"}}, + intersectTasks: []Task{mockTask{"task2"}, mockTask{"task3"}}, + expectedOrder: []string{"task2"}, + }, + { + name: "intersect with non-overlapping tasks", + initialTasks: []Task{mockTask{"task1"}, mockTask{"task4"}}, + intersectTasks: []Task{mockTask{"task2"}, mockTask{"task3"}}, + expectedOrder: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := newSet(tt.initialTasks...) + s.Intersect(tt.intersectTasks...) + assert.Equal(t, tt.expectedOrder, s.order) + }) + } +} + +func Test_set_Tasks(t *testing.T) { + tests := []struct { + name string + initialTasks []Task + expectedTasks tasks + }{ + { + name: "empty set", + initialTasks: []Task{}, + expectedTasks: nil, + }, + { + name: "get tasks from set", + initialTasks: []Task{mockTask{"task1"}, mockTask{"task2"}}, + expectedTasks: []Task{mockTask{"task1"}, mockTask{"task2"}}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := newSet(tt.initialTasks...) + resultTasks := s.Tasks() + assert.Equal(t, tt.expectedTasks, resultTasks) + }) + } +} diff --git a/internal/task/task.go b/internal/task/task.go new file mode 100644 index 00000000000..a1d9add056a --- /dev/null +++ b/internal/task/task.go @@ -0,0 +1,89 @@ +package task + +import ( + "context" + "fmt" + "sort" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/file" +) + +var _ interface { + Task + Selector +} = (*task)(nil) + +// Task is a function that can wrap a cataloger to populate the SBOM with data (coordinated through the mutex). 
+type Task interface { + Name() string + Execute(context.Context, file.Resolver, sbomsync.Builder) error +} + +type Selector interface { + HasAllSelectors(...string) bool + Selectors() []string +} + +type tasks []Task + +type task struct { + name string + selectors *strset.Set + task func(context.Context, file.Resolver, sbomsync.Builder) error +} + +func NewTask(name string, tsk func(context.Context, file.Resolver, sbomsync.Builder) error, tags ...string) Task { + if tsk == nil { + panic(fmt.Errorf("task cannot be nil")) + } + tags = append(tags, name) + return &task{ + name: name, + selectors: strset.New(tags...), + task: tsk, + } +} + +func (t task) HasAllSelectors(ids ...string) bool { + // tags or name + return t.selectors.Has(ids...) +} + +func (t task) Selectors() []string { + return t.selectors.List() +} + +func (t task) Name() string { + return t.name +} + +func (t task) Execute(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error { + return t.task(ctx, resolver, sbom) +} + +func (ts tasks) Names() []string { + var names []string + for _, td := range ts { + names = append(names, td.Name()) + } + return names +} + +func (ts tasks) Tags() []string { + tags := strset.New() + for _, td := range ts { + if s, ok := td.(Selector); ok { + tags.Add(s.Selectors()...) 
+ } + + tags.Remove(td.Name()) + } + + tagsList := tags.List() + sort.Strings(tagsList) + + return tagsList +} diff --git a/syft/cataloging/config.go b/syft/cataloging/archive_search.go similarity index 63% rename from syft/cataloging/config.go rename to syft/cataloging/archive_search.go index d8e4f397391..bfaf0a1b2a9 100644 --- a/syft/cataloging/config.go +++ b/syft/cataloging/archive_search.go @@ -11,3 +11,13 @@ func DefaultArchiveSearchConfig() ArchiveSearchConfig { IncludeUnindexedArchives: false, } } + +func (c ArchiveSearchConfig) WithIncludeIndexedArchives(include bool) ArchiveSearchConfig { + c.IncludeIndexedArchives = include + return c +} + +func (c ArchiveSearchConfig) WithIncludeUnindexedArchives(include bool) ArchiveSearchConfig { + c.IncludeUnindexedArchives = include + return c +} diff --git a/syft/cataloging/data_generation.go b/syft/cataloging/data_generation.go new file mode 100644 index 00000000000..8505298f7e6 --- /dev/null +++ b/syft/cataloging/data_generation.go @@ -0,0 +1,16 @@ +package cataloging + +type DataGenerationConfig struct { + GenerateCPEs bool `yaml:"generate-cpes" json:"generate-cpes" mapstructure:"generate-cpes"` +} + +func DefaultDataGenerationConfig() DataGenerationConfig { + return DataGenerationConfig{ + GenerateCPEs: true, + } +} + +func (c DataGenerationConfig) WithGenerateCPEs(generate bool) DataGenerationConfig { + c.GenerateCPEs = generate + return c +} diff --git a/syft/cataloging/filecataloging/config.go b/syft/cataloging/filecataloging/config.go new file mode 100644 index 00000000000..80559fb00b2 --- /dev/null +++ b/syft/cataloging/filecataloging/config.go @@ -0,0 +1,78 @@ +package filecataloging + +import ( + "crypto" + "encoding/json" + "fmt" + "strings" + + intFile "github.com/anchore/syft/internal/file" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/file/cataloger/filecontent" +) + +type Config struct { + Selection file.Selection `yaml:"selection" 
json:"selection" mapstructure:"selection"` + Hashers []crypto.Hash `yaml:"hashers" json:"hashers" mapstructure:"hashers"` + Content filecontent.Config `yaml:"content" json:"content" mapstructure:"content"` +} + +type configMarshaledForm struct { + Selection file.Selection `yaml:"selection" json:"selection" mapstructure:"selection"` + Hashers []string `yaml:"hashers" json:"hashers" mapstructure:"hashers"` + Content filecontent.Config `yaml:"content" json:"content" mapstructure:"content"` +} + +func DefaultConfig() Config { + hashers, err := intFile.Hashers("sha256") + if err != nil { + log.WithFields("error", err).Warn("unable to create file hashers") + } + return Config{ + Selection: file.FilesOwnedByPackageSelection, + Hashers: hashers, + Content: filecontent.DefaultConfig(), + } +} + +func (cfg Config) MarshalJSON() ([]byte, error) { + marshaled := configMarshaledForm{ + Selection: cfg.Selection, + Hashers: hashersToString(cfg.Hashers), + } + return json.Marshal(marshaled) +} + +func hashersToString(hashers []crypto.Hash) []string { + var result []string + for _, h := range hashers { + result = append(result, strings.ToLower(h.String())) + } + return result +} + +func (cfg *Config) UnmarshalJSON(data []byte) error { + var marshaled configMarshaledForm + if err := json.Unmarshal(data, &marshaled); err != nil { + return err + } + + hashers, err := intFile.Hashers(marshaled.Hashers...) 
+ if err != nil { + return fmt.Errorf("unable to parse configured hashers: %w", err) + } + cfg.Selection = marshaled.Selection + cfg.Hashers = hashers + return nil +} + +func (cfg Config) WithSelection(selection file.Selection) Config { + cfg.Selection = selection + return cfg +} + +func (cfg Config) WithHashers(hashers []crypto.Hash) Config { + cfg.Hashers = hashers + return cfg +} diff --git a/syft/cataloging/filecataloging/config_test.go b/syft/cataloging/filecataloging/config_test.go new file mode 100644 index 00000000000..ca62dccb1a1 --- /dev/null +++ b/syft/cataloging/filecataloging/config_test.go @@ -0,0 +1,74 @@ +package filecataloging + +import ( + "crypto" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/file" +) + +func TestConfig_MarshalJSON(t *testing.T) { + tests := []struct { + name string + cfg Config + want []byte + wantErr require.ErrorAssertionFunc + }{ + { + name: "converts hashers to strings", + cfg: Config{ + Selection: file.FilesOwnedByPackageSelection, + Hashers: []crypto.Hash{crypto.SHA256}, + }, + want: []byte(`{"selection":"owned-by-package","hashers":["sha-256"],"content":{"globs":null,"skip-files-above-size":0}}`), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + got, err := tt.cfg.MarshalJSON() + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(got, tt.want); d != "" { + t.Errorf("MarshalJSON() mismatch (-want +got):\n%s", d) + } + }) + } +} + +func TestConfig_UnmarshalJSON(t *testing.T) { + + tests := []struct { + name string + data []byte + want Config + wantErr bool + }{ + { + name: "converts strings to hashers", + data: []byte(`{"selection":"owned-by-package","hashers":["sha-256"]}`), + want: Config{ + Selection: file.FilesOwnedByPackageSelection, + Hashers: []crypto.Hash{crypto.SHA256}, + }, + }, + } + for _, tt := 
range tests { + t.Run(tt.name, func(t *testing.T) { + cfg := Config{} + if err := cfg.UnmarshalJSON(tt.data); (err != nil) != tt.wantErr { + t.Errorf("UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + } + + assert.Equal(t, tt.want, cfg) + }) + } +} diff --git a/syft/cataloging/pkgcataloging/cataloger_reference.go b/syft/cataloging/pkgcataloging/cataloger_reference.go new file mode 100644 index 00000000000..715350032b6 --- /dev/null +++ b/syft/cataloging/pkgcataloging/cataloger_reference.go @@ -0,0 +1,23 @@ +package pkgcataloging + +import "github.com/anchore/syft/syft/pkg" + +type CatalogerReference struct { + Cataloger pkg.Cataloger + AlwaysEnabled bool + Tags []string +} + +func NewCatalogerReference(cataloger pkg.Cataloger, tags []string) CatalogerReference { + return CatalogerReference{ + Cataloger: cataloger, + Tags: tags, + } +} + +func NewAlwaysEnabledCatalogerReference(cataloger pkg.Cataloger) CatalogerReference { + return CatalogerReference{ + Cataloger: cataloger, + AlwaysEnabled: true, + } +} diff --git a/syft/cataloging/pkgcataloging/config.go b/syft/cataloging/pkgcataloging/config.go new file mode 100644 index 00000000000..192ab4cf531 --- /dev/null +++ b/syft/cataloging/pkgcataloging/config.go @@ -0,0 +1,58 @@ +package pkgcataloging + +import ( + "github.com/anchore/syft/syft/pkg/cataloger/binary" + "github.com/anchore/syft/syft/pkg/cataloger/golang" + "github.com/anchore/syft/syft/pkg/cataloger/java" + "github.com/anchore/syft/syft/pkg/cataloger/javascript" + "github.com/anchore/syft/syft/pkg/cataloger/kernel" + "github.com/anchore/syft/syft/pkg/cataloger/python" +) + +type Config struct { + Binary binary.CatalogerConfig `yaml:"binary" json:"binary" mapstructure:"binary"` + Golang golang.CatalogerConfig `yaml:"golang" json:"golang" mapstructure:"golang"` + JavaArchive java.ArchiveCatalogerConfig `yaml:"java-archive" json:"java-archive" mapstructure:"java-archive"` + JavaScript javascript.CatalogerConfig `yaml:"javascript" json:"javascript" 
mapstructure:"javascript"` + LinuxKernel kernel.LinuxKernelCatalogerConfig `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"` + Python python.CatalogerConfig `yaml:"python" json:"python" mapstructure:"python"` +} + +func DefaultConfig() Config { + return Config{ + Golang: golang.DefaultCatalogerConfig(), + LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), + Python: python.DefaultCatalogerConfig(), + JavaArchive: java.DefaultArchiveCatalogerConfig(), + } +} + +func (c Config) WithBinaryConfig(cfg binary.CatalogerConfig) Config { + c.Binary = cfg + return c +} + +func (c Config) WithGolangConfig(cfg golang.CatalogerConfig) Config { + c.Golang = cfg + return c +} + +func (c Config) WithJavascriptConfig(cfg javascript.CatalogerConfig) Config { + c.JavaScript = cfg + return c +} + +func (c Config) WithLinuxKernelConfig(cfg kernel.LinuxKernelCatalogerConfig) Config { + c.LinuxKernel = cfg + return c +} + +func (c Config) WithPythonConfig(cfg python.CatalogerConfig) Config { + c.Python = cfg + return c +} + +func (c Config) WithJavaArchiveConfig(cfg java.ArchiveCatalogerConfig) Config { + c.JavaArchive = cfg + return c +} diff --git a/syft/cataloging/pkgcataloging/selection_request.go b/syft/cataloging/pkgcataloging/selection_request.go new file mode 100644 index 00000000000..7dc711e3ae7 --- /dev/null +++ b/syft/cataloging/pkgcataloging/selection_request.go @@ -0,0 +1,65 @@ +package pkgcataloging + +import ( + "strings" +) + +type SelectionRequest struct { + DefaultNamesOrTags []string `json:"default,omitempty"` + SubSelectTags []string `json:"selection,omitempty"` + AddNames []string `json:"addition,omitempty"` + RemoveNamesOrTags []string `json:"removal,omitempty"` +} + +func NewSelectionRequest() SelectionRequest { + return SelectionRequest{} +} + +func (s SelectionRequest) WithExpression(expressions ...string) SelectionRequest { + expressions = cleanSelection(expressions) + for _, expr := range expressions { + switch { + case 
strings.HasPrefix(expr, "+"): + s = s.WithAdditions(strings.TrimPrefix(expr, "+")) + case strings.HasPrefix(expr, "-"): + s = s.WithRemovals(strings.TrimPrefix(expr, "-")) + default: + s = s.WithSubSelections(expr) + } + } + return s +} + +func (s SelectionRequest) WithDefaults(nameOrTags ...string) SelectionRequest { + s.DefaultNamesOrTags = append(s.DefaultNamesOrTags, nameOrTags...) + return s +} + +func (s SelectionRequest) WithSubSelections(tags ...string) SelectionRequest { + s.SubSelectTags = append(s.SubSelectTags, tags...) + return s +} + +func (s SelectionRequest) WithAdditions(names ...string) SelectionRequest { + s.AddNames = append(s.AddNames, names...) + return s +} + +func (s SelectionRequest) WithRemovals(nameOrTags ...string) SelectionRequest { + s.RemoveNamesOrTags = append(s.RemoveNamesOrTags, nameOrTags...) + return s +} + +func cleanSelection(tags []string) []string { + var cleaned []string + for _, tag := range tags { + for _, t := range strings.Split(tag, ",") { + t = strings.TrimSpace(t) + if t == "" { + continue + } + cleaned = append(cleaned, t) + } + } + return cleaned +} diff --git a/syft/cataloging/pkgcataloging/tags.go b/syft/cataloging/pkgcataloging/tags.go new file mode 100644 index 00000000000..5c7b46bb077 --- /dev/null +++ b/syft/cataloging/pkgcataloging/tags.go @@ -0,0 +1,24 @@ +package pkgcataloging + +const ( + // InstalledTag is to identify packages found to be positively installed. + InstalledTag = "installed" + + // DeclaredTag is to identify packages described but not necessarily installed. + DeclaredTag = "declared" + + // ImageTag indicates the cataloger should be used when cataloging images. + ImageTag = "image" + + // DirectoryTag indicates the cataloger should be used when cataloging directories. + DirectoryTag = "directory" + + // PackageTag should be used to identify catalogers that are package-based. + PackageTag = "package" + + // OSTag should be used to identify catalogers that cataloging OS packages. 
+ OSTag = "os" + + // LanguageTag should be used to identify catalogers that cataloging language-specific packages. + LanguageTag = "language" +) diff --git a/syft/cataloging/relationships.go b/syft/cataloging/relationships.go new file mode 100644 index 00000000000..1dbbd283235 --- /dev/null +++ b/syft/cataloging/relationships.go @@ -0,0 +1,39 @@ +package cataloging + +type RelationshipsConfig struct { + // PackageFileOwnership will include package-to-file relationships that indicate which files are owned by which packages. + PackageFileOwnership bool `yaml:"package-file-ownership" json:"package-file-ownership" mapstructure:"package-file-ownership"` + + // PackageFileOwnershipOverlap will include package-to-package relationships that indicate one package is owned by another due to files claimed to be owned by one package are also evidence of another package's existence. + // For example, if an RPM package is installed and claims to own /etc/app/package.lock and a separate NPM package was discovered by cataloging /etc/app/package.lock, then the two packages will + // have ownership overlap relationship. + PackageFileOwnershipOverlap bool `yaml:"package-file-ownership-overlap" json:"package-file-ownership-overlap" mapstructure:"package-file-ownership-overlap"` + + // ExcludeBinaryPackagesWithFileOwnershipOverlap will exclude binary packages from the package catalog that are evident by files also owned by another package. + // For example, if a binary package representing the /bin/python binary is discovered and there is a python RPM package installed which claims to + // orn /bin/python, then the binary package will be excluded from the catalog altogether if this configuration is set to true. 
+ ExcludeBinaryPackagesWithFileOwnershipOverlap bool `yaml:"exclude-binary-packages-with-file-ownership-overlap" json:"exclude-binary-packages-with-file-ownership-overlap" mapstructure:"exclude-binary-packages-with-file-ownership-overlap"` +} + +func DefaultRelationshipsConfig() RelationshipsConfig { + return RelationshipsConfig{ + PackageFileOwnership: true, + PackageFileOwnershipOverlap: true, + ExcludeBinaryPackagesWithFileOwnershipOverlap: true, + } +} + +func (c RelationshipsConfig) WithPackageFileOwnership(ownership bool) RelationshipsConfig { + c.PackageFileOwnership = ownership + return c +} + +func (c RelationshipsConfig) WithPackageFileOwnershipOverlap(overlap bool) RelationshipsConfig { + c.PackageFileOwnershipOverlap = overlap + return c +} + +func (c RelationshipsConfig) WithExcludeBinaryPackagesWithFileOwnershipOverlap(exclude bool) RelationshipsConfig { + c.ExcludeBinaryPackagesWithFileOwnershipOverlap = exclude + return c +} diff --git a/syft/cataloging/search.go b/syft/cataloging/search.go new file mode 100644 index 00000000000..762494ade25 --- /dev/null +++ b/syft/cataloging/search.go @@ -0,0 +1,18 @@ +package cataloging + +import "github.com/anchore/syft/syft/source" + +type SearchConfig struct { + Scope source.Scope `yaml:"scope" json:"scope" mapstructure:"scope"` +} + +func DefaultSearchConfig() SearchConfig { + return SearchConfig{ + Scope: source.SquashedScope, + } +} + +func (c SearchConfig) WithScope(scope source.Scope) SearchConfig { + c.Scope = scope + return c +} diff --git a/syft/configuration_audit_trail.go b/syft/configuration_audit_trail.go new file mode 100644 index 00000000000..78f43fece61 --- /dev/null +++ b/syft/configuration_audit_trail.go @@ -0,0 +1,80 @@ +package syft + +import ( + "encoding/json" + "reflect" + + "github.com/anchore/syft/syft/cataloging" + "github.com/anchore/syft/syft/cataloging/filecataloging" + "github.com/anchore/syft/syft/cataloging/pkgcataloging" +) + +// configurationAuditTrail is all input 
configuration that was used to generate the SBOM
+ + initialJSON, err := json.Marshal(marshalAPIConfiguration(cfg)) + if err != nil { + return nil, err + } + + var dataMap map[string]interface{} + if err := json.Unmarshal(initialJSON, &dataMap); err != nil { + return nil, err + } + + if v, exists := dataMap["extra"]; exists && v == nil { + // remove the extra key if it renders as nil + delete(dataMap, "extra") + } + + return marshalSorted(dataMap) +} + +// marshalSorted recursively marshals a map with sorted keys +func marshalSorted(m interface{}) ([]byte, error) { + if reflect.TypeOf(m).Kind() != reflect.Map { + return json.Marshal(m) + } + + val := reflect.ValueOf(m) + sortedMap := make(map[string]interface{}) + + for _, key := range val.MapKeys() { + value := val.MapIndex(key).Interface() + + if value != nil && reflect.TypeOf(value).Kind() == reflect.Map { + sortedValue, err := marshalSorted(value) + if err != nil { + return nil, err + } + sortedMap[key.String()] = json.RawMessage(sortedValue) + } else { + sortedMap[key.String()] = value + } + } + + return json.Marshal(sortedMap) +} diff --git a/syft/configuration_audit_trail_test.go b/syft/configuration_audit_trail_test.go new file mode 100644 index 00000000000..56e9a20d78e --- /dev/null +++ b/syft/configuration_audit_trail_test.go @@ -0,0 +1,346 @@ +package syft + +import ( + "bytes" + "crypto" + "encoding/json" + "fmt" + "io" + "reflect" + "sort" + "testing" + + "github.com/hashicorp/go-multierror" + "github.com/iancoleman/strcase" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/cataloging/filecataloging" + "github.com/anchore/syft/syft/file" +) + +func Test_configurationAuditTrail_StructTags(t *testing.T) { + // we need to ensure that the output for any configuration is well-formed and follows conventions. + // We ensure that: + // 1. all fields have a JSON tag + // 2. 
the tag value follows lowercase kebab-case style + + jsonTags := getJSONTags(t, configurationAuditTrail{}) + + for _, tag := range jsonTags { + assertLowercaseKebab(t, tag) + } + +} + +func getJSONTags(t *testing.T, v interface{}) []string { + var tags []string + err := collectJSONTags(t, reflect.ValueOf(v), &tags, "", "") + require.NoError(t, err) + return tags +} + +func collectJSONTags(t *testing.T, v reflect.Value, tags *[]string, parentTag string, path string) error { + var errs error + + if v.Kind() == reflect.Ptr { + v = v.Elem() + } + + if v.Kind() != reflect.Struct { + return errs + } + + tType := v.Type() + for i := 0; i < v.NumField(); i++ { + field := v.Field(i) + fieldType := tType.Field(i) + + curPath := path + "." + fieldType.Name + + // account for embeddings + if fieldType.Anonymous { + embeddedField := field + + if embeddedField.Kind() == reflect.Ptr { + // this can be enhanced in the future if the need arises... + errs = multierror.Append(errs, fmt.Errorf("field '%s' is a pointer to an embedded struct, this is not supported in the test helper", curPath)) + } + + if embeddedField.Kind() == reflect.Struct { + err := collectJSONTags(t, field, tags, parentTag, curPath) + if err != nil { + errs = multierror.Append(errs, err) + } + } + + continue + } + + var tag string + var ok bool + if fieldType.PkgPath == "" { + tag, ok = fieldType.Tag.Lookup("json") + if !ok || (tag == "" && parentTag == "") { + errs = multierror.Append(errs, fmt.Errorf("field '%s' does not have a json tag", curPath)) + return errs + } + if tag != "" && tag != "-" { + *tags = append(*tags, tag) + } + } + + if field.Kind() == reflect.Struct || (field.Kind() == reflect.Ptr && field.Elem().Kind() == reflect.Struct) { + err := collectJSONTags(t, field, tags, tag, curPath) + if err != nil { + errs = multierror.Append(errs, err) + } + } + } + return errs +} + +func assertLowercaseKebab(t *testing.T, tag string) { + t.Helper() + require.NotEmpty(t, tag) + assert.Equal(t, 
strcase.ToKebab(tag), tag) +} + +func Test_collectJSONTags(t *testing.T) { + // though this is not used in production, this is a sensitive and complex enough of a check to warrant testing the test helper. + type good struct { + A string `json:"a"` + } + + type missing struct { + A string `json:"a"` + B string + } + + type exclude struct { + A string `json:"a"` + B string `json:"-"` + } + + type goodEmbedded struct { + good `json:""` + } + + type badEmbedded struct { + missing `json:""` + } + + // simply not covered and require further development to support + type goodPtrEmbedded struct { + *good `json:""` + } + + // simply not covered and require further development to support + type badPtrEmbedded struct { + *missing `json:""` + } + + tests := []struct { + name string + v interface{} + want []string + wantErr require.ErrorAssertionFunc + }{ + { + name: "good", + v: good{}, + want: []string{ + "a", + }, + }, + { + name: "missing", + v: missing{}, + wantErr: require.Error, + }, + { + name: "exclude", + v: exclude{}, + want: []string{ + "a", + }, + }, + { + name: "bad embedded", + v: badEmbedded{}, + wantErr: require.Error, + }, + { + name: "good embedded", + v: goodEmbedded{}, + want: []string{ + "a", + }, + }, + // these cases are simply not covered and require further development to support + { + name: "bad ptr embedded", + v: badPtrEmbedded{}, + wantErr: require.Error, + }, + { + name: "good ptr embedded", + v: goodPtrEmbedded{}, + want: []string{ + "a", + }, + wantErr: require.Error, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + + var tags []string + + err := collectJSONTags(t, reflect.ValueOf(tt.v), &tags, "", "") + + tt.wantErr(t, err) + if err != nil { + return + } + + assert.Equal(t, tt.want, tags) + }) + } + +} + +func Test_configurationAuditTrail_MarshalJSON(t *testing.T) { + + tests := []struct { + name string + cfg configurationAuditTrail + assert func(t 
*testing.T, got []byte) + }{ + { + name: "ensure other marshallers are called", + cfg: configurationAuditTrail{ + + Files: filecataloging.Config{ + Selection: file.FilesOwnedByPackageSelection, + Hashers: []crypto.Hash{ + crypto.SHA256, + }, + }, + }, + // the custom file marshaller swaps ints for strings for hashers + assert: func(t *testing.T, got []byte) { + assert.Contains(t, string(got), `"hashers":["sha-256"]`) + }, + }, + { + name: "ensure maps are sorted", + cfg: configurationAuditTrail{}, + assert: func(t *testing.T, got []byte) { + assert.NoError(t, assertJSONKeysSorted(got)) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + got, err := tt.cfg.MarshalJSON() + require.NoError(t, err) + if tt.assert == nil { + t.Fatal("assert function must be provided") + } + tt.assert(t, got) + + }) + } +} + +// assertJSONKeysSorted asserts that all keys in JSON maps are sorted. +func assertJSONKeysSorted(jsonBytes []byte) error { + var errs error + decoder := json.NewDecoder(bytes.NewReader(jsonBytes)) + var keys []string + var inObject bool + + for { + token, err := decoder.Token() + if err != nil { + if err == io.EOF { + break + } + errs = multierror.Append(errs, fmt.Errorf("error decoding JSON: %w", err)) + } + + switch v := token.(type) { + case json.Delim: + switch v { + case '{': + inObject = true + keys = nil // Reset keys for a new object + case '}': + inObject = false + if !sort.StringsAreSorted(keys) { + errs = multierror.Append(errs, fmt.Errorf("Keys are not sorted: %v", keys)) + } + } + case string: + if inObject && v != "" { + keys = append(keys, v) + } + } + } + return errs +} + +func Test_assertJSONKeysSorted(t *testing.T) { + // this test function is sufficiently complicated enough to warrant its own test... 
+ + sorted := []byte(`{"a":1,"b":2}`) + unsorted := []byte(`{"b":2,"a":1}`) + + nestedSorted := []byte(`{"a":1,"b":{"a":1,"b":2}}`) + nestedUnsorted := []byte(`{"a":1,"b":{"b":2,"a":1}}`) + + tests := []struct { + name string + json []byte + wantErr require.ErrorAssertionFunc + }{ + { + name: "sorted", + json: sorted, + wantErr: require.NoError, + }, + { + name: "unsorted", + json: unsorted, + wantErr: require.Error, + }, + { + name: "nested sorted", + json: nestedSorted, + wantErr: require.NoError, + }, + { + name: "nested unsorted", + json: nestedUnsorted, + wantErr: require.Error, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + + err := assertJSONKeysSorted(tt.json) + tt.wantErr(t, err) + }) + + } +} diff --git a/syft/create_sbom.go b/syft/create_sbom.go new file mode 100644 index 00000000000..5acf9e50100 --- /dev/null +++ b/syft/create_sbom.go @@ -0,0 +1,145 @@ +package syft + +import ( + "context" + "fmt" + "sort" + "time" + + "github.com/dustin/go-humanize" + "github.com/scylladb/go-set/strset" + "github.com/wagoodman/go-progress" + + "github.com/anchore/syft/internal/bus" + "github.com/anchore/syft/internal/relationship" + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/internal/task" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/event/monitor" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +// CreateSBOM creates a software bill-of-materials from the given source. If the CreateSBOMConfig is nil, then +// default options will be used. 
+func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (*sbom.SBOM, error) { + if cfg == nil { + cfg = DefaultCreateSBOMConfig() + } + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + + srcMetadata := src.Describe() + + taskGroups, audit, err := cfg.makeTaskGroups(srcMetadata) + if err != nil { + return nil, err + } + + resolver, err := src.FileResolver(cfg.Search.Scope) + if err != nil { + return nil, fmt.Errorf("unable to get file resolver: %w", err) + } + + s := sbom.SBOM{ + Source: srcMetadata, + Descriptor: sbom.Descriptor{ + Name: cfg.ToolName, + Version: cfg.ToolVersion, + Configuration: configurationAuditTrail{ + Search: cfg.Search, + Relationships: cfg.Relationships, + DataGeneration: cfg.DataGeneration, + Packages: cfg.Packages, + Files: cfg.Files, + Catalogers: *audit, + ExtraConfigs: cfg.ToolConfiguration, + }, + }, + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(), + }, + } + + catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups) + packageCatalogingProgress := monitorPackageCatalogingTask(s.Artifacts.Packages) + + builder := sbomsync.NewBuilder(&s) + for i := range taskGroups { + err := task.NewTaskExecutor(taskGroups[i], cfg.Parallelism).Execute(ctx, resolver, builder, catalogingProgress) + if err != nil { + // TODO: tie this to the open progress monitors... 
+ return nil, fmt.Errorf("failed to run tasks: %w", err) + } + } + + packageCatalogingProgress.SetCompleted() + catalogingProgress.SetCompleted() + + relationship.Finalize(builder, cfg.Relationships, src) + + return &s, nil +} + +func monitorPackageCatalogingTask(pkgs *pkg.Collection) *monitor.CatalogerTaskProgress { + info := monitor.GenericTask{ + Title: monitor.Title{ + Default: "Packages", + }, + ID: monitor.PackageCatalogingTaskID, + HideOnSuccess: false, + ParentID: monitor.TopLevelCatalogingTaskID, + } + + prog := bus.StartCatalogerTask(info, -1, "") + + go func() { + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + for { + <-ticker.C + + count := humanize.Comma(int64(pkgs.PackageCount())) + prog.AtomicStage.Set(fmt.Sprintf("%s packages", count)) + + if progress.IsCompleted(prog) { + break + } + } + }() + + return prog +} + +func monitorCatalogingTask(srcID artifact.ID, tasks [][]task.Task) *monitor.CatalogerTaskProgress { + info := monitor.GenericTask{ + Title: monitor.Title{ + Default: "Catalog contents", + WhileRunning: "Cataloging contents", + OnSuccess: "Cataloged contents", + }, + ID: monitor.TopLevelCatalogingTaskID, + Context: string(srcID), + HideOnSuccess: false, + } + + var length int64 + for _, tg := range tasks { + length += int64(len(tg)) + } + + return bus.StartCatalogerTask(info, length, "") +} + +func formatTaskNames(tasks []task.Task) []string { + set := strset.New() + for _, td := range tasks { + set.Add(td.Name()) + } + list := set.List() + sort.Strings(list) + return list +} diff --git a/syft/create_sbom_config.go b/syft/create_sbom_config.go new file mode 100644 index 00000000000..14ef5a0fc40 --- /dev/null +++ b/syft/create_sbom_config.go @@ -0,0 +1,335 @@ +package syft + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/anchore/syft/internal/task" + "github.com/anchore/syft/syft/cataloging" + "github.com/anchore/syft/syft/cataloging/filecataloging" + 
"github.com/anchore/syft/syft/cataloging/pkgcataloging" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +// CreateSBOMConfig specifies all parameters needed for creating an SBOM. +type CreateSBOMConfig struct { + // required configuration input to specify how cataloging should be performed + Search cataloging.SearchConfig + Relationships cataloging.RelationshipsConfig + DataGeneration cataloging.DataGenerationConfig + Packages pkgcataloging.Config + Files filecataloging.Config + Parallelism int + CatalogerSelection pkgcataloging.SelectionRequest + + // audit what tool is being used to generate the SBOM + ToolName string + ToolVersion string + ToolConfiguration interface{} + + packageTaskFactories task.PackageTaskFactories + packageCatalogerReferences []pkgcataloging.CatalogerReference +} + +func DefaultCreateSBOMConfig() *CreateSBOMConfig { + return &CreateSBOMConfig{ + Search: cataloging.DefaultSearchConfig(), + Relationships: cataloging.DefaultRelationshipsConfig(), + DataGeneration: cataloging.DefaultDataGenerationConfig(), + Packages: pkgcataloging.DefaultConfig(), + Files: filecataloging.DefaultConfig(), + Parallelism: 1, + packageTaskFactories: task.DefaultPackageTaskFactories(), + } +} + +// WithTool allows for setting the specific name, version, and any additional configuration that is not captured +// in the syft default API configuration. This could cover inputs for catalogers that were user-provided, thus, +// is not visible to the syft API, but would be useful to see in the SBOM output. +func (c *CreateSBOMConfig) WithTool(name, version string, cfg ...any) *CreateSBOMConfig { + c.ToolName = name + c.ToolVersion = version + c.ToolConfiguration = cfg + return c +} + +// WithParallelism allows for setting the number of concurrent cataloging tasks that can be performed at once +func (c *CreateSBOMConfig) WithParallelism(p int) *CreateSBOMConfig { + if p < 1 { + // TODO: warn? 
+ p = 1 + } + c.Parallelism = p + return c +} + +// WithSearchConfig allows for setting the specific search configuration for cataloging. +func (c *CreateSBOMConfig) WithSearchConfig(cfg cataloging.SearchConfig) *CreateSBOMConfig { + c.Search = cfg + return c +} + +// WithRelationshipsConfig allows for defining the specific relationships that should be captured during cataloging. +func (c *CreateSBOMConfig) WithRelationshipsConfig(cfg cataloging.RelationshipsConfig) *CreateSBOMConfig { + c.Relationships = cfg + return c +} + +// WithDataGenerationConfig allows for defining what data elements that cannot be discovered from the underlying +// target being scanned that should be generated after package creation. +func (c *CreateSBOMConfig) WithDataGenerationConfig(cfg cataloging.DataGenerationConfig) *CreateSBOMConfig { + c.DataGeneration = cfg + return c +} + +// WithPackagesConfig allows for defining any specific behavior for syft-implemented catalogers. +func (c *CreateSBOMConfig) WithPackagesConfig(cfg pkgcataloging.Config) *CreateSBOMConfig { + c.Packages = cfg + return c +} + +// WithFilesConfig allows for defining file-based cataloging parameters. +func (c *CreateSBOMConfig) WithFilesConfig(cfg filecataloging.Config) *CreateSBOMConfig { + c.Files = cfg + return c +} + +// WithoutFiles allows for disabling file cataloging altogether. +func (c *CreateSBOMConfig) WithoutFiles() *CreateSBOMConfig { + c.Files = filecataloging.Config{ + Selection: file.NoFilesSelection, + Hashers: nil, + } + return c +} + +// WithCatalogerSelection allows for adding to, removing from, or sub-selecting the final set of catalogers by name or tag. +func (c *CreateSBOMConfig) WithCatalogerSelection(selection pkgcataloging.SelectionRequest) *CreateSBOMConfig { + c.CatalogerSelection = selection + return c +} + +// WithoutCatalogers removes all catalogers from the final set of catalogers. 
This is useful if you want to only use +// user-provided catalogers (without the default syft-provided catalogers). +func (c *CreateSBOMConfig) WithoutCatalogers() *CreateSBOMConfig { + c.packageTaskFactories = nil + c.packageCatalogerReferences = nil + return c +} + +// WithCatalogers allows for adding user-provided catalogers to the final set of catalogers that will always be run +// regardless of the source type or any cataloger selections provided. +func (c *CreateSBOMConfig) WithCatalogers(catalogerRefs ...pkgcataloging.CatalogerReference) *CreateSBOMConfig { + c.packageCatalogerReferences = append(c.packageCatalogerReferences, catalogerRefs...) + + return c +} + +// makeTaskGroups considers the entire configuration and finalizes the set of tasks to be run. Tasks are run in +// groups, where each task in a group can be run concurrently, while tasks in different groups must be run serially. +// The final set of task groups is returned along with a cataloger manifest that describes the catalogers that were +// selected and the tokens that were sensitive to this selection (both for adding and removing from the final set). 
+func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task, *catalogerManifest, error) { + var taskGroups [][]task.Task + + // generate package and file tasks based on the configuration + environmentTasks := c.environmentTasks() + relationshipsTasks := c.relationshipTasks(src) + fileTasks := c.fileTasks() + pkgTasks, selectionEvidence, err := c.packageTasks(src) + if err != nil { + return nil, nil, err + } + + // combine the user-provided and configured tasks + if c.Files.Selection == file.FilesOwnedByPackageSelection { + // special case: we need the package info when we are cataloging files owned by packages + taskGroups = append(taskGroups, pkgTasks, fileTasks) + } else { + taskGroups = append(taskGroups, append(pkgTasks, fileTasks...)) + } + + // all relationship work must be done after all nodes (files and packages) have been cataloged + if len(relationshipsTasks) > 0 { + taskGroups = append(taskGroups, relationshipsTasks) + } + + // identifying the environment (i.e. the linux release) must be done first as this is required for package cataloging + taskGroups = append( + [][]task.Task{ + environmentTasks, + }, + taskGroups..., + ) + + return taskGroups, &catalogerManifest{ + Requested: selectionEvidence.Request, + Used: formatTaskNames(pkgTasks), + }, nil +} + +// fileTasks returns the set of tasks that should be run to catalog files. +func (c *CreateSBOMConfig) fileTasks() []task.Task { + var tsks []task.Task + + if t := task.NewFileDigestCatalogerTask(c.Files.Selection, c.Files.Hashers...); t != nil { + tsks = append(tsks, t) + } + if t := task.NewFileMetadataCatalogerTask(c.Files.Selection); t != nil { + tsks = append(tsks, t) + } + if t := task.NewFileContentCatalogerTask(c.Files.Content); t != nil { + tsks = append(tsks, t) + } + + return tsks +} + +// packageTasks returns the set of tasks that should be run to catalog packages. 
+func (c *CreateSBOMConfig) packageTasks(src source.Description) ([]task.Task, *task.Selection, error) { + cfg := task.CatalogingFactoryConfig{ + SearchConfig: c.Search, + RelationshipsConfig: c.Relationships, + DataGenerationConfig: c.DataGeneration, + PackagesConfig: c.Packages, + } + + persistentTasks, selectableTasks, err := c.allPackageTasks(cfg) + if err != nil { + return nil, nil, fmt.Errorf("unable to create package cataloger tasks: %w", err) + } + + req, err := finalSelectionRequest(c.CatalogerSelection, src) + if err != nil { + return nil, nil, err + } + + finalTasks, selection, err := task.Select(selectableTasks, *req) + if err != nil { + return nil, nil, err + } + + finalTasks = append(finalTasks, persistentTasks...) + + if len(finalTasks) == 0 { + return nil, nil, fmt.Errorf("no catalogers selected") + } + + return finalTasks, &selection, nil +} + +func finalSelectionRequest(req pkgcataloging.SelectionRequest, src source.Description) (*pkgcataloging.SelectionRequest, error) { + if len(req.DefaultNamesOrTags) == 0 { + defaultTag, err := findDefaultTag(src) + if err != nil { + return nil, fmt.Errorf("unable to determine default cataloger tag: %w", err) + } + + if defaultTag != "" { + req.DefaultNamesOrTags = append(req.DefaultNamesOrTags, defaultTag) + } + + req.RemoveNamesOrTags = replaceDefaultTagReferences(defaultTag, req.RemoveNamesOrTags) + req.SubSelectTags = replaceDefaultTagReferences(defaultTag, req.SubSelectTags) + } + + return &req, nil +} + +func (c *CreateSBOMConfig) allPackageTasks(cfg task.CatalogingFactoryConfig) ([]task.Task, []task.Task, error) { + persistentPackageTasks, selectablePackageTasks, err := c.userPackageTasks(cfg) + if err != nil { + return nil, nil, err + } + + tsks, err := c.packageTaskFactories.Tasks(cfg) + if err != nil { + return nil, nil, fmt.Errorf("unable to create package cataloger tasks: %w", err) + } + + return persistentPackageTasks, append(tsks, selectablePackageTasks...), nil +} + +func (c *CreateSBOMConfig) 
userPackageTasks(cfg task.CatalogingFactoryConfig) ([]task.Task, []task.Task, error) { + var ( + persistentPackageTasks []task.Task + selectablePackageTasks []task.Task + ) + + for _, catalogerRef := range c.packageCatalogerReferences { + if catalogerRef.Cataloger == nil { + return nil, nil, errors.New("provided cataloger reference without a cataloger") + } + if catalogerRef.AlwaysEnabled { + persistentPackageTasks = append(persistentPackageTasks, task.NewPackageTask(cfg, catalogerRef.Cataloger, catalogerRef.Tags...)) + continue + } + if len(catalogerRef.Tags) == 0 { + return nil, nil, errors.New("provided cataloger reference without tags") + } + selectablePackageTasks = append(selectablePackageTasks, task.NewPackageTask(cfg, catalogerRef.Cataloger, catalogerRef.Tags...)) + } + + return persistentPackageTasks, selectablePackageTasks, nil +} + +// relationshipTasks returns the set of tasks that should be run to generate additional relationships as well as +// prune existing relationships. +func (c *CreateSBOMConfig) relationshipTasks(src source.Description) []task.Task { + var tsks []task.Task + + if t := task.NewRelationshipsTask(c.Relationships, src); t != nil { + tsks = append(tsks, t) + } + return tsks +} + +// environmentTasks returns the set of tasks that should be run to identify what is being scanned or the context +// of where it is being scanned. Today this is used to identify the linux distribution release for container images +// being scanned. 
+func (c *CreateSBOMConfig) environmentTasks() []task.Task { + var tsks []task.Task + + if t := task.NewEnvironmentTask(); t != nil { + tsks = append(tsks, t) + } + return tsks +} + +func (c *CreateSBOMConfig) validate() error { + if c.Relationships.ExcludeBinaryPackagesWithFileOwnershipOverlap { + if !c.Relationships.PackageFileOwnershipOverlap { + return fmt.Errorf("invalid configuration: to exclude binary packages based on file ownership overlap relationships, cataloging file ownership overlap relationships must be enabled") + } + } + return nil +} + +// Create creates an SBOM from the given source with the current SBOM configuration. +func (c *CreateSBOMConfig) Create(ctx context.Context, src source.Source) (*sbom.SBOM, error) { + return CreateSBOM(ctx, src, c) +} + +func findDefaultTag(src source.Description) (string, error) { + switch m := src.Metadata.(type) { + case source.StereoscopeImageSourceMetadata: + return pkgcataloging.ImageTag, nil + case source.FileSourceMetadata, source.DirectorySourceMetadata: + return pkgcataloging.DirectoryTag, nil + default: + return "", fmt.Errorf("unable to determine default cataloger tag for source type=%T", m) + } +} + +func replaceDefaultTagReferences(defaultTag string, lst []string) []string { + for i, tag := range lst { + if strings.ToLower(tag) == "default" { + lst[i] = defaultTag + } + } + return lst +} diff --git a/syft/create_sbom_config_test.go b/syft/create_sbom_config_test.go new file mode 100644 index 00000000000..760787d7795 --- /dev/null +++ b/syft/create_sbom_config_test.go @@ -0,0 +1,502 @@ +package syft + +import ( + "sort" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/internal/task" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/cataloging" + 
"github.com/anchore/syft/syft/cataloging/filecataloging" + "github.com/anchore/syft/syft/cataloging/pkgcataloging" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +var _ pkg.Cataloger = (*dummyCataloger)(nil) + +type dummyCataloger struct { + name string +} + +func newDummyCataloger(name string) pkg.Cataloger { + return dummyCataloger{name: name} +} + +func (d dummyCataloger) Name() string { + return d.name +} + +func (d dummyCataloger) Catalog(_ file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { + return nil, nil, nil +} + +func TestCreateSBOMConfig_makeTaskGroups(t *testing.T) { + pkgIntersect := func(intersect ...string) []string { + var sets []*strset.Set + for _, s := range intersect { + sets = append(sets, strset.New(pkgCatalogerNamesWithTagOrName(t, s)...)) + } + + intersectSet := strset.Intersection(sets...) + + slice := intersectSet.List() + + sort.Strings(slice) + + return slice + } + + addTo := func(slice []string, add ...string) []string { + slice = append(slice, add...) 
+ sort.Strings(slice) + return slice + } + + imgSrc := source.Description{ + Metadata: source.StereoscopeImageSourceMetadata{}, + } + + dirSrc := source.Description{ + Metadata: source.DirectorySourceMetadata{}, + } + + fileSrc := source.Description{ + Metadata: source.FileSourceMetadata{}, + } + + tests := []struct { + name string + src source.Description + cfg *CreateSBOMConfig + wantTaskNames [][]string + wantManifest *catalogerManifest + wantErr require.ErrorAssertionFunc + }{ + { + name: "default catalogers for image source", + src: imgSrc, + cfg: DefaultCreateSBOMConfig(), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + pkgCatalogerNamesWithTagOrName(t, "image"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, "image"), + }, + wantErr: require.NoError, + }, + { + name: "default catalogers for directory source", + src: dirSrc, + cfg: DefaultCreateSBOMConfig(), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + pkgCatalogerNamesWithTagOrName(t, "directory"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"directory"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, "directory"), + }, + wantErr: require.NoError, + }, + { + // note, the file source acts like a directory scan + name: "default catalogers for file source", + src: fileSrc, + cfg: DefaultCreateSBOMConfig(), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + pkgCatalogerNamesWithTagOrName(t, "directory"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"directory"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, 
"directory"), + }, + wantErr: require.NoError, + }, + { + name: "no file digest cataloger", + src: imgSrc, + cfg: DefaultCreateSBOMConfig().WithFilesConfig(filecataloging.DefaultConfig().WithHashers(nil)), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + pkgCatalogerNamesWithTagOrName(t, "image"), + fileCatalogerNames(false, true), // note: the digest cataloger is not included + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, "image"), + }, + wantErr: require.NoError, + }, + { + name: "select no file catalogers", + src: imgSrc, + cfg: DefaultCreateSBOMConfig().WithFilesConfig(filecataloging.DefaultConfig().WithSelection(file.NoFilesSelection)), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + pkgCatalogerNamesWithTagOrName(t, "image"), + // note: there are no file catalogers in their own group + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, "image"), + }, + wantErr: require.NoError, + }, + { + name: "select all file catalogers", + src: imgSrc, + cfg: DefaultCreateSBOMConfig().WithFilesConfig(filecataloging.DefaultConfig().WithSelection(file.AllFilesSelection)), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + // note: there is a single group of catalogers for pkgs and files + append( + pkgCatalogerNamesWithTagOrName(t, "image"), + fileCatalogerNames(true, true)..., + ), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, "image"), + }, + wantErr: require.NoError, + }, + { + name: "user-provided persistent cataloger is always run (image)", + src: imgSrc, + cfg: 
DefaultCreateSBOMConfig().WithCatalogers( + pkgcataloging.NewAlwaysEnabledCatalogerReference(newDummyCataloger("persistent")), + ), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + addTo(pkgCatalogerNamesWithTagOrName(t, "image"), "persistent"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: addTo(pkgCatalogerNamesWithTagOrName(t, "image"), "persistent"), + }, + wantErr: require.NoError, + }, + { + name: "user-provided persistent cataloger is always run (directory)", + src: dirSrc, + cfg: DefaultCreateSBOMConfig().WithCatalogers( + pkgcataloging.NewAlwaysEnabledCatalogerReference(newDummyCataloger("persistent")), + ), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + addTo(pkgCatalogerNamesWithTagOrName(t, "directory"), "persistent"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"directory"}, + }, + Used: addTo(pkgCatalogerNamesWithTagOrName(t, "directory"), "persistent"), + }, + wantErr: require.NoError, + }, + { + name: "user-provided persistent cataloger is always run (user selection does not affect this)", + src: imgSrc, + cfg: DefaultCreateSBOMConfig().WithCatalogers( + pkgcataloging.NewAlwaysEnabledCatalogerReference(newDummyCataloger("persistent")), + ).WithCatalogerSelection(pkgcataloging.NewSelectionRequest().WithSubSelections("javascript")), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + addTo(pkgIntersect("image", "javascript"), "persistent"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + SubSelectTags: []string{"javascript"}, + }, + Used: addTo(pkgIntersect("image", 
"javascript"), "persistent"), + }, + wantErr: require.NoError, + }, + { + name: "user-provided cataloger runs when selected", + src: imgSrc, + cfg: DefaultCreateSBOMConfig().WithCatalogers( + pkgcataloging.NewCatalogerReference(newDummyCataloger("user-provided"), []string{"image"}), + ), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + addTo(pkgCatalogerNamesWithTagOrName(t, "image"), "user-provided"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: addTo(pkgCatalogerNamesWithTagOrName(t, "image"), "user-provided"), + }, + wantErr: require.NoError, + }, + { + name: "user-provided cataloger NOT run when NOT selected", + src: imgSrc, + cfg: DefaultCreateSBOMConfig().WithCatalogers( + pkgcataloging.NewCatalogerReference(newDummyCataloger("user-provided"), []string{"bogus-selector-will-never-be-used"}), + ), + wantTaskNames: [][]string{ + environmentCatalogerNames(), + pkgCatalogerNamesWithTagOrName(t, "image"), + fileCatalogerNames(true, true), + relationshipCatalogerNames(), + }, + wantManifest: &catalogerManifest{ + Requested: pkgcataloging.SelectionRequest{ + DefaultNamesOrTags: []string{"image"}, + }, + Used: pkgCatalogerNamesWithTagOrName(t, "image"), + }, + wantErr: require.NoError, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + + // sanity check + require.NotEmpty(t, tt.wantTaskNames) + for _, group := range tt.wantTaskNames { + require.NotEmpty(t, group) + } + + // test the subject + gotTasks, gotManifest, err := tt.cfg.makeTaskGroups(tt.src) + tt.wantErr(t, err) + if err != nil { + return + } + + gotNames := taskGroupNames(gotTasks) + + if d := cmp.Diff( + tt.wantTaskNames, + gotNames, + // order within a group does not matter + cmpopts.SortSlices(func(a, b string) bool { + return a < b + }), + ); d != 
"" { + t.Errorf("mismatched task group names (-want +got):\n%s", d) + } + + if d := cmp.Diff(tt.wantManifest, gotManifest); d != "" { + t.Errorf("mismatched cataloger manifest (-want +got):\n%s", d) + } + }) + } +} + +func pkgCatalogerNamesWithTagOrName(t *testing.T, token string) []string { + var names []string + cfg := task.DefaultCatalogingFactoryConfig() + for _, factory := range task.DefaultPackageTaskFactories() { + cat := factory(cfg) + + name := cat.Name() + + if selector, ok := cat.(task.Selector); ok { + if selector.HasAllSelectors(token) { + names = append(names, name) + continue + } + } + if name == token { + names = append(names, name) + } + } + + // these thresholds are arbitrary but should be large enough to catch any major changes + switch token { + case "image": + require.Greater(t, len(names), 18, "minimum cataloger sanity check failed token") + case "directory": + require.Greater(t, len(names), 25, "minimum cataloger sanity check failed token") + default: + require.Greater(t, len(names), 0, "minimum cataloger sanity check failed token") + } + + sort.Strings(names) + return names +} + +func fileCatalogerNames(digest, metadata bool) []string { + var names []string + if digest { + names = append(names, "file-digest-cataloger") + } + if metadata { + names = append(names, "file-metadata-cataloger") + } + return names +} + +func relationshipCatalogerNames() []string { + return []string{"relationships-cataloger"} +} + +func environmentCatalogerNames() []string { + return []string{"environment-cataloger"} +} + +func taskGroupNames(groups [][]task.Task) [][]string { + var names [][]string + for _, group := range groups { + var groupNames []string + for _, tsk := range group { + groupNames = append(groupNames, tsk.Name()) + } + names = append(names, groupNames) + } + return names +} + +func Test_replaceDefaultTagReferences(t *testing.T) { + + tests := []struct { + name string + lst []string + want []string + }{ + { + name: "no default tag", + lst: 
[]string{"foo", "bar"}, + want: []string{"foo", "bar"}, + }, + { + name: "replace default tag", + lst: []string{"foo", "default", "bar"}, + want: []string{"foo", "replacement", "bar"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, replaceDefaultTagReferences("replacement", tt.lst)) + }) + } +} + +func Test_findDefaultTag(t *testing.T) { + + tests := []struct { + name string + src source.Description + want string + wantErr require.ErrorAssertionFunc + }{ + { + name: "image", + src: source.Description{ + Metadata: source.StereoscopeImageSourceMetadata{}, + }, + want: pkgcataloging.ImageTag, + }, + { + name: "directory", + src: source.Description{ + Metadata: source.DirectorySourceMetadata{}, + }, + want: pkgcataloging.DirectoryTag, + }, + { + name: "file", + src: source.Description{ + Metadata: source.FileSourceMetadata{}, + }, + want: pkgcataloging.DirectoryTag, // not a mistake... + }, + { + name: "unknown", + src: source.Description{ + Metadata: struct{}{}, + }, + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + got, err := findDefaultTag(tt.src) + tt.wantErr(t, err) + if err != nil { + return + } + assert.Equal(t, tt.want, got) + }) + } +} + +func TestCreateSBOMConfig_validate(t *testing.T) { + tests := []struct { + name string + cfg *CreateSBOMConfig + wantErr assert.ErrorAssertionFunc + }{ + { + name: "incompatible ExcludeBinaryPackagesWithFileOwnershipOverlap selection", + cfg: DefaultCreateSBOMConfig(). + WithRelationshipsConfig( + cataloging.DefaultRelationshipsConfig(). + WithExcludeBinaryPackagesWithFileOwnershipOverlap(true). 
+ WithPackageFileOwnershipOverlap(false), + ), + wantErr: assert.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = assert.NoError + } + tt.wantErr(t, tt.cfg.validate()) + }) + } +} diff --git a/syft/event/monitor/cataloger_task_progress.go b/syft/event/monitor/cataloger_task_progress.go index 6f459c8e20b..49f438fd038 100644 --- a/syft/event/monitor/cataloger_task_progress.go +++ b/syft/event/monitor/cataloger_task_progress.go @@ -4,6 +4,11 @@ import ( "github.com/wagoodman/go-progress" ) +const ( + TopLevelCatalogingTaskID = "cataloging" + PackageCatalogingTaskID = "package-cataloging" +) + type CatalogerTaskProgress struct { *progress.AtomicStage *progress.Manual diff --git a/syft/file/cataloger/filecontent/cataloger.go b/syft/file/cataloger/filecontent/cataloger.go index cc04253729d..e9454b4bed4 100644 --- a/syft/file/cataloger/filecontent/cataloger.go +++ b/syft/file/cataloger/filecontent/cataloger.go @@ -6,21 +6,40 @@ import ( "fmt" "io" + "github.com/dustin/go-humanize" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/bus" + intFile "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/event/monitor" "github.com/anchore/syft/syft/file" ) +type Config struct { + // Globs are the file patterns that must be matched for a file to be considered for cataloging. + Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` + + // SkipFilesAboveSize is the maximum file size (in bytes) to allow to be considered while cataloging. If the file is larger than this size it will be skipped. 
+ SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` +} + type Cataloger struct { globs []string skipFilesAboveSizeInBytes int64 } -func NewCataloger(globs []string, skipFilesAboveSize int64) (*Cataloger, error) { +func DefaultConfig() Config { + return Config{ + SkipFilesAboveSize: 250 * intFile.KB, + } +} + +func NewCataloger(cfg Config) *Cataloger { return &Cataloger{ - globs: globs, - skipFilesAboveSizeInBytes: skipFilesAboveSize, - }, nil + globs: cfg.Globs, + skipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize, + } } func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]string, error) { @@ -31,9 +50,15 @@ func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]string if err != nil { return nil, err } + + prog := catalogingProgress(int64(len(locations))) + for _, location := range locations { + prog.AtomicStage.Set(location.Path()) + metadata, err := resolver.FileMetadataByLocation(location) if err != nil { + prog.SetError(err) return nil, err } @@ -47,12 +72,20 @@ func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]string continue } if err != nil { + prog.SetError(err) return nil, err } + + prog.Increment() + results[location.Coordinates] = result } + log.Debugf("file contents cataloger processed %d files", len(results)) + prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current()))) + prog.SetCompleted() + return results, nil } @@ -66,7 +99,7 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati buf := &bytes.Buffer{} encoder := base64.NewEncoder(base64.StdEncoding, buf) if _, err = io.Copy(encoder, contentReader); err != nil { - return "", internal.ErrPath{Context: "contents-cataloger", Path: location.RealPath, Err: err} + return "", internal.ErrPath{Context: "content-cataloger", Path: location.RealPath, Err: err} } // note: it's important to close the reader before reading from the 
buffer since closing will flush the remaining bytes if err := encoder.Close(); err != nil { @@ -75,3 +108,14 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati return buf.String(), nil } + +func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress { + info := monitor.GenericTask{ + Title: monitor.Title{ + Default: "File contents", + }, + ParentID: monitor.TopLevelCatalogingTaskID, + } + + return bus.StartCatalogerTask(info, locations, "") +} diff --git a/syft/file/cataloger/filecontent/cataloger_test.go b/syft/file/cataloger/filecontent/cataloger_test.go index 719bdd48183..fc937031d90 100644 --- a/syft/file/cataloger/filecontent/cataloger_test.go +++ b/syft/file/cataloger/filecontent/cataloger_test.go @@ -67,8 +67,10 @@ func TestContentsCataloger(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - c, err := NewCataloger(test.globs, test.maxSize) - assert.NoError(t, err) + c := NewCataloger(Config{ + Globs: test.globs, + SkipFilesAboveSize: test.maxSize, + }) resolver := file.NewMockResolverForPaths(test.files...) 
actual, err := c.Catalog(resolver) diff --git a/syft/file/cataloger/filedigest/cataloger.go b/syft/file/cataloger/filedigest/cataloger.go index 00f193b2787..a5dce0d14cf 100644 --- a/syft/file/cataloger/filedigest/cataloger.go +++ b/syft/file/cataloger/filedigest/cataloger.go @@ -45,32 +45,34 @@ func (i *Cataloger) Catalog(resolver file.Resolver, coordinates ...file.Coordina } } - prog := digestsCatalogingProgress(int64(len(locations))) + prog := catalogingProgress(int64(len(locations))) for _, location := range locations { - prog.Increment() - prog.AtomicStage.Set(location.Path()) - result, err := i.catalogLocation(resolver, location) if errors.Is(err, ErrUndigestableFile) { continue } + prog.AtomicStage.Set(location.Path()) + if internal.IsErrPathPermission(err) { log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err) continue } if err != nil { - return nil, err + prog.SetError(err) + return nil, fmt.Errorf("failed to process file %q: %w", location.RealPath, err) } + prog.Increment() + results[location.Coordinates] = result } log.Debugf("file digests cataloger processed %d files", prog.Current()) - prog.AtomicStage.Set(fmt.Sprintf("%s digests", humanize.Comma(prog.Current()))) + prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current()))) prog.SetCompleted() return results, nil @@ -101,13 +103,12 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati return digests, nil } -func digestsCatalogingProgress(locations int64) *monitor.CatalogerTaskProgress { +func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress { info := monitor.GenericTask{ Title: monitor.Title{ - Default: "Catalog file digests", - WhileRunning: "Cataloging file digests", - OnSuccess: "Cataloged file digests", + Default: "File digests", }, + ParentID: monitor.TopLevelCatalogingTaskID, } return bus.StartCatalogerTask(info, locations, "") diff --git a/syft/file/cataloger/filemetadata/cataloger.go 
b/syft/file/cataloger/filemetadata/cataloger.go index 427444534bb..95796ea0a1a 100644 --- a/syft/file/cataloger/filemetadata/cataloger.go +++ b/syft/file/cataloger/filemetadata/cataloger.go @@ -43,16 +43,18 @@ func (i *Cataloger) Catalog(resolver file.Resolver, coordinates ...file.Coordina }() } - prog := metadataCatalogingProgress(int64(len(locations))) + prog := catalogingProgress(-1) for location := range locations { - prog.Increment() prog.AtomicStage.Set(location.Path()) metadata, err := resolver.FileMetadataByLocation(location) if err != nil { + prog.SetError(err) return nil, err } + prog.Increment() + results[location.Coordinates] = metadata } @@ -64,13 +66,12 @@ func (i *Cataloger) Catalog(resolver file.Resolver, coordinates ...file.Coordina return results, nil } -func metadataCatalogingProgress(locations int64) *monitor.CatalogerTaskProgress { +func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress { info := monitor.GenericTask{ Title: monitor.Title{ - Default: "Catalog file metadata", - WhileRunning: "Cataloging file metadata", - OnSuccess: "Cataloged file metadata", + Default: "File metadata", }, + ParentID: monitor.TopLevelCatalogingTaskID, } return bus.StartCatalogerTask(info, locations, "") diff --git a/syft/file/selection.go b/syft/file/selection.go new file mode 100644 index 00000000000..fb7f46f9907 --- /dev/null +++ b/syft/file/selection.go @@ -0,0 +1,9 @@ +package file + +const ( + NoFilesSelection Selection = "none" + FilesOwnedByPackageSelection Selection = "owned-by-package" + AllFilesSelection Selection = "all" +) + +type Selection string diff --git a/syft/lib.go b/syft/lib.go index d8ef7617d67..8ebb3cfbab4 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -17,103 +17,13 @@ Similar to the cataloging process, Linux distribution identification is also per package syft import ( - "fmt" - "github.com/wagoodman/go-partybus" "github.com/anchore/go-logger" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" - 
"github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger" - "github.com/anchore/syft/syft/source" ) -// CatalogPackages takes an inventory of packages from the given image from a particular perspective -// (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux -// distribution, and the source object used to wrap the data source. -func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection, []artifact.Relationship, *linux.Release, error) { - resolver, err := src.FileResolver(cfg.Search.Scope) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err) - } - - // find the distro - release := linux.IdentifyRelease(resolver) - if release != nil { - log.Infof("identified distro: %s", release.String()) - } else { - log.Info("could not identify distro") - } - - // if the catalogers have been configured, use them regardless of input type - var catalogers []pkg.Cataloger - if len(cfg.Catalogers) > 0 { - catalogers = cataloger.AllCatalogers(cfg) - } else { - // otherwise conditionally use the correct set of loggers based on the input type (container image or directory) - - // TODO: this is bad, we should not be using the concrete type to determine the cataloger set - // instead this should be a caller concern (pass the catalogers you want to use). The SBOM build PR will do this. 
- switch src.(type) { - case *source.StereoscopeImageSource: - log.Info("cataloging an image") - catalogers = cataloger.ImageCatalogers(cfg) - case *source.FileSource: - log.Info("cataloging a file") - catalogers = cataloger.AllCatalogers(cfg) - case *source.DirectorySource: - log.Info("cataloging a directory") - catalogers = cataloger.DirectoryCatalogers(cfg) - default: - return nil, nil, nil, fmt.Errorf("unsupported source type: %T", src) - } - } - - catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...) - - // apply exclusions to the package catalog - // default config value for this is true - // https://github.com/anchore/syft/issues/931 - if cfg.ExcludeBinaryOverlapByOwnership { - for _, r := range relationships { - if cataloger.ExcludeBinaryByFileOwnershipOverlap(r, catalog) { - catalog.Delete(r.To.ID()) - relationships = removeRelationshipsByID(relationships, r.To.ID()) - } - } - } - - // no need to consider source relationships for os -> binary exclusions - relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...) - return catalog, relationships, release, err -} - -func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship { - var filtered []artifact.Relationship - for _, r := range relationships { - if r.To.ID() != id && r.From.ID() != id { - filtered = append(filtered, r) - } - } - return filtered -} - -func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship { - relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method? - for p := range c.Enumerate() { - relationships = append(relationships, artifact.Relationship{ - From: src, - To: p, - Type: artifact.ContainsRelationship, - }) - } - - return relationships -} - // SetLogger sets the logger object used for all syft logging calls. 
func SetLogger(logger logger.Logger) { log.Set(logger) diff --git a/syft/lib_test.go b/syft/lib_test.go index 0e2ca8a8e9a..abb0e0027fe 100644 --- a/syft/lib_test.go +++ b/syft/lib_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/require" + "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" ) @@ -34,7 +35,7 @@ func Test_removeRelationshipsByID(t *testing.T) { for _, r := range relationships { if r.From.ID() == "1" || r.From.ID() == "2" { - relationships = removeRelationshipsByID(relationships, r.From.ID()) + relationships = relationship.RemoveRelationshipsByID(relationships, r.From.ID()) } } diff --git a/syft/pkg/cataloger/binary/cataloger.go b/syft/pkg/cataloger/binary/cataloger.go index 8219a171d73..5d9b5a2ad59 100644 --- a/syft/pkg/cataloger/binary/cataloger.go +++ b/syft/pkg/cataloger/binary/cataloger.go @@ -13,7 +13,7 @@ import ( const catalogerName = "binary-cataloger" type CatalogerConfig struct { - Classifiers []Classifier + Classifiers []Classifier `yaml:"classifiers" json:"classifiers" mapstructure:"classifiers"` } func DefaultCatalogerConfig() CatalogerConfig { diff --git a/syft/pkg/cataloger/binary/classifier.go b/syft/pkg/cataloger/binary/classifier.go index be818a3ad1d..a7689ae70cd 100644 --- a/syft/pkg/cataloger/binary/classifier.go +++ b/syft/pkg/cataloger/binary/classifier.go @@ -23,25 +23,25 @@ import ( // Classifier is a generic package classifier that can be used to match a package definition // to a file that meets the given content criteria of the EvidenceMatcher. type Classifier struct { - Class string + Class string `json:"class"` // FileGlob is a selector to narrow down file inspection using the **/glob* syntax - FileGlob string + FileGlob string `json:"fileGlob"` // EvidenceMatcher is what will be used to match against the file in the source // location. If the matcher returns a package, the file will be considered a candidate. 
- EvidenceMatcher EvidenceMatcher + EvidenceMatcher EvidenceMatcher `json:"-"` // Information below is used to specify the Package information when returned // Package is the name to use for the package - Package string + Package string `json:"package"` // PURL is the Package URL to use when generating a package - PURL packageurl.PackageURL + PURL packageurl.PackageURL `json:"purl"` // CPEs are the specific CPEs we want to include for this binary with updated version information - CPEs []cpe.CPE + CPEs []cpe.CPE `json:"cpes"` } // EvidenceMatcher is a function called to catalog Packages that match some sort of evidence diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go deleted file mode 100644 index e87840347b1..00000000000 --- a/syft/pkg/cataloger/catalog.go +++ /dev/null @@ -1,229 +0,0 @@ -package cataloger - -import ( - "fmt" - "math" - "runtime/debug" - "sync" - - "github.com/dustin/go-humanize" - "github.com/hashicorp/go-multierror" - "github.com/wagoodman/go-progress" - - "github.com/anchore/syft/internal/bus" - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/event/monitor" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" -) - -// Monitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus). 
-type Monitor struct { - FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers - PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers -} - -// catalogResult provides the result of running a single cataloger against source -type catalogResult struct { - Packages []pkg.Package - Relationships []artifact.Relationship - // Discovered may sometimes be more than len(packages) - Discovered int64 - Error error -} - -func runCataloger(cataloger pkg.Cataloger, resolver file.Resolver) (catalogerResult *catalogResult, err error) { - // handle individual cataloger panics - defer func() { - if e := recover(); e != nil { - err = fmt.Errorf("%v at:\n%s", e, string(debug.Stack())) - } - }() - - catalogerResult = new(catalogResult) - - // find packages from the underlying raw data - log.WithFields("cataloger", cataloger.Name()).Trace("cataloging started") - packages, relationships, err := cataloger.Catalog(resolver) - if err != nil { - log.WithFields("cataloger", cataloger.Name()).Warn("error while cataloging") - return catalogerResult, err - } - - catalogedPackages := len(packages) - - log.WithFields("cataloger", cataloger.Name()).Debugf("discovered %d packages", catalogedPackages) - catalogerResult.Discovered = int64(catalogedPackages) - - for _, p := range packages { - // generate CPEs (note: this is excluded from package ID, so is safe to mutate) - // we might have binary classified CPE already with the package so we want to append here - - dictionaryCPE, ok := cpe.DictionaryFind(p) - if ok { - log.Debugf("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString()) - p.CPEs = append(p.CPEs, dictionaryCPE) - } else { - p.CPEs = append(p.CPEs, cpe.Generate(p)...) - } - - // if we were not able to identify the language we have an opportunity - // to try and get this value from the PURL. 
Worst case we assert that - // we could not identify the language at either stage and set UnknownLanguage - if p.Language == "" { - p.Language = pkg.LanguageFromPURL(p.PURL) - } - - // create file-to-package relationships for files owned by the package - owningRelationships, err := packageFileOwnershipRelationships(p, resolver) - if err != nil { - log.WithFields("cataloger", cataloger.Name(), "package", p.Name, "error", err).Warnf("unable to create any package-file relationships") - } else { - catalogerResult.Relationships = append(catalogerResult.Relationships, owningRelationships...) - } - catalogerResult.Packages = append(catalogerResult.Packages, p) - } - - catalogerResult.Relationships = append(catalogerResult.Relationships, relationships...) - log.WithFields("cataloger", cataloger.Name()).Trace("cataloging complete") - return catalogerResult, err -} - -// Catalog a given source (container image or filesystem) with the given catalogers, returning all discovered packages. -// In order to efficiently retrieve contents from a underlying container image the content fetch requests are -// done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single -// request. -// -//nolint:funlen -func Catalog(resolver file.Resolver, _ *linux.Release, parallelism int, catalogers ...pkg.Cataloger) (*pkg.Collection, []artifact.Relationship, error) { - catalog := pkg.NewCollection() - var allRelationships []artifact.Relationship - - prog := monitorPackageCatalogingTask() - - // perform analysis, accumulating errors for each failed analysis - var errs error - - nCatalogers := len(catalogers) - - // we do not need more parallelism than there are `catalogers`. 
- parallelism = int(math.Min(float64(nCatalogers), math.Max(1.0, float64(parallelism)))) - log.WithFields("parallelism", parallelism, "catalogers", nCatalogers).Debug("cataloging packages") - - jobs := make(chan pkg.Cataloger, nCatalogers) - results := make(chan *catalogResult, nCatalogers) - - waitGroup := sync.WaitGroup{} - - var totalPackagesDiscovered int64 - - for i := 0; i < parallelism; i++ { - waitGroup.Add(1) - - go func() { - defer waitGroup.Done() - - // wait for / get the next cataloger job available. - for cataloger := range jobs { - result, err := runCataloger(cataloger, resolver) - - // ensure we set the error to be aggregated - result.Error = err - - prog.Add(result.Discovered) - totalPackagesDiscovered += result.Discovered - count := humanize.Comma(totalPackagesDiscovered) - prog.AtomicStage.Set(fmt.Sprintf("%s packages", count)) - - results <- result - } - }() - } - - // Enqueue the jobs - for _, cataloger := range catalogers { - jobs <- cataloger - } - close(jobs) - - // Wait for the jobs to finish - waitGroup.Wait() - close(results) - - // collect the results - for result := range results { - if result.Error != nil { - errs = multierror.Append(errs, result.Error) - } - for _, p := range result.Packages { - catalog.Add(p) - } - allRelationships = append(allRelationships, result.Relationships...) - } - - allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...) 
- - if errs != nil { - prog.SetError(errs) - } else { - prog.SetCompleted() - } - - return catalog, allRelationships, errs -} - -func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { - fileOwner, ok := p.Metadata.(pkg.FileOwner) - if !ok { - return nil, nil - } - - locations := map[artifact.ID]file.Location{} - - for _, path := range fileOwner.OwnedFiles() { - pathRefs, err := resolver.FilesByPath(path) - if err != nil { - return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) - } - - if len(pathRefs) == 0 { - // ideally we want to warn users about missing files from a package, however, it is very common for - // container image authors to delete files that are not needed in order to keep image sizes small. Adding - // a warning here would be needlessly noisy (even for popular base images). - continue - } - - for _, ref := range pathRefs { - if oldRef, ok := locations[ref.Coordinates.ID()]; ok { - log.Debugf("found path duplicate of %s", oldRef.RealPath) - } - locations[ref.Coordinates.ID()] = ref - } - } - - var relationships []artifact.Relationship - for _, location := range locations { - relationships = append(relationships, artifact.Relationship{ - From: p, - To: location.Coordinates, - Type: artifact.ContainsRelationship, - }) - } - return relationships, nil -} - -func monitorPackageCatalogingTask() *monitor.CatalogerTaskProgress { - info := monitor.GenericTask{ - Title: monitor.Title{ - Default: "Catalog packages", - WhileRunning: "Cataloging packages", - OnSuccess: "Cataloged packages", - }, - HideOnSuccess: false, - } - - return bus.StartCatalogerTask(info, -1, "") -} diff --git a/syft/pkg/cataloger/catalog_test.go b/syft/pkg/cataloger/catalog_test.go deleted file mode 100644 index 950ec133aba..00000000000 --- a/syft/pkg/cataloger/catalog_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package cataloger - -import ( - "testing" - - "github.com/stretchr/testify/require" - - 
"github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/pkg" -) - -func Test_CatalogPanicHandling(t *testing.T) { - catalog, relationships, err := Catalog( - file.NewMockResolverForPaths(), - &linux.Release{}, - 1, - panickingCataloger{}, - returningCataloger{}, - ) - - require.Error(t, err) - require.Contains(t, err.Error(), "catalog_test.go") - require.Len(t, catalog.Sorted(), 2) - require.Len(t, relationships, 1) -} - -type panickingCataloger struct{} - -func (p panickingCataloger) Name() string { - return "panicking-cataloger" -} - -func (p panickingCataloger) Catalog(_ file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { - panic("something bad happened") -} - -var _ pkg.Cataloger = (*panickingCataloger)(nil) - -type returningCataloger struct{} - -func (p returningCataloger) Name() string { - return "returning-cataloger" -} - -func (p returningCataloger) Catalog(_ file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { - pkg1 := pkg.Package{ - Name: "package-1", - Version: "1.0", - } - pkg1.SetID() - pkg2 := pkg.Package{ - Name: "package-2", - Version: "2.0", - } - pkg2.SetID() - return []pkg.Package{pkg1, pkg2}, []artifact.Relationship{ - { - From: pkg1, - To: pkg2, - Type: artifact.DependencyOfRelationship, - }, - }, nil -} - -var _ pkg.Cataloger = (*returningCataloger)(nil) diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go deleted file mode 100644 index 77ba9e50194..00000000000 --- a/syft/pkg/cataloger/cataloger.go +++ /dev/null @@ -1,212 +0,0 @@ -/* -Package cataloger provides the ability to process files from a container image or file system and discover packages -(gems, wheels, jars, rpms, debs, etc). Specifically, this package contains both a catalog function to utilize all -catalogers defined in child packages as well as the interface definition to implement a cataloger. 
-*/ -package cataloger - -import ( - "strings" - - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/alpine" - "github.com/anchore/syft/syft/pkg/cataloger/arch" - "github.com/anchore/syft/syft/pkg/cataloger/binary" - "github.com/anchore/syft/syft/pkg/cataloger/cpp" - "github.com/anchore/syft/syft/pkg/cataloger/dart" - "github.com/anchore/syft/syft/pkg/cataloger/debian" - "github.com/anchore/syft/syft/pkg/cataloger/dotnet" - "github.com/anchore/syft/syft/pkg/cataloger/elixir" - "github.com/anchore/syft/syft/pkg/cataloger/erlang" - "github.com/anchore/syft/syft/pkg/cataloger/gentoo" - "github.com/anchore/syft/syft/pkg/cataloger/githubactions" - "github.com/anchore/syft/syft/pkg/cataloger/golang" - "github.com/anchore/syft/syft/pkg/cataloger/haskell" - "github.com/anchore/syft/syft/pkg/cataloger/java" - "github.com/anchore/syft/syft/pkg/cataloger/javascript" - "github.com/anchore/syft/syft/pkg/cataloger/kernel" - "github.com/anchore/syft/syft/pkg/cataloger/nix" - "github.com/anchore/syft/syft/pkg/cataloger/php" - "github.com/anchore/syft/syft/pkg/cataloger/python" - "github.com/anchore/syft/syft/pkg/cataloger/r" - "github.com/anchore/syft/syft/pkg/cataloger/redhat" - "github.com/anchore/syft/syft/pkg/cataloger/ruby" - "github.com/anchore/syft/syft/pkg/cataloger/rust" - "github.com/anchore/syft/syft/pkg/cataloger/sbom" - "github.com/anchore/syft/syft/pkg/cataloger/swift" -) - -const AllCatalogersPattern = "all" - -// ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. 
-func ImageCatalogers(cfg Config) []pkg.Cataloger { - return filterCatalogers([]pkg.Cataloger{ - arch.NewDBCataloger(), - alpine.NewDBCataloger(), - binary.NewCataloger(cfg.Binary), - cpp.NewConanInfoCataloger(), - debian.NewDBCataloger(), - dotnet.NewDotnetPortableExecutableCataloger(), - golang.NewGoModuleBinaryCataloger(cfg.Golang), - java.NewArchiveCataloger(cfg.JavaConfig()), - java.NewNativeImageCataloger(), - javascript.NewPackageCataloger(), - nix.NewStoreCataloger(), - php.NewComposerInstalledCataloger(), - gentoo.NewPortageCataloger(), - python.NewInstalledPackageCataloger(), - r.NewPackageCataloger(), - redhat.NewDBCataloger(), - ruby.NewInstalledGemSpecCataloger(), - sbom.NewCataloger(), - }, cfg.Catalogers) -} - -// DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations) -func DirectoryCatalogers(cfg Config) []pkg.Cataloger { - return filterCatalogers([]pkg.Cataloger{ - arch.NewDBCataloger(), - alpine.NewDBCataloger(), - binary.NewCataloger(cfg.Binary), - cpp.NewConanCataloger(), - dart.NewPubspecLockCataloger(), - debian.NewDBCataloger(), - dotnet.NewDotnetDepsCataloger(), - dotnet.NewDotnetPortableExecutableCataloger(), - elixir.NewMixLockCataloger(), - erlang.NewRebarLockCataloger(), - githubactions.NewActionUsageCataloger(), - githubactions.NewWorkflowUsageCataloger(), - golang.NewGoModuleFileCataloger(cfg.Golang), - golang.NewGoModuleBinaryCataloger(cfg.Golang), - haskell.NewHackageCataloger(), - java.NewArchiveCataloger(cfg.JavaConfig()), - java.NewGradleLockfileCataloger(), - java.NewPomCataloger(cfg.JavaConfig()), - java.NewNativeImageCataloger(), - javascript.NewLockCataloger(cfg.Javascript), - nix.NewStoreCataloger(), - php.NewComposerLockCataloger(), - gentoo.NewPortageCataloger(), - python.NewPackageCataloger(cfg.Python), - python.NewInstalledPackageCataloger(), - redhat.NewArchiveCataloger(), - redhat.NewDBCataloger(), - 
ruby.NewGemFileLockCataloger(), - ruby.NewGemSpecCataloger(), - rust.NewCargoLockCataloger(), - sbom.NewCataloger(), - swift.NewCocoapodsCataloger(), - swift.NewSwiftPackageManagerCataloger(), - }, cfg.Catalogers) -} - -// AllCatalogers returns all implemented catalogers -func AllCatalogers(cfg Config) []pkg.Cataloger { - return filterCatalogers([]pkg.Cataloger{ - arch.NewDBCataloger(), - alpine.NewDBCataloger(), - binary.NewCataloger(cfg.Binary), - cpp.NewConanCataloger(), - dart.NewPubspecLockCataloger(), - debian.NewDBCataloger(), - dotnet.NewDotnetDepsCataloger(), - dotnet.NewDotnetPortableExecutableCataloger(), - elixir.NewMixLockCataloger(), - erlang.NewRebarLockCataloger(), - githubactions.NewActionUsageCataloger(), - githubactions.NewWorkflowUsageCataloger(), - golang.NewGoModuleFileCataloger(cfg.Golang), - golang.NewGoModuleBinaryCataloger(cfg.Golang), - haskell.NewHackageCataloger(), - java.NewArchiveCataloger(cfg.JavaConfig()), - java.NewGradleLockfileCataloger(), - java.NewPomCataloger(cfg.JavaConfig()), - java.NewNativeImageCataloger(), - javascript.NewLockCataloger(cfg.Javascript), - javascript.NewPackageCataloger(), - kernel.NewLinuxKernelCataloger(cfg.LinuxKernel), - nix.NewStoreCataloger(), - php.NewComposerInstalledCataloger(), - php.NewComposerLockCataloger(), - gentoo.NewPortageCataloger(), - python.NewPackageCataloger(cfg.Python), - python.NewInstalledPackageCataloger(), - r.NewPackageCataloger(), - redhat.NewArchiveCataloger(), - redhat.NewDBCataloger(), - ruby.NewGemFileLockCataloger(), - ruby.NewGemSpecCataloger(), - ruby.NewInstalledGemSpecCataloger(), - rust.NewAuditBinaryCataloger(), - rust.NewCargoLockCataloger(), - sbom.NewCataloger(), - swift.NewCocoapodsCataloger(), - swift.NewSwiftPackageManagerCataloger(), - }, cfg.Catalogers) -} - -func RequestedAllCatalogers(cfg Config) bool { - for _, enableCatalogerPattern := range cfg.Catalogers { - if enableCatalogerPattern == AllCatalogersPattern { - return true - } - } - return false -} - 
-func filterCatalogers(catalogers []pkg.Cataloger, enabledCatalogerPatterns []string) []pkg.Cataloger { - // if cataloger is not set, all applicable catalogers are enabled by default - if len(enabledCatalogerPatterns) == 0 { - return catalogers - } - for _, enableCatalogerPattern := range enabledCatalogerPatterns { - if enableCatalogerPattern == AllCatalogersPattern { - return catalogers - } - } - var keepCatalogers []pkg.Cataloger - for _, cataloger := range catalogers { - if contains(enabledCatalogerPatterns, cataloger.Name()) { - keepCatalogers = append(keepCatalogers, cataloger) - continue - } - log.Infof("skipping cataloger %q", cataloger.Name()) - } - return keepCatalogers -} - -func contains(enabledPartial []string, catalogerName string) bool { - catalogerName = strings.TrimSuffix(catalogerName, "-cataloger") - for _, partial := range enabledPartial { - partial = strings.TrimSuffix(partial, "-cataloger") - if partial == "" { - continue - } - if hasFullWord(partial, catalogerName) { - return true - } - } - return false -} - -func hasFullWord(targetPhrase, candidate string) bool { - if targetPhrase == "cataloger" || targetPhrase == "" { - return false - } - start := strings.Index(candidate, targetPhrase) - if start == -1 { - return false - } - - if start > 0 && candidate[start-1] != '-' { - return false - } - - end := start + len(targetPhrase) - if end < len(candidate) && candidate[end] != '-' { - return false - } - return true -} diff --git a/syft/pkg/cataloger/cataloger_test.go b/syft/pkg/cataloger/cataloger_test.go deleted file mode 100644 index 5346df27cfb..00000000000 --- a/syft/pkg/cataloger/cataloger_test.go +++ /dev/null @@ -1,342 +0,0 @@ -package cataloger - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/pkg" -) - -var _ pkg.Cataloger = (*dummy)(nil) - -type dummy struct { - name string -} - -func (d dummy) Name() string { 
- return d.name -} - -func (d dummy) Catalog(_ file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { - panic("not implemented") -} - -func Test_filterCatalogers(t *testing.T) { - largeCatalogerList := []string{ - "alpm-db-cataloger", - "apkdb-cataloger", - "binary-cataloger", - "conan-cataloger", - "dartlang-lock-cataloger", - "dpkg-db-cataloger", - "dotnet-deps-cataloger", - "elixir-mix-lock-cataloger", - "erlang-rebar-lock-cataloger", - "go-module-file-cataloger", - "go-module-binary-cataloger", - "haskell-cataloger", - "graalvm-native-image-cataloger", - "java-cataloger", - "java-pom-cataloger", - "javascript-package-cataloger", - "javascript-lock-cataloger", - "php-composer-installed-cataloger", - "php-composer-lock-cataloger", - "portage-cataloger", - "python-package-cataloger", - "python-installed-package-cataloger", - "rpm-db-cataloger", - "rpm-archive-cataloger", - "ruby-gemfile-cataloger", - "ruby-installed-gemspec-cataloger", - "rust-cargo-lock-cataloger", - "cargo-auditable-binary-cataloger", - "sbom-cataloger", - "cocoapods-cataloger", - } - tests := []struct { - name string - patterns []string - catalogers []string - want []string - }{ - { - name: "no filtering", - patterns: nil, - catalogers: []string{ - "ruby-installed-gemspec-cataloger", - "python-installed-package-cataloger", - "php-composer-installed-cataloger", - "javascript-package-cataloger", - "dpkg-db-cataloger", - "rpm-db-cataloger", - "java-cataloger", - "apkdb-cataloger", - "go-module-binary-cataloger", - }, - want: []string{ - "ruby-installed-gemspec-cataloger", - "python-installed-package-cataloger", - "php-composer-installed-cataloger", - "javascript-package-cataloger", - "dpkg-db-cataloger", - "rpm-db-cataloger", - "java-cataloger", - "apkdb-cataloger", - "go-module-binary-cataloger", - }, - }, - { - name: "exact name match", - patterns: []string{ - "rpm-db-cataloger", - "javascript-package-cataloger", - }, - catalogers: []string{ - "ruby-installed-gemspec-cataloger", - 
"python-installed-package-cataloger", - "php-composer-installed-cataloger", - "javascript-package-cataloger", - "dpkg-db-cataloger", - "rpm-db-cataloger", - "java-cataloger", - "apkdb-cataloger", - "go-module-binary-cataloger", - }, - want: []string{ - "javascript-package-cataloger", - "rpm-db-cataloger", - }, - }, - { - name: "partial name match", - patterns: []string{ - "ruby", - "installed", - }, - catalogers: []string{ - "ruby-installed-gemspec-cataloger", - "ruby-gemfile-cataloger", - "python-installed-package-cataloger", - "php-composer-installed-cataloger", - "javascript-package-cataloger", - "dpkg-db-cataloger", - "rpm-db-cataloger", - "java-cataloger", - "apkdb-cataloger", - "go-module-binary-cataloger", - }, - want: []string{ - "php-composer-installed-cataloger", - "python-installed-package-cataloger", - "ruby-installed-gemspec-cataloger", - "ruby-gemfile-cataloger", - }, - }, - { - name: "ignore 'cataloger' keyword", - patterns: []string{ - "cataloger", - }, - catalogers: []string{ - "ruby-installed-gemspec-cataloger", - "ruby-gemfile-cataloger", - "python-installed-package-cataloger", - "php-composer-installed-cataloger", - "javascript-package-cataloger", - "dpkg-db-cataloger", - "rpm-db-cataloger", - "java-cataloger", - "apkdb-cataloger", - "go-module-binary-cataloger", - }, - want: []string{}, - }, - { - name: "only some patterns match", - patterns: []string{ - "cataloger", - "go-module", - }, - catalogers: []string{ - "ruby-installed-gemspec-cataloger", - "ruby-gemfile-cataloger", - "python-installed-package-cataloger", - "php-composer-installed-cataloger", - "javascript-package-cataloger", - "dpkg-db-cataloger", - "rpm-db-cataloger", - "java-cataloger", - "apkdb-cataloger", - "go-module-binary-cataloger", - }, - want: []string{ - "go-module-binary-cataloger", - }, - }, - { - name: "don't cross match ecosystems with matching prefix", - patterns: []string{ - "java-cataloger", - }, - catalogers: []string{ - "javascript-package-cataloger", - 
"java-cataloger", - }, - want: []string{ - "java-cataloger", - }, - }, - { - name: "don't cross match ecosystems with short, common name", - patterns: []string{ - "go", - }, - catalogers: largeCatalogerList, - want: []string{ - "go-module-file-cataloger", - "go-module-binary-cataloger", - //"rust-cargo-lock-cataloger", // with naive "contains" matching - //"cargo-auditable-binary-cataloger", // with naive "contains" matching - }, - }, - { - name: "ignore partial matches", - patterns: []string{ - "mod", - }, - catalogers: largeCatalogerList, - want: []string{ - // important! these are NOT found - //"go-module-file-cataloger", - //"go-module-binary-cataloger", - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var catalogers []pkg.Cataloger - for _, n := range tt.catalogers { - catalogers = append(catalogers, dummy{name: n}) - } - got := filterCatalogers(catalogers, tt.patterns) - var gotNames []string - for _, g := range got { - gotNames = append(gotNames, g.Name()) - } - assert.ElementsMatch(t, tt.want, gotNames) - }) - } -} - -func Test_contains(t *testing.T) { - tests := []struct { - name string - enabledCatalogers []string - catalogerName string - want bool - }{ - { - name: "keep exact match", - enabledCatalogers: []string{ - "php-composer-installed-cataloger", - }, - catalogerName: "php-composer-installed-cataloger", - want: true, - }, - { - name: "match substring", - enabledCatalogers: []string{ - "python", - }, - catalogerName: "python-installed-package-cataloger", - want: true, - }, - { - name: "dont match on 'cataloger'", - enabledCatalogers: []string{ - "cataloger", - }, - catalogerName: "python-installed-package-cataloger", - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.want, contains(tt.enabledCatalogers, tt.catalogerName)) - }) - } -} - -func Test_hasFullWord(t *testing.T) { - - tests := []struct { - name string - targetPhrase string - candidate string - 
want bool - }{ - { - name: "exact match", - targetPhrase: "php-composer-installed-cataloger", - candidate: "php-composer-installed-cataloger", - want: true, - }, - { - name: "partial, full word match", - targetPhrase: "composer", - candidate: "php-composer-installed-cataloger", - want: true, - }, - { - name: "partial, full, multi-word match", - targetPhrase: "php-composer", - candidate: "php-composer-installed-cataloger", - want: true, - }, - { - name: "prefix match", - targetPhrase: "php", - candidate: "php-composer-installed-cataloger", - want: true, - }, - { - name: "postfix match with -cataloger suffix", - targetPhrase: "installed", - candidate: "php-composer-installed-cataloger", - want: true, - }, - { - name: "postfix match", - targetPhrase: "installed", - candidate: "php-composer-installed", - want: true, - }, - { - name: "ignore cataloger keyword", - targetPhrase: "cataloger", - candidate: "php-composer-installed-cataloger", - want: false, - }, - { - name: "ignore partial match", - targetPhrase: "hp", - candidate: "php-composer-installed-cataloger", - want: false, - }, - { - name: "ignore empty string", - targetPhrase: "", - candidate: "php-composer-installed-cataloger", - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, tt.want, hasFullWord(tt.targetPhrase, tt.candidate), "hasFullWord(%v, %v)", tt.targetPhrase, tt.candidate) - }) - } -} diff --git a/syft/pkg/cataloger/config.go b/syft/pkg/cataloger/config.go deleted file mode 100644 index 640f549334c..00000000000 --- a/syft/pkg/cataloger/config.go +++ /dev/null @@ -1,53 +0,0 @@ -package cataloger - -import ( - "github.com/anchore/syft/syft/cataloging" - "github.com/anchore/syft/syft/pkg/cataloger/binary" - "github.com/anchore/syft/syft/pkg/cataloger/golang" - "github.com/anchore/syft/syft/pkg/cataloger/java" - "github.com/anchore/syft/syft/pkg/cataloger/javascript" - "github.com/anchore/syft/syft/pkg/cataloger/kernel" - 
"github.com/anchore/syft/syft/pkg/cataloger/python" -) - -// TODO: these field naming vs helper function naming schemes are inconsistent. -type Config struct { - Search SearchConfig - Golang golang.CatalogerConfig - LinuxKernel kernel.LinuxKernelCatalogerConfig - Python python.CatalogerConfig - Java java.ArchiveCatalogerConfig - Javascript javascript.CatalogerConfig - Binary binary.CatalogerConfig - Catalogers []string - Parallelism int - ExcludeBinaryOverlapByOwnership bool -} - -func DefaultConfig() Config { - return Config{ - Search: DefaultSearchConfig(), - Parallelism: 1, - LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), - Python: python.DefaultCatalogerConfig(), - Java: java.DefaultArchiveCatalogerConfig(), - Javascript: javascript.DefaultCatalogerConfig(), - Binary: binary.DefaultCatalogerConfig(), - ExcludeBinaryOverlapByOwnership: true, - } -} - -// JavaConfig merges relevant config values from Config to return a java.Config struct. -// Values like IncludeUnindexedArchives and IncludeIndexedArchives are used across catalogers -// and are not specific to Java requiring this merge. 
-func (c Config) JavaConfig() java.ArchiveCatalogerConfig { - return java.ArchiveCatalogerConfig{ - ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ - IncludeUnindexedArchives: c.Search.IncludeUnindexedArchives, - IncludeIndexedArchives: c.Search.IncludeIndexedArchives, - }, - UseNetwork: c.Java.UseNetwork, - MavenBaseURL: c.Java.MavenBaseURL, - MaxParentRecursiveDepth: c.Java.MaxParentRecursiveDepth, - } -} diff --git a/syft/pkg/cataloger/dotnet/parse_dotnet_deps.go b/syft/pkg/cataloger/dotnet/parse_dotnet_deps.go index fc7687804e0..da8166552ff 100644 --- a/syft/pkg/cataloger/dotnet/parse_dotnet_deps.go +++ b/syft/pkg/cataloger/dotnet/parse_dotnet_deps.go @@ -6,6 +6,7 @@ import ( "sort" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" @@ -122,7 +123,7 @@ func parseDotnetDeps(_ file.Resolver, _ *generic.Environment, reader file.Locati // sort the relationships for deterministic output // TODO: ideally this would be replaced with artifact.SortRelationships when one exists and is type agnostic. // this will only consider package-to-package relationships. - pkg.SortRelationships(relationships) + relationship.Sort(relationships) return pkgs, relationships, nil } diff --git a/syft/pkg/cataloger/golang/cataloger.go b/syft/pkg/cataloger/golang/cataloger.go index b875965db98..fb6e753272e 100644 --- a/syft/pkg/cataloger/golang/cataloger.go +++ b/syft/pkg/cataloger/golang/cataloger.go @@ -18,13 +18,18 @@ import ( var versionCandidateGroups = regexp.MustCompile(`(?P\d+(\.\d+)?(\.\d+)?)(?P\w*)`) +const ( + modFileCatalogerName = "go-module-file-cataloger" + binaryCatalogerName = "go-module-binary-cataloger" +) + // NewGoModuleFileCataloger returns a new cataloger object that searches within go.mod files. 
func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger { c := goModCataloger{ - licenses: newGoLicenses(opts), + licenses: newGoLicenses(modFileCatalogerName, opts), } return &progressingCataloger{ - cataloger: generic.NewCataloger("go-module-file-cataloger"). + cataloger: generic.NewCataloger(modFileCatalogerName). WithParserByGlobs(c.parseGoModFile, "**/go.mod"), } } @@ -32,10 +37,10 @@ func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger { // NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler. func NewGoModuleBinaryCataloger(opts CatalogerConfig) pkg.Cataloger { c := goBinaryCataloger{ - licenses: newGoLicenses(opts), + licenses: newGoLicenses(binaryCatalogerName, opts), } return &progressingCataloger{ - cataloger: generic.NewCataloger("go-module-binary-cataloger"). + cataloger: generic.NewCataloger(binaryCatalogerName). WithParserByMimeTypes(c.parseGoBinary, internal.ExecutableMIMETypeSet.List()...), } } diff --git a/syft/pkg/cataloger/golang/licenses.go b/syft/pkg/cataloger/golang/licenses.go index c21937bb9d0..09410852c88 100644 --- a/syft/pkg/cataloger/golang/licenses.go +++ b/syft/pkg/cataloger/golang/licenses.go @@ -30,13 +30,15 @@ import ( ) type goLicenses struct { + catalogerName string opts CatalogerConfig localModCacheResolver file.WritableResolver lowerLicenseFileNames *strset.Set } -func newGoLicenses(opts CatalogerConfig) goLicenses { +func newGoLicenses(catalogerName string, opts CatalogerConfig) goLicenses { return goLicenses{ + catalogerName: catalogerName, opts: opts, localModCacheResolver: modCacheResolver(opts.LocalModCacheDir), lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...), @@ -125,6 +127,7 @@ func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([] OnSuccess: "Downloaded go mod", }, HideOnSuccess: true, + ParentID: c.catalogerName, }, -1, "") fsys, err := getModule(prog, proxies, moduleName, moduleVersion) diff --git 
a/syft/pkg/cataloger/golang/licenses_test.go b/syft/pkg/cataloger/golang/licenses_test.go index e8019c7ccad..a4d1a4a9bb0 100644 --- a/syft/pkg/cataloger/golang/licenses_test.go +++ b/syft/pkg/cataloger/golang/licenses_test.go @@ -67,6 +67,7 @@ func Test_LocalLicenseSearch(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { l := newGoLicenses( + "", CatalogerConfig{ SearchLocalModCacheLicenses: true, LocalModCacheDir: path.Join(wd, "test-fixtures", "licenses", "pkg", "mod"), @@ -154,11 +155,14 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - l := newGoLicenses(CatalogerConfig{ - SearchRemoteLicenses: true, - Proxies: []string{server.URL}, - LocalModCacheDir: modDir, - }) + l := newGoLicenses( + "", + CatalogerConfig{ + SearchRemoteLicenses: true, + Proxies: []string{server.URL}, + LocalModCacheDir: modDir, + }, + ) licenses, err := l.getLicenses(fileresolver.Empty{}, test.name, test.version) require.NoError(t, err) diff --git a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go index f39523706f3..11d16209377 100644 --- a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go +++ b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go @@ -14,6 +14,7 @@ import ( "github.com/stretchr/testify/require" "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/linux" @@ -346,8 +347,8 @@ func (p *CatalogTester) assertPkgs(t *testing.T, pkgs []pkg.Package, relationshi opts = append(opts, cmp.Reporter(&r)) // order should not matter - pkg.SortRelationships(p.expectedRelationships) - pkg.SortRelationships(relationships) + relationship.Sort(p.expectedRelationships) + relationship.Sort(relationships) if diff := cmp.Diff(p.expectedRelationships, 
relationships, opts...); diff != "" { t.Log("Specific Differences:\n" + r.String()) diff --git a/syft/pkg/cataloger/search_config.go b/syft/pkg/cataloger/search_config.go deleted file mode 100644 index 17a6a3019cc..00000000000 --- a/syft/pkg/cataloger/search_config.go +++ /dev/null @@ -1,19 +0,0 @@ -package cataloger - -import ( - "github.com/anchore/syft/syft/source" -) - -type SearchConfig struct { - IncludeIndexedArchives bool - IncludeUnindexedArchives bool - Scope source.Scope -} - -func DefaultSearchConfig() SearchConfig { - return SearchConfig{ - IncludeIndexedArchives: true, - IncludeUnindexedArchives: false, - Scope: source.SquashedScope, - } -} diff --git a/test/cli/scan_cmd_test.go b/test/cli/scan_cmd_test.go index 0e967d7a3ff..05503bffd97 100644 --- a/test/cli/scan_cmd_test.go +++ b/test/cli/scan_cmd_test.go @@ -162,7 +162,7 @@ func TestPackagesCmdFlags(t *testing.T) { name: "all-layers-scope-flag-by-env", args: []string{"scan", "-o", "json", hiddenPackagesImage}, env: map[string]string{ - "SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers", + "SYFT_SCOPE": "all-layers", }, assertions: []traitAssertion{ assertPackageCount(163), // packages are now deduplicated for this case @@ -234,14 +234,50 @@ func TestPackagesCmdFlags(t *testing.T) { }, }, { - name: "catalogers-option", - // This will detect enable python-package-cataloger, python-installed-package-cataloger and ruby-gemspec cataloger - args: []string{"scan", "-o", "json", "--catalogers", "python,ruby-gemspec", coverageImage}, + name: "legacy-catalogers-option", + // This will detect enable: + // - python-installed-package-cataloger + // - python-package-cataloger + // - ruby-gemspec-cataloger + // - ruby-installed-gemspec-cataloger + args: []string{"packages", "-o", "json", "--catalogers", "python,gemspec", coverageImage}, assertions: []traitAssertion{ + assertInOutput("Flag --catalogers has been deprecated, use: override-default-catalogers and select-catalogers"), assertPackageCount(13), 
assertSuccessfulReturnCode, }, }, + { + name: "select-catalogers-option", + // This will detect enable: + // - python-installed-package-cataloger + // - ruby-installed-gemspec-cataloger + args: []string{"scan", "-o", "json", "--select-catalogers", "python,gemspec", coverageImage}, + assertions: []traitAssertion{ + assertPackageCount(6), + assertSuccessfulReturnCode, + }, + }, + { + name: "override-default-catalogers-option", + // This will detect enable: + // - python-installed-package-cataloger + // - python-package-cataloger + // - ruby-gemspec-cataloger + // - ruby-installed-gemspec-cataloger + args: []string{"packages", "-o", "json", "--override-default-catalogers", "python,gemspec", coverageImage}, + assertions: []traitAssertion{ + assertPackageCount(13), + assertSuccessfulReturnCode, + }, + }, + { + name: "new and old cataloger options are mutually exclusive", + args: []string{"packages", "-o", "json", "--override-default-catalogers", "python", "--catalogers", "gemspec", coverageImage}, + assertions: []traitAssertion{ + assertFailingReturnCode, + }, + }, { name: "override-default-parallelism", args: []string{"scan", "-vvv", "-o", "json", coverageImage}, @@ -250,8 +286,7 @@ func TestPackagesCmdFlags(t *testing.T) { }, assertions: []traitAssertion{ // the application config in the log matches that of what we expect to have been configured. - assertInOutput("parallelism: 2"), - assertInOutput("parallelism=2"), + assertInOutput(`parallelism: 2`), assertPackageCount(coverageImageSquashedPackageCount), assertSuccessfulReturnCode, }, @@ -261,8 +296,7 @@ func TestPackagesCmdFlags(t *testing.T) { args: []string{"scan", "-vvv", "-o", "json", coverageImage}, assertions: []traitAssertion{ // the application config in the log matches that of what we expect to have been configured. 
- assertInOutput("parallelism: 1"), - assertInOutput("parallelism=1"), + assertInOutput(`parallelism: 1`), assertPackageCount(coverageImageSquashedPackageCount), assertSuccessfulReturnCode, }, diff --git a/test/integration/all_layers_squashed_comparison_test.go b/test/integration/all_layers_squashed_comparison_test.go index 39973cbfaa3..4dedd9ef44e 100644 --- a/test/integration/all_layers_squashed_comparison_test.go +++ b/test/integration/all_layers_squashed_comparison_test.go @@ -8,8 +8,8 @@ import ( func Test_AllLayersIncludesSquashed(t *testing.T) { // This is a verification test for issue #894 (https://github.com/anchore/syft/issues/894) - allLayers, _ := catalogFixtureImage(t, "image-suse-all-layers", source.AllLayersScope, nil) - squashed, _ := catalogFixtureImage(t, "image-suse-all-layers", source.SquashedScope, nil) + allLayers, _ := catalogFixtureImage(t, "image-suse-all-layers", source.AllLayersScope) + squashed, _ := catalogFixtureImage(t, "image-suse-all-layers", source.SquashedScope) lenAllLayers := len(allLayers.Artifacts.Packages.Sorted()) lenSquashed := len(squashed.Artifacts.Packages.Sorted()) diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index 1e8fc1c4637..c0144e987da 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -1,6 +1,7 @@ package integration import ( + "context" "strings" "testing" @@ -9,53 +10,39 @@ import ( "github.com/stretchr/testify/require" "github.com/anchore/stereoscope/pkg/imagetest" - "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger" "github.com/anchore/syft/syft/source" ) func BenchmarkImagePackageCatalogers(b *testing.B) { + // get the fixture image tar file fixtureImageName := "image-pkg-coverage" imagetest.GetFixtureImage(b, "docker-archive", fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(b, 
fixtureImageName) - var pc *pkg.Collection - for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) { - // in case of future alteration where state is persisted, assume no dependency is safe to reuse - userInput := "docker-archive:" + tarPath - detection, err := source.Detect(userInput, source.DefaultDetectConfig()) - require.NoError(b, err) - theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) - if err != nil { - b.Fatalf("unable to get source: %+v", err) - } - b.Cleanup(func() { - theSource.Close() - }) + // get the source object for the image + userInput := "docker-archive:" + tarPath + detection, err := source.Detect(userInput, source.DefaultDetectConfig()) + require.NoError(b, err) - resolver, err := theSource.FileResolver(source.SquashedScope) - if err != nil { - b.Fatalf("unable to get resolver: %+v", err) - } + theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) + require.NoError(b, err) - theDistro := linux.IdentifyRelease(resolver) + b.Cleanup(func() { + theSource.Close() + }) - b.Run(c.Name(), func(b *testing.B) { - for i := 0; i < b.N; i++ { - pc, _, err = cataloger.Catalog(resolver, theDistro, 1, c) - if err != nil { - b.Fatalf("failure during benchmark: %+v", err) - } - } - }) + // build the SBOM + s, err := syft.CreateSBOM(context.Background(), theSource, syft.DefaultCreateSBOMConfig()) - b.Logf("catalog for %q number of packages: %d", c.Name(), pc.PackageCount()) - } + // did it work? 
+ require.NoError(b, err) + require.NotNil(b, s) } func TestPkgCoverageImage(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope) observedLanguages := strset.New() definedLanguages := strset.New() @@ -250,29 +237,8 @@ func TestPkgCoverageDirectory(t *testing.T) { } } -func TestPkgCoverageCatalogerConfiguration(t *testing.T) { - // Check that cataloger configuration can be used to run a cataloger on a source - // for which that cataloger isn't enabled by defauly - sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope, []string{"rust"}) - - observedLanguages := strset.New() - definedLanguages := strset.New() - definedLanguages.Add("rust") - - for actualPkg := range sbom.Artifacts.Packages.Enumerate() { - observedLanguages.Add(actualPkg.Language.String()) - } - - assert.Equal(t, definedLanguages, observedLanguages) - - // Verify that rust isn't actually an image cataloger - c := cataloger.DefaultConfig() - c.Catalogers = []string{"rust"} - assert.Len(t, cataloger.ImageCatalogers(c), 0) -} - func TestPkgCoverageImage_HasEvidence(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope) var cases []testCase cases = append(cases, commonTestCases...) 
diff --git a/test/integration/convert_test.go b/test/integration/convert_test.go index 4a9dcb3a0f9..913679cb892 100644 --- a/test/integration/convert_test.go +++ b/test/integration/convert_test.go @@ -59,7 +59,7 @@ func TestConvertCmd(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - syftSbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope, nil) + syftSbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope) syftFormat := syftjson.NewFormatEncoder() syftFile, err := os.CreateTemp("", "test-convert-sbom-") diff --git a/test/integration/distro_test.go b/test/integration/distro_test.go index d2159660ff8..6c8f904534e 100644 --- a/test/integration/distro_test.go +++ b/test/integration/distro_test.go @@ -10,7 +10,7 @@ import ( ) func TestDistroImage(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-distro-id", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-distro-id", source.SquashedScope) expected := &linux.Release{ PrettyName: "BusyBox v1.31.1", diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 9b32081d6dd..905889277e0 100644 --- a/test/integration/encode_decode_cycle_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -15,7 +15,6 @@ import ( "github.com/anchore/syft/syft/format/cyclonedxjson" "github.com/anchore/syft/syft/format/cyclonedxxml" "github.com/anchore/syft/syft/format/syftjson" - "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) @@ -32,12 +31,12 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { "image-owning-package", } tests := []struct { - formatOption sbom.FormatID - redactor func(in []byte) []byte - json bool + name string + redactor func(in []byte) []byte + json bool }{ { - formatOption: syftjson.ID, + name: syftjson.ID.String(), redactor: func(in []byte) []byte { // no redactions necessary return in @@ -45,7 +44,7 @@ func 
TestEncodeDecodeEncodeCycleComparison(t *testing.T) { json: true, }, { - formatOption: cyclonedxjson.ID, + name: cyclonedxjson.ID.String(), redactor: func(in []byte) []byte { // unstable values in = regexp.MustCompile(`"(timestamp|serialNumber|bom-ref|ref)":\s*"(\n|[^"])+"`).ReplaceAll(in, []byte(`"$1": "redacted"`)) @@ -55,7 +54,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { json: true, }, { - formatOption: cyclonedxxml.ID, + name: cyclonedxxml.ID.String(), redactor: func(in []byte) []byte { // unstable values in = regexp.MustCompile(`(serialNumber|bom-ref|ref)="[^"]+"`).ReplaceAll(in, []byte{}) @@ -75,11 +74,11 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { decoders := format.NewDecoderCollection(format.Decoders()...) for _, test := range tests { - t.Run(string(test.formatOption), func(t *testing.T) { + t.Run(test.name, func(t *testing.T) { for _, image := range images { - originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope, nil) + originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope) - f := encoders.GetByString(string(test.formatOption)) + f := encoders.GetByString(test.name) require.NotNil(t, f) var buff1 bytes.Buffer diff --git a/test/integration/files_test.go b/test/integration/files_test.go new file mode 100644 index 00000000000..c2e4bc923f1 --- /dev/null +++ b/test/integration/files_test.go @@ -0,0 +1,154 @@ +package integration + +import ( + "crypto" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/clio" + stereoscopeFile "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/syft/cmd/syft/cli/options" + "github.com/anchore/syft/syft/cataloging/filecataloging" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/file/cataloger/filecontent" + "github.com/anchore/syft/syft/sbom" +) + +func TestFileCataloging_Default(t *testing.T) { + cfg := 
options.DefaultCatalog().ToSBOMConfig(clio.Identification{}) + cfg = cfg.WithFilesConfig(filecataloging.DefaultConfig()) + sbom, _ := catalogDirectoryWithConfig(t, "test-fixtures/files", cfg) + + var metadata map[file.Coordinates]file.Metadata + + var digests map[file.Coordinates][]file.Digest + + var contents map[file.Coordinates]string + + assertFileData(t, metadata, digests, contents, sbom) +} + +func TestFileCataloging_AllFiles(t *testing.T) { + cfg := options.DefaultCatalog().ToSBOMConfig(clio.Identification{}) + cfg = cfg.WithFilesConfig(filecataloging.Config{ + Selection: file.AllFilesSelection, + Hashers: []crypto.Hash{ + crypto.SHA256, + }, + Content: filecontent.Config{ + // this is enough to potentially capture a/file, a-small-file, a-symlink-to-a-small-file, and a-symlink-to-file + // but the size of a/file will cause it to be filtered, and the symlinks will not be included since + // they are not regular files + Globs: []string{"**/*file"}, + SkipFilesAboveSize: 30, + }, + }) + sbom, _ := catalogDirectoryWithConfig(t, "test-fixtures/files", cfg) + + pwd, err := os.Getwd() + require.NoError(t, err) + + testPath := func(path string) string { + return filepath.Join(pwd, "test-fixtures/files", path) + } + + metadata := map[file.Coordinates]file.Metadata{ + {RealPath: ""}: { + Path: testPath(""), + Type: stereoscopeFile.TypeDirectory, + }, + {RealPath: "/somewhere"}: { + Path: testPath("/somewhere"), + Type: stereoscopeFile.TypeDirectory, + }, + {RealPath: "/somewhere/there"}: { + Path: testPath("/somewhere/there"), + Type: stereoscopeFile.TypeDirectory, + }, + {RealPath: "/somewhere/there/is"}: { + Path: testPath("/somewhere/there/is"), + Type: stereoscopeFile.TypeDirectory, + }, + {RealPath: "/somewhere/there/is/a"}: { + Path: testPath("/somewhere/there/is/a"), + Type: stereoscopeFile.TypeDirectory, + }, + {RealPath: "/somewhere/there/is/a-small-file"}: { + Path: testPath("/somewhere/there/is/a-small-file"), + Type: stereoscopeFile.TypeRegular, + 
MIMEType: "text/plain", + }, + {RealPath: "/somewhere/there/is/a-symlink-to-a-small-file"}: { + Path: testPath("/somewhere/there/is/a-symlink-to-a-small-file"), + LinkDestination: testPath("/somewhere/there/is/a-small-file"), + Type: stereoscopeFile.TypeSymLink, + }, + {RealPath: "/somewhere/there/is/a-symlink-to-file"}: { + Path: testPath("/somewhere/there/is/a-symlink-to-file"), + LinkDestination: testPath("/somewhere/there/is/a/file"), + Type: stereoscopeFile.TypeSymLink, + }, + {RealPath: "/somewhere/there/is/a/file"}: { + Path: testPath("/somewhere/there/is/a/file"), + Type: stereoscopeFile.TypeRegular, + MIMEType: "text/plain", + }, + } + + digests := map[file.Coordinates][]file.Digest{ + {RealPath: "/somewhere/there/is/a-small-file"}: { + file.Digest{Algorithm: "sha256", Value: "672c23470e4ce99cf270bb63ae66ad2b8a80aa19090c40e59fbb1229a4ab661a"}, + }, + {RealPath: "/somewhere/there/is/a/file"}: { + file.Digest{Algorithm: "sha256", Value: "00dac26d6d94353ac0d92bb9640cba76f82f5ca8707bb845ecdc574bd002348e"}, + }, + } + + contents := map[file.Coordinates]string{ + {RealPath: "/somewhere/there/is/a-small-file"}: "c29tZSBjb250ZW50cyE=", + } + + assertFileData(t, metadata, digests, contents, sbom) + +} + +func assertFileData(t testing.TB, metadata map[file.Coordinates]file.Metadata, digests map[file.Coordinates][]file.Digest, contents map[file.Coordinates]string, sbom sbom.SBOM) { + metadataCompareOpts := cmp.Options{ + cmp.Comparer(func(x, y file.Metadata) bool { + if x.Path != y.Path { + t.Logf("path mismatch: %s != %s", x.Path, y.Path) + return false + } + + if x.Type != y.Type { + t.Logf("type mismatch: %s != %s", x.Type, y.Type) + return false + } + + if x.LinkDestination != y.LinkDestination { + t.Logf("link destination mismatch: %s != %s", x.LinkDestination, y.LinkDestination) + return false + } + + if x.MIMEType != y.MIMEType { + t.Logf("mime type mismatch: %s != %s", x.MIMEType, y.MIMEType) + return false + } + + return true + }), + } + + if d := 
cmp.Diff(metadata, sbom.Artifacts.FileMetadata, metadataCompareOpts...); d != "" { + t.Errorf("unexpected metadata (-want +got):\n%s", d) + } + + assert.Equal(t, digests, sbom.Artifacts.FileDigests, "different digests detected") + assert.Equal(t, contents, sbom.Artifacts.FileContents, "different contents detected") + +} diff --git a/test/integration/go_compiler_detection_test.go b/test/integration/go_compiler_detection_test.go index 26994482379..fbeec5ba019 100644 --- a/test/integration/go_compiler_detection_test.go +++ b/test/integration/go_compiler_detection_test.go @@ -25,7 +25,7 @@ func TestGolangCompilerDetection(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - sbom, _ := catalogFixtureImage(t, tt.image, source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, tt.image, source.SquashedScope) packages := sbom.Artifacts.Packages.PackagesByName("stdlib") foundCompilerVersions := make(map[string]struct{}) diff --git a/test/integration/java_purl_test.go b/test/integration/java_purl_test.go index 2af26ab7051..12faef3c221 100644 --- a/test/integration/java_purl_test.go +++ b/test/integration/java_purl_test.go @@ -11,7 +11,7 @@ import ( ) func TestJavaPURLs(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-test-java-purls", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-test-java-purls", source.SquashedScope) found := make(map[string]string) for _, p := range sbom.Artifacts.Packages.Sorted() { if p.Type != pkg.JavaPkg && p.Type != pkg.JenkinsPluginPkg { diff --git a/test/integration/mariner_distroless_test.go b/test/integration/mariner_distroless_test.go index 95c457cea84..5231f325427 100644 --- a/test/integration/mariner_distroless_test.go +++ b/test/integration/mariner_distroless_test.go @@ -8,7 +8,7 @@ import ( ) func TestMarinerDistroless(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-mariner-distroless", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, 
"image-mariner-distroless", source.SquashedScope) expectedPkgs := 12 actualPkgs := 0 diff --git a/test/integration/package_deduplication_test.go b/test/integration/package_deduplication_test.go index 75c86234e44..12fa9dcf207 100644 --- a/test/integration/package_deduplication_test.go +++ b/test/integration/package_deduplication_test.go @@ -63,7 +63,7 @@ func TestPackageDeduplication(t *testing.T) { for _, tt := range tests { t.Run(string(tt.scope), func(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil) + sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope) for _, p := range sbom.Artifacts.Packages.Sorted() { if p.Type == pkg.BinaryPkg { assert.NotEmpty(t, p.Name) diff --git a/test/integration/package_ownership_relationship_test.go b/test/integration/package_ownership_relationship_test.go index d3ce0745404..dc323ce1060 100644 --- a/test/integration/package_ownership_relationship_test.go +++ b/test/integration/package_ownership_relationship_test.go @@ -25,7 +25,7 @@ func TestPackageOwnershipRelationships(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope) output := bytes.NewBufferString("") err := syftjson.NewFormatEncoder().Encode(output, sbom) @@ -60,7 +60,7 @@ func TestPackageOwnershipExclusions(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope) binaryPackages := make([]pkg.Package, 0) apkPackages := make([]pkg.Package, 0) for p := range sbom.Artifacts.Packages.Enumerate() { diff --git a/test/integration/regression_apk_scanner_buffer_size_test.go b/test/integration/regression_apk_scanner_buffer_size_test.go index 
3549d52ee14..b0f61232ea7 100644 --- a/test/integration/regression_apk_scanner_buffer_size_test.go +++ b/test/integration/regression_apk_scanner_buffer_size_test.go @@ -10,7 +10,7 @@ import ( func TestRegression212ApkBufferSize(t *testing.T) { // This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could // not be processed due to a scanner buffer that was too small - sbom, _ := catalogFixtureImage(t, "image-large-apk-data", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-large-apk-data", source.SquashedScope) expectedPkgs := 58 actualPkgs := 0 diff --git a/test/integration/regression_go_bin_scanner_arch_test.go b/test/integration/regression_go_bin_scanner_arch_test.go index d88ee6c7c6c..8a5d231884e 100644 --- a/test/integration/regression_go_bin_scanner_arch_test.go +++ b/test/integration/regression_go_bin_scanner_arch_test.go @@ -16,7 +16,7 @@ func TestRegressionGoArchDiscovery(t *testing.T) { ) // This is a regression test to make sure the way we detect go binary packages // stays consistent and reproducible as the tool chain evolves - sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage", source.SquashedScope) var actualELF, actualWIN, actualMACOS int diff --git a/test/integration/regression_java_no_main_package_test.go b/test/integration/regression_java_no_main_package_test.go index 102271ec8c9..31a755efa36 100644 --- a/test/integration/regression_java_no_main_package_test.go +++ b/test/integration/regression_java_no_main_package_test.go @@ -7,5 +7,5 @@ import ( ) func TestRegressionJavaNoMainPackage(t *testing.T) { // Regression: https://github.com/anchore/syft/issues/252 - catalogFixtureImage(t, "image-java-no-main-package", source.SquashedScope, nil) + catalogFixtureImage(t, "image-java-no-main-package", source.SquashedScope) } diff --git 
a/test/integration/regression_java_virtualpath_test.go b/test/integration/regression_java_virtualpath_test.go index d9d100fa65e..a4685602e0e 100644 --- a/test/integration/regression_java_virtualpath_test.go +++ b/test/integration/regression_java_virtualpath_test.go @@ -12,7 +12,7 @@ import ( func TestWarCatalogedCorrectlyIfRenamed(t *testing.T) { // install hudson-war@2.2.1 and renames the file to `/hudson.war` - sbom, _ := catalogFixtureImage(t, "image-java-virtualpath-regression", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-java-virtualpath-regression", source.SquashedScope) badPURL := "pkg:maven/hudson/hudson@2.2.1" goodPURL := "pkg:maven/org.jvnet.hudson.main/hudson-war@2.2.1" diff --git a/test/integration/regression_photon_package_test.go b/test/integration/regression_photon_package_test.go index dc8360a8efd..6d9aaf322b2 100644 --- a/test/integration/regression_photon_package_test.go +++ b/test/integration/regression_photon_package_test.go @@ -8,7 +8,7 @@ import ( ) func TestPhotonPackageRegression(t *testing.T) { // Regression: https://github.com/anchore/syft/pull/1997 - sbom, _ := catalogFixtureImage(t, "image-photon-all-layers", source.AllLayersScope, nil) + sbom, _ := catalogFixtureImage(t, "image-photon-all-layers", source.AllLayersScope) var packages []pkg.Package for p := range sbom.Artifacts.Packages.Enumerate() { packages = append(packages, p) diff --git a/test/integration/rust_audit_binary_test.go b/test/integration/rust_audit_binary_test.go index 57baf46af36..5a61055e5e9 100644 --- a/test/integration/rust_audit_binary_test.go +++ b/test/integration/rust_audit_binary_test.go @@ -8,7 +8,7 @@ import ( ) func TestRustAudit(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-rust-auditable", source.SquashedScope, []string{"all"}) + sbom, _ := catalogFixtureImage(t, "image-rust-auditable", source.SquashedScope) expectedPkgs := 2 actualPkgs := 0 diff --git a/test/integration/sbom_cataloger_test.go 
b/test/integration/sbom_cataloger_test.go index 65d94b3e059..703a3dcacd4 100644 --- a/test/integration/sbom_cataloger_test.go +++ b/test/integration/sbom_cataloger_test.go @@ -11,16 +11,16 @@ func TestSbomCataloger(t *testing.T) { // The image contains a go.mod file with 2 dependencies and an spdx json sbom. // The go.mod file contains 2 dependencies, and the sbom includes a go dependency // that overlaps with the go.mod - sbom, _ := catalogFixtureImage(t, "image-sbom-cataloger", source.SquashedScope, []string{"all"}) + sbom, _ := catalogFixtureImage(t, "image-sbom-cataloger", source.SquashedScope, "+go-module-file-cataloger") expectedSbomCatalogerPkgs := 1 expectedGoModCatalogerPkgs := 2 actualSbomPkgs := 0 actualGoModPkgs := 0 - for pkg := range sbom.Artifacts.Packages.Enumerate(pkg.GoModulePkg) { - if pkg.FoundBy == "go-module-file-cataloger" { + for p := range sbom.Artifacts.Packages.Enumerate(pkg.GoModulePkg) { + if p.FoundBy == "go-module-file-cataloger" { actualGoModPkgs += 1 - } else if pkg.FoundBy == "sbom-cataloger" { + } else if p.FoundBy == "sbom-cataloger" { actualSbomPkgs += 1 } } diff --git a/test/integration/sqlite_rpmdb_test.go b/test/integration/sqlite_rpmdb_test.go index fd3dfa98a01..37f21cbba46 100644 --- a/test/integration/sqlite_rpmdb_test.go +++ b/test/integration/sqlite_rpmdb_test.go @@ -12,7 +12,7 @@ import ( func TestSqliteRpm(t *testing.T) { // This is a regression test for issue #469 (https://github.com/anchore/syft/issues/469). 
Recent RPM // based distribution store package data in an sqlite database - sbom, _ := catalogFixtureImage(t, "image-sqlite-rpmdb", source.SquashedScope, nil) + sbom, _ := catalogFixtureImage(t, "image-sqlite-rpmdb", source.SquashedScope) expectedPkgs := 139 actualPkgs := 0 diff --git a/test/integration/test-fixtures/files/somewhere/there/is/a-small-file b/test/integration/test-fixtures/files/somewhere/there/is/a-small-file new file mode 100644 index 00000000000..50c7fcb078b --- /dev/null +++ b/test/integration/test-fixtures/files/somewhere/there/is/a-small-file @@ -0,0 +1 @@ +some contents! \ No newline at end of file diff --git a/test/integration/test-fixtures/files/somewhere/there/is/a-symlink-to-a-small-file b/test/integration/test-fixtures/files/somewhere/there/is/a-symlink-to-a-small-file new file mode 120000 index 00000000000..2255208c262 --- /dev/null +++ b/test/integration/test-fixtures/files/somewhere/there/is/a-symlink-to-a-small-file @@ -0,0 +1 @@ +./a-small-file \ No newline at end of file diff --git a/test/integration/test-fixtures/files/somewhere/there/is/a-symlink-to-file b/test/integration/test-fixtures/files/somewhere/there/is/a-symlink-to-file new file mode 120000 index 00000000000..064ba9f0443 --- /dev/null +++ b/test/integration/test-fixtures/files/somewhere/there/is/a-symlink-to-file @@ -0,0 +1 @@ +./a/file \ No newline at end of file diff --git a/test/integration/test-fixtures/files/somewhere/there/is/a/file b/test/integration/test-fixtures/files/somewhere/there/is/a/file new file mode 100644 index 00000000000..2ebc965e0d7 --- /dev/null +++ b/test/integration/test-fixtures/files/somewhere/there/is/a/file @@ -0,0 +1,10 @@ +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! +with a lot of contents! 
\ No newline at end of file diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index eeb583f6558..58b300756cf 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -1,81 +1,91 @@ package integration import ( + "context" "testing" "github.com/stretchr/testify/require" + "github.com/anchore/clio" "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/cmd/syft/cli/options" "github.com/anchore/syft/syft" - "github.com/anchore/syft/syft/pkg/cataloger" + "github.com/anchore/syft/syft/cataloging/pkgcataloging" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) -func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Scope, catalogerCfg []string) (sbom.SBOM, source.Source) { +func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Scope, catalogerSelection ...string) (sbom.SBOM, source.Source) { + cfg := options.DefaultCatalog().ToSBOMConfig(clio.Identification{ + Name: "syft-tester", + Version: "v0.99.0", + }).WithCatalogerSelection( + pkgcataloging.NewSelectionRequest(). 
+ WithExpression(catalogerSelection...), + ) + cfg.Search.Scope = scope + + return catalogFixtureImageWithConfig(t, fixtureImageName, cfg) +} + +func catalogFixtureImageWithConfig(t *testing.T, fixtureImageName string, cfg *syft.CreateSBOMConfig) (sbom.SBOM, source.Source) { + cfg.CatalogerSelection = cfg.CatalogerSelection.WithDefaults(pkgcataloging.ImageTag) + + // get the fixture image tar file imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName) userInput := "docker-archive:" + tarPath + + // get the source to build an SBOM against detection, err := source.Detect(userInput, source.DefaultDetectConfig()) require.NoError(t, err) + theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) require.NoError(t, err) + t.Cleanup(func() { theSource.Close() }) - c := cataloger.DefaultConfig() - c.Catalogers = catalogerCfg - - c.Search.Scope = scope - pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) - if err != nil { - t.Fatalf("failed to catalog image: %+v", err) - } - - return sbom.SBOM{ - Artifacts: sbom.Artifacts{ - Packages: pkgCatalog, - LinuxDistribution: actualDistro, - }, - Relationships: relationships, - Source: theSource.Describe(), - Descriptor: sbom.Descriptor{ - Name: "syft", - Version: "v0.42.0-bogus", - // the application configuration should be persisted here, however, we do not want to import - // the application configuration in this package (it's reserved only for ingestion by the cmd package) - Configuration: map[string]string{ - "config-key": "config-value", - }, - }, - }, theSource + s, err := syft.CreateSBOM(context.Background(), theSource, cfg) + + require.NoError(t, err) + require.NotNil(t, s) + + return *s, theSource } -func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) { +func catalogDirectory(t *testing.T, dir string, catalogerSelection ...string) (sbom.SBOM, source.Source) { + cfg := 
options.DefaultCatalog().ToSBOMConfig(clio.Identification{ + Name: "syft-tester", + Version: "v0.99.0", + }).WithCatalogerSelection( + pkgcataloging.NewSelectionRequest(). + WithExpression(catalogerSelection...), + ) + + return catalogDirectoryWithConfig(t, dir, cfg) +} + +func catalogDirectoryWithConfig(t *testing.T, dir string, cfg *syft.CreateSBOMConfig) (sbom.SBOM, source.Source) { + cfg.CatalogerSelection = cfg.CatalogerSelection.WithDefaults(pkgcataloging.DirectoryTag) + + // get the source to build an sbom against userInput := "dir:" + dir detection, err := source.Detect(userInput, source.DefaultDetectConfig()) require.NoError(t, err) + theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) require.NoError(t, err) t.Cleanup(func() { theSource.Close() }) - // TODO: this would be better with functional options (after/during API refactor) - c := cataloger.DefaultConfig() - c.Search.Scope = source.AllLayersScope - pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) - if err != nil { - t.Fatalf("failed to catalog image: %+v", err) - } - - return sbom.SBOM{ - Artifacts: sbom.Artifacts{ - Packages: pkgCatalog, - LinuxDistribution: actualDistro, - }, - Relationships: relationships, - Source: theSource.Describe(), - }, theSource + // build the SBOM + s, err := syft.CreateSBOM(context.Background(), theSource, cfg) + + require.NoError(t, err) + require.NotNil(t, s) + + return *s, theSource }