From 473605ce2282cecbde0d38e542f066e58b6d3d9b Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 15 Dec 2023 16:13:26 -0500 Subject: [PATCH] update docs with SBOM selection help + breaking changes Signed-off-by: Alex Goodman --- README.md | 277 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 159 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index d75f248bee4..eef48a8ba13 100644 --- a/README.md +++ b/README.md @@ -109,13 +109,13 @@ nix-shell -p syft To generate an SBOM for a container image: -``` +```bash syft ``` The above output includes only software that is visible in the container (i.e., the squashed representation of the image). To include software from all image layers in the SBOM, regardless of its presence in the final image, provide `--scope all-layers`: -``` +```bash syft --scope all-layers ``` @@ -123,7 +123,7 @@ syft --scope all-layers Syft can generate an SBOM from a variety of sources: -``` +```bash # catalog a container image archive (from the result of `docker image save ...`, `podman save ...`, or `skopeo copy` commands) syft path/to/image.tar @@ -155,69 +155,114 @@ If docker is not present, then the Podman daemon is attempted next, followed by This default behavior can be overridden with the `default-image-pull-source` configuration option (See [Configuration](https://github.com/anchore/syft#configuration) for more details). -### Default Cataloger Configuration by scan type - -Syft uses different default sets of catalogers depending on what it is scanning: a container image or a directory on disk. The default catalogers for an image scan assumes that package installation steps have already been completed. For example, Syft will identify Python packages that have egg or wheel metadata files under a site-packages directory, since this indicates software actually installed on an image. - -However, if you are scanning a directory, Syft doesn't assume that all relevant software is installed, and will use catalogers that can identify declared dependencies that may not yet be installed on the final system: for example, dependencies listed in a Python requirements.txt. - -You can override the list of enabled/disabled catalogers by using the "catalogers" keyword in the [Syft configuration file](https://github.com/anchore/syft#configuration). - -##### Image Scanning: -- alpmdb -- apkdb -- binary -- dotnet-deps -- dotnet-portable-executable -- dpkgdb -- go-module-binary -- graalvm-native-image -- java -- javascript-package -- linux-kernel -- nix-store -- php-composer-installed -- portage -- python-package -- rpm-db -- ruby-gemspec -- sbom - -##### Directory Scanning: -- alpmdb -- apkdb -- binary -- cocoapods -- conan -- dartlang-lock -- dotnet-deps -- dotnet-portable-executable -- dpkgdb -- elixir-mix-lock -- erlang-rebar-lock -- go-mod-file -- go-module-binary -- graalvm-native-image -- haskell -- java -- java-gradle-lockfile -- java-pom -- javascript-lock -- linux-kernel -- nix-store -- php-composer-lock -- portage -- python-index -- python-package -- rpm-db -- rpm-file -- ruby-gemfile -- rust-cargo-lock -- sbom -- swift-package-manager - -##### Non Default: -- cargo-auditable-binary + +### File selection + +By default, Syft will catalog file details and digests for files that are owned by discovered packages. You can change this behavior by using the `SYFT_FILE_METADATA_SELECTION` environment variable or the `file.metadata.selection` configuration option. The options are: + +- `all-files`: capture all files from the search space +- `owned-files`: capture only files owned by packages (default) +- `no-files`: disable capturing any file information + + +### Package cataloger selection + +#### Concepts + +Syft uses different sets of catalogers depending on *what is being scanned*: a container image or a directory on disk. + +The catalogers for an image scan assumes that package installation steps have already been completed. For example, Syft will identify Python packages that have egg or wheel metadata files under a `site-packages` directory, since this is how the canonical tooling `pip` installs python packages. + +The catalogers for a directory scan will look for installed software as well as declared dependencies that are not necessarily installed. For example, dependencies listed in a Python requirements.txt. + +This default set of catalogers being dynamic is critical as this allows Syft to be used in a variety of contexts while still generating accurate SBOMs. +Overriding the set of default catalogers is not recommended for most purposes, however, is possible if needed. + + +#### Rules + +Catalogers can be referenced in two different ways: +- *by name*: the exact cataloger name (e.g. `java-pom-cataloger` or `java-archive-cataloger`) +- *by tag*: a tag that is associated with a cataloger (e.g. `java`) + +Syft can take lists of references on the CLI or in the application configuration to define which catalogers to use. + +You can **Set** the list of catalogers explicitly to use with the `--override-default-catalogers` CLI flag, accepting a comma-separated list of cataloger names or tags. + +You can also **Add** to, **remove** from, or **sub-select** catalogers to use within the default set of catalogers by using the `--select-catalogers` CLI flag. + - To sub-select catalogers simply provide a tag (e.g. `--select-catalogers TAG`). This is always interpreted from the default set of catalogers. If used, the union of these catalogers becomes the new default set. + - To add a cataloger prefix the cataloger name with `+` (e.g. `--select-catalogers +NAME`). This is always interpreted from the universal set of catalogers. + - To remove a cataloger prefix the cataloger name or tag with `-` (e.g. `--select-catalogers -NAME_OR_TAG`). This is always interpreted from the final set of catalogers. + +These rules and the dynamic default cataloger sets approximates to the following logic: + +``` +image_catalogers = all_catalogers & catalogers_tagged("image") + +directory_catalogers = all_catalogers & catalogers_tagged("directory") + +default_catalogers = image_catalogers | directory_catalogers + +sub_selected_catalogers = default_catalogers [ & catalogers_tagged(TAG) [ + default_catalogers & catalogers_named(NAME) ...] ] + +final_set = (sub_selected_catalogers - removed_catalogers) + added_catalogers +``` + + +#### Examples + +Only scan for python related packages with catalogers appropriate for the source type (image or directory): +```bash +syft --select-catalogers "python" +# results in the following catalogers being used: +# - python-installed-package-cataloger +``` + +Same command, but the set of catalogers changes based on what is being analyzed (in this case a directory): +```bash +syft --select-catalogers "python" +# results in the following catalogers being used: +# - python-installed-package-cataloger +# - python-package-cataloger +``` + +Use the default set of catalogers and add a cataloger to the set: +```bash +syft ... --catalogers "+sbom-cataloger" +``` + +Use the default set of catalogers but remove any catalogers that deal with RPMs: +```bash +syft ... --catalogers "-rpm" +``` + +Only scan with catalogers that: +- are tagged with "go" +- always use the sbom-cataloger +- are appropriate for the source type (image or directory) + +```bash +syft --select-catalogers "go,+sbom-cataloger" +# results in the following catalogers being used: +# - go-module-binary-cataloger +# - sbom-cataloger +``` + +Only scan with all catalogers that deal with binary analysis: +```bash +syft ... --override-default-catalogers "binary" +# results in the following catalogers being used: +# - binary-cataloger +# - cargo-auditable-binary-cataloger +# - dotnet-portable-executable-cataloger +# - go-module-binary-cataloger +``` + +Only scan with the specific "go-module-binary-cataloger" and "go-module-file-cataloger" catalogers: +```bash +syft ... --override-default-catalogers "go-module-binary-cataloger,go-module-file-cataloger" +``` + ### Excluding file paths @@ -468,21 +513,18 @@ output: "syft-table" # same as -q ; SYFT_QUIET env var quiet: false -# same as --file; write output report to a file (default is to write to stdout) -file: "" - # enable/disable checking for application updates on startup # same as SYFT_CHECK_FOR_APP_UPDATE env var check-for-app-update: true -# allows users to specify which image source should be used to generate the sbom -# valid values are: registry, docker, podman -default-image-pull-source: "" +# maximum number of workers used to process the list of package catalogers in parallel +parallelism: 1 # a list of globs to exclude from scanning. same as --exclude ; for example: # exclude: # - "/etc/**" # - "./out/**/*.json" +# same as --exclude ; SYFT_EXCLUDE env var exclude: [] # allows users to exclude synthetic binary packages from the sbom @@ -493,6 +535,10 @@ exclude-binary-overlap-by-ownership: true # same as --platform; SYFT_PLATFORM env var platform: "" +# the search space to look for file and package data (options: all-layers, squashed) +# SYFT_SCOPE env var +scope: "squashed" + # set the list of package catalogers to use when generating the SBOM # default = empty (cataloger set determined automatically by the source type [image or file/directory]) # catalogers: @@ -533,7 +579,6 @@ platform: "" # - spm-cataloger catalogers: - # all format configuration format: @@ -586,9 +631,36 @@ format: pretty: false +file: + + metadata: + # select which files should be captured by the file-metadata cataloger and included in the SBOM. + # Options include: + # - "all-files": capture all files from the search space + # - "owned-files": capture only files owned by packages + # - "unowned-files": capture only files not owned by packages + # - "no-files", "": do not capture any files + # SYFT_FILE_METADATA_SELECTION env var + selection: "owned-files" + + # the file digest algorithms to use when cataloging files (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512") + # SYFT_FILE_METADATA_DIGESTS env var + digests: ["sha256"] + + # capture the contents of select files in the SBOM + content: + # skip searching a file entirely if it is above the given size (default = 1MB; unit = bytes) + # SYFT_FILE_CONTENT_SKIP_FILES_ABOVE_SIZE env var + skip-files-above-size: 1048576 + + # file globs for the cataloger to match on + # SYFT_FILE_CONTENT_GLOBS env var + globs: [] + + # cataloging packages is exposed through the packages and power-user subcommands package: - + # search within archives that do contain a file index to search against (zip) # note: for now this only applies to the java package cataloger # SYFT_PACKAGE_SEARCH_INDEXED_ARCHIVES env var @@ -600,14 +672,10 @@ package: # SYFT_PACKAGE_SEARCH_UNINDEXED_ARCHIVES env var search-unindexed-archives: false - cataloger: - # enable/disable cataloging of packages - # SYFT_PACKAGE_CATALOGER_ENABLED env var - enabled: true + # allows users to exclude synthetic binary packages from the sbom + # these packages are removed if an overlap with a non-synthetic package is found + exclude-binary-overlap-by-ownership: true - # the search space to look for packages (options: all-layers, squashed) - # same as -s ; SYFT_PACKAGE_CATALOGER_SCOPE env var - scope: "squashed" golang: # search for go package licences in the GOPATH of the system running Syft, note that this is outside the @@ -657,45 +725,11 @@ python: guess-unpinned-requirements: false javascript: - search-remote-licenses: false - npm-base-url: "https://registry.npmjs.org" - -file-contents: - cataloger: - # enable/disable cataloging of file contents - # SYFT_FILE_CONTENTS_CATALOGER_ENABLED env var - enabled: true + search-remote-licenses: false + npm-base-url: "https://registry.npmjs.org" - # the search space to look for file contents (options: all-layers, squashed) - # SYFT_FILE_CONTENTS_CATALOGER_SCOPE env var - scope: "squashed" - # skip searching a file entirely if it is above the given size (default = 1MB; unit = bytes) - # SYFT_FILE_CONTENTS_SKIP_FILES_ABOVE_SIZE env var - skip-files-above-size: 1048576 - - # file globs for the cataloger to match on - # SYFT_FILE_CONTENTS_GLOBS env var - globs: [] - -file-metadata: - cataloger: - # enable/disable cataloging of file metadata - # SYFT_FILE_METADATA_CATALOGER_ENABLED env var - enabled: true - - # the search space to look for file metadata (options: all-layers, squashed) - # SYFT_FILE_METADATA_CATALOGER_SCOPE env var - scope: "squashed" - - # the file digest algorithms to use when cataloging files (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512") - # SYFT_FILE_METADATA_DIGESTS env var - digests: ["sha256"] - -# maximum number of workers used to process the list of package catalogers in parallel -parallelism: 1 - -# options that apply to all scan sources +# configuration for the source that the SBOM is generated from (e.g. a file, directory, or container image) source: # alias name for the source # SYFT_SOURCE_NAME env var; --source-name flag @@ -710,6 +744,13 @@ source: # the file digest algorithms to use on the scanned file (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512") digests: ["sha256"] + image: + + # allows users to specify which image source should be used to generate the sbom + # valid values are: registry, docker, podman + default-pull-source: "" + + # options when pulling directly from a registry via the "registry:" or "containerd:" scheme registry: # skip TLS verification when communicating with the registry