Skip to content

Commit

Permalink
SyftCLIScanner: support SBOM generation with syft CLI
Browse files Browse the repository at this point in the history
Packit currently supports SBOM generation with syft tooling by utilizing
syft's go library. This has caused packit maintainers significant
maintenance burden. This commit adds a mechanism for buildpack authors
to utilize the syft CLI instead to generate SBOM. The intention here is
that with widespread adoption of this, we can phase out the codebase
that uses the syft go library and thereby relieve the maintainers of this
pain.

Until recently, syft did not allow consumers to specify the exact schema
version of an SBOM mediatype they want generated (the tooling currently
supports passing a version for CycloneDX and SPDX -
github.com/anchore/syft/issues/846#issuecomment-1908676454). So packit
was forced to vendor-in (copy) large chunks of upstream syft go code
into packit in order to pin SBOM mediatype versions to versions that
most consumers wanted to use. Every time a new version of Syft comes out,
maintainers had to painfully update the vendored-in code to work with
upstream syft components (e.g.
github.com//pull/491).

Furthermore, it is advantageous to use the syft CLI instead of syft go
library for multiple reasons. With CLI, we can delegate the entire SBOM
generation mechanism easily to syft. The CLI tool is well documented and
widely used in the community, and it seems like the syft project is
developed with a CLI-first approach. The caveat here is that
buildpack authors who use this method should include the Paketo Syft
buildpack in their buildplan to have access to the CLI during the build
phase.

Example usage:

\# detect
\# unless BP_DISABLE_BOM is true
requirements = append(requirements, packit.BuildPlanRequirement{
                Name: "syft",
                Metadata: map[string]interface{}{
                        "build": true,
                },
})

\# build
syftCLIScanner := sbom.NewSyftCLIScanner(
		pexec.NewExecutable("syft"),
		scribe.NewEmitter(os.Stdout),
)

\# To scan a layer after installing a dependency
_ = syftCLIScanner.GenerateSBOM(myLayer.Path,
	context.Layers.Path,
	myLayer.Name,
	context.BuildpackInfo.SBOMFormats...,
)

\# OR to scan the workspace dir after running a process
_ = syftCLIScanner.GenerateSBOM(context.WorkingDir,
	context.Layers.Path,
	myLayer.Name,
	context.BuildpackInfo.SBOMFormats...,
)

- I have not implemented the prettification of SBOMs that the codepath
  using the syft go lib implements. It seems to add bloat to the app
  image and is not supported via the CLI. Consumers of SBOMs can easily
  prettify the SBOM JSONs themselves.
- In the codepath that uses the syft go lib, license information is
  manually injected from buildpack.toml data into the SBOM. This is not
  available with the SyftCLIScanner. I couldn't find any reasoning for
  why this was done in the first place.
- I have intentionally not reused some code in methods that's mixed up
  with the syft go library with an intention to easily phase out that
  codebase in the near future.
  • Loading branch information
arjun024 committed Sep 14, 2024
1 parent 884a7b7 commit 0ec15df
Show file tree
Hide file tree
Showing 6 changed files with 687 additions and 4 deletions.
32 changes: 32 additions & 0 deletions sbom/fakes/executable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package fakes

import (
"sync"

"github.com/paketo-buildpacks/packit/v2/pexec"
)

// Executable is a test double for an executable. Its Execute method records
// every call in ExecuteCall and either returns the canned Returns.Err or
// delegates to Stub when one is set.
type Executable struct {
ExecuteCall struct {
// mutex serializes access to the recorded call state in Execute.
mutex sync.Mutex
// CallCount is incremented once per Execute call.
CallCount int
// Receives holds the argument of the most recent Execute call.
Receives struct {
Execution pexec.Execution
}
// Returns holds the error Execute returns when Stub is nil.
Returns struct {
Err error
}
// Stub, when non-nil, is called by Execute and its result is returned
// instead of Returns.Err.
Stub func(pexec.Execution) error
}
}

// Execute records the invocation (bumping CallCount and remembering the
// received pexec.Execution) while holding the call mutex. When a Stub is
// configured its result is returned; otherwise the canned Returns.Err is.
func (f *Executable) Execute(param1 pexec.Execution) error {
	call := &f.ExecuteCall

	call.mutex.Lock()
	defer call.mutex.Unlock()

	call.CallCount++
	call.Receives.Execution = param1

	if stub := call.Stub; stub != nil {
		return stub(param1)
	}
	return call.Returns.Err
}
35 changes: 31 additions & 4 deletions sbom/formats.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
package sbom

import (
"fmt"
"mime"
"strings"
)

const (
CycloneDXFormat = "application/vnd.cyclonedx+json"
SPDXFormat = "application/spdx+json"
Expand All @@ -10,15 +16,36 @@ const (
type Format string

// Extension outputs the expected file extension for a given Format.
// packit allows CycloneDX and SPDX mediatypes to have an optional
// version suffix. e.g. "application/vnd.cyclonedx+json;version=1.4"
// The version suffix is not allowed for the syft mediatype as the
// syft tooling does not support providing a version for this mediatype.
func (f Format) Extension() string {
switch f {
case CycloneDXFormat:
switch {
case strings.HasPrefix(string(f), CycloneDXFormat):
return "cdx.json"
case SPDXFormat:
case strings.HasPrefix(string(f), SPDXFormat):
return "spdx.json"
case SyftFormat:
case f == SyftFormat:
return "syft.json"
default:
return ""
}
}

// VersionParam returns the value of the optional "version" parameter of the
// mediatype, or "" when the mediatype carries none. The value usually derives
// from the "sbom-formats" field used by packit-based buildpacks
// (@packit.SBOMFormats), e.g.
// "application/vnd.cyclonedx+json;version=1.4" -> "1.4". See
// github.com/paketo-buildpacks/packit/issues/302
//
// An error is returned when the Format cannot be parsed as a mediatype.
func (f Format) VersionParam() (string, error) {
	_, params, parseErr := mime.ParseMediaType(string(f))
	if parseErr != nil {
		return "", fmt.Errorf("failed to parse SBOM mediatype. Expected <mediatype>[;version=<ver>], Got %s: %w", f, parseErr)
	}

	// Indexing a map with an absent key yields "", which is exactly the
	// "no version given" result.
	return params["version"], nil
}
39 changes: 39 additions & 0 deletions sbom/formats_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package sbom_test

import (
"testing"

"github.com/paketo-buildpacks/packit/v2/sbom"
"github.com/sclevine/spec"

. "github.com/onsi/gomega"
)

// testFormats checks that sbom.Format reports the expected file extension and
// version parameter for mediatypes with and without a ";version=" suffix, and
// that a versioned Syft mediatype yields no extension.
func testFormats(t *testing.T, context spec.G, it spec.S) {
	Expect := NewWithT(t).Expect

	context("Formats", func() {
		context("no version param", func() {
			it("gets the right mediatype extension and version", func() {
				format := sbom.Format(sbom.CycloneDXFormat)

				Expect(format.Extension()).To(Equal("cdx.json"))
				Expect(format.VersionParam()).To(Equal(""))
			})
		})

		context("with version param", func() {
			it("gets the right mediatype extension and version", func() {
				format := sbom.Format(sbom.SPDXFormat + ";version=9.8.7")

				Expect(format.Extension()).To(Equal("spdx.json"))
				Expect(format.VersionParam()).To(Equal("9.8.7"))
			})

			context("Syft mediatype with version returns empty", func() {
				it("returns empty", func() {
					format := sbom.Format(sbom.SyftFormat + ";version=9.8.7")

					Expect(format.Extension()).To(Equal(""))
				})
			})
		})
	})
}
2 changes: 2 additions & 0 deletions sbom/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ func TestUnitSBOM(t *testing.T) {
format.MaxLength = 0

suite := spec.New("sbom", spec.Report(report.Terminal{}))
suite("Formats", testFormats)
suite("Formatter", testFormatter)
suite("FormattedReader", testFormattedReader)
suite("SBOM", testSBOM)
suite("SyftCLIScanner", testSyftCLIScanner)
suite.Run(t)
}
238 changes: 238 additions & 0 deletions sbom/syft_cli_scanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
package sbom

import (
"encoding/json"
"fmt"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"

"github.com/google/uuid"
"github.com/paketo-buildpacks/packit/v2/pexec"
"github.com/paketo-buildpacks/packit/v2/scribe"
)

// Executable is the command runner SyftCLIScanner uses to invoke the syft
// CLI. It takes a pexec.Execution describing the invocation and reports
// failure through the returned error.
//
//go:generate faux --interface Executable --output fakes/executable.go
type Executable interface {
Execute(pexec.Execution) (err error)
}

// SyftCLIScanner implements scanning a dir using the `syft` CLI
// to generate SBOM, process it, and write it to a location that complies with
// the buildpacks spec. Supports CycloneDX, SPDX and Syft mediatypes, with an
// optional version param for CycloneDX and SPDX (the Syft mediatype does not
// accept one).
//
// Example Usage:
//
//	syftCLIScanner := sbom.NewSyftCLIScanner(
//		pexec.NewExecutable("syft"),
//		scribe.NewEmitter(os.Stdout),
//	)
type SyftCLIScanner struct {
// syftCLI executes the syft command.
syftCLI Executable
// logger receives debug progress messages; its ActionWriter is used as the
// CLI's stdout and stderr.
logger scribe.Emitter
}

// NewSyftCLIScanner returns a SyftCLIScanner that runs syft through syftCLI
// and reports progress through logger.
func NewSyftCLIScanner(syftCLI Executable, logger scribe.Emitter) SyftCLIScanner {
	var scanner SyftCLIScanner
	scanner.syftCLI = syftCLI
	scanner.logger = logger
	return scanner
}

// GenerateSBOM scans scanDir with the syft CLI and produces one SBOM per entry
// of mediaTypes (CycloneDX and SPDX may carry an optional ";version=<ver>"
// param). The CLI is instructed to write each SBOM to
// <layersPath>/<layerName>.sbom.<ext>, the location defined by the buildpack
// spec. After the scan, CycloneDX and SPDX outputs are rewritten in place to
// make them reproducible (Paketo RFCs 38 & 49).
//
// An error is returned when a mediatype is not supported, when the syft CLI
// fails, or when post-processing an SBOM file fails.
func (s SyftCLIScanner) GenerateSBOM(scanDir, layersPath, layerName string, mediaTypes ...string) error {
	s.logger.Debug.Process("Generating SBOM")
	s.logger.Debug.Subprocess("Generating syft CLI args from provided mediatypes %s", mediaTypes)

	cliArgs := []string{"scan", "--quiet"}
	// outputPaths[i] is the file syft writes for mediaTypes[i].
	outputPaths := make([]string, len(mediaTypes))

	for i, mediatype := range mediaTypes {
		outputFormat, err := s.specMediatypeToSyftOutputFormat(mediatype)
		if err != nil {
			return fmt.Errorf("failed to convert mediatype %s to syft output format: %w", mediatype, err)
		}

		ext := Format(mediatype).Extension()
		if ext == "" {
			return fmt.Errorf("invalid mediatype %s provided", mediatype)
		}

		// Layer SBOM write location during build is <layers>/<layer>.sbom.<ext> (CNB spec)
		outputPaths[i] = filepath.Join(layersPath, layerName+".sbom."+ext)
		cliArgs = append(cliArgs, "--output", outputFormat+"="+outputPaths[i])
	}
	cliArgs = append(cliArgs, "dir:"+scanDir)

	s.logger.Debug.Subprocess("Executing syft CLI with args %v", cliArgs)
	execution := pexec.Execution{
		Args:   cliArgs,
		Dir:    scanDir,
		Stdout: s.logger.ActionWriter,
		Stderr: s.logger.ActionWriter,
	}
	if err := s.syftCLI.Execute(execution); err != nil {
		return fmt.Errorf("failed to execute syft cli with args '%s': %w.\nYou might be missing a buildpack that provides the syft CLI", cliArgs, err)
	}

	// Make SBOM outputs reproducible. Syft-format output is left untouched.
	for i, mediatype := range mediaTypes {
		switch {
		case strings.HasPrefix(mediatype, CycloneDXFormat):
			s.logger.Debug.Subprocess("Processing syft CLI CycloneDX SBOM output to make it reproducible")
			if err := s.makeCycloneDXReproducible(outputPaths[i]); err != nil {
				return fmt.Errorf("failed to make CycloneDX SBOM reproducible: %w", err)
			}
		case strings.HasPrefix(mediatype, SPDXFormat):
			s.logger.Debug.Subprocess("Processing syft CLI SPDX SBOM output to make it reproducible")
			if err := s.makeSPDXReproducible(outputPaths[i]); err != nil {
				return fmt.Errorf("failed to make SPDX SBOM reproducible: %w", err)
			}
		}
	}

	s.logger.Debug.Break()
	return nil
}

// specMediatypeToSyftOutputFormat translates an SBOM mediatype name as defined
// by the buildpack spec (with an optional version param for CycloneDX and
// SPDX, e.g. "application/vnd.cyclonedx+json;version=1.4") into the output
// format understood by syft tooling (e.g. "cyclonedx-json@1.4").
// Refer github.com/anchore/syft/blob/v1.11.1/cmd/syft/internal/options/writer.go#L86
//
// It returns an error when the mediatype cannot be parsed, when it matches
// none of the known mediatypes, or when a version is supplied for the syft
// mediatype.
func (s SyftCLIScanner) specMediatypeToSyftOutputFormat(mediatype string) (string, error) {
	version, err := Format(mediatype).VersionParam()
	if err != nil {
		return "", err
	}

	// syft expects the schema version as an "@<ver>" suffix on the format name.
	var suffix string
	if version != "" {
		suffix = "@" + version
	}

	if strings.HasPrefix(mediatype, CycloneDXFormat) {
		return "cyclonedx-json" + suffix, nil
	}
	if strings.HasPrefix(mediatype, SPDXFormat) {
		return "spdx-json" + suffix, nil
	}
	if !strings.HasPrefix(mediatype, SyftFormat) {
		return "", fmt.Errorf("mediatype %s matched none of the known mediatypes. Valid values are %s, with an optional version param for CycloneDX and SPDX", mediatype, []string{CycloneDXFormat, SPDXFormat, SyftFormat})
	}

	// The syft tool does not support providing a version for the syft mediatype.
	if version != "" {
		return "", fmt.Errorf("The syft mediatype does not allow providing a ;version=<ver> param. Got: %s", mediatype)
	}
	return "syft-json", nil
}

// makeCycloneDXReproducible rewrites the CycloneDX JSON file at path in place
// with the fields serialNumber and metadata.timestamp removed, so repeated
// scans of the same content produce the same document.
// See https://github.com/paketo-buildpacks/rfcs/blob/main/text/0038-cdx-syft-sbom.md#amendment-sbom-reproducibility
//
// It returns an error when the file cannot be read, decoded, re-encoded or
// written back, including a failure reported when closing the written file.
func (s SyftCLIScanner) makeCycloneDXReproducible(path string) error {
	in, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("unable to read CycloneDX JSON file %s:%w", path, err)
	}
	defer in.Close()

	input := map[string]interface{}{}
	if err := json.NewDecoder(in).Decode(&input); err != nil {
		return fmt.Errorf("unable to decode CycloneDX JSON %s: %w", path, err)
	}

	delete(input, "serialNumber")

	// metadata is optional; only strip the timestamp when it is an object.
	if md, exists := input["metadata"]; exists {
		if metadata, ok := md.(map[string]interface{}); ok {
			delete(metadata, "timestamp")
		}
	}

	out, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("unable to open CycloneDX JSON for writing %s: %w", path, err)
	}

	if err := json.NewEncoder(out).Encode(input); err != nil {
		// The encode failure is the error worth reporting; closing is best effort.
		_ = out.Close()
		return fmt.Errorf("unable to encode CycloneDX: %w", err)
	}

	// A write error can surface only at Close, so it must not be dropped.
	if err := out.Close(); err != nil {
		return fmt.Errorf("unable to finish writing CycloneDX JSON %s: %w", path, err)
	}

	return nil
}

// makeSPDXReproducible rewrites the SPDX JSON file at path in place so that
// documentNamespace and creationInfo.created have reproducible values.
// creationInfo.created becomes the zero time, or the time given by
// $SOURCE_DATE_EPOCH (seconds since the Unix epoch) when that is set.
// documentNamespace gets the host "paketo.io", the first "syft" in its path
// replaced by "packit", and the part after the first "-" of its last path
// element replaced by a SHA1-based UUID computed over the document without
// its namespace.
// See github.com/paketo-buildpacks/rfcs/blob/main/text/0049-reproducible-spdx.md
//
// It returns an error when the file cannot be read, decoded, re-encoded or
// written back (including a failure reported when closing the written file),
// when $SOURCE_DATE_EPOCH is not an integer, or when documentNamespace is not
// a parseable URL.
func (s SyftCLIScanner) makeSPDXReproducible(path string) error {
	in, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("unable to read SPDX JSON file %s:%w", path, err)
	}
	defer in.Close()

	input := map[string]interface{}{}
	if err := json.NewDecoder(in).Decode(&input); err != nil {
		return fmt.Errorf("unable to decode SPDX JSON %s: %w", path, err)
	}

	// Makes the creationInfo reproducible so a hash can be taken for the
	// documentNamespace
	if creationInfo, ok := input["creationInfo"].(map[string]interface{}); ok {
		creationInfo["created"] = time.Time{} // This is the zero-valued time

		sourceDateEpoch := os.Getenv("SOURCE_DATE_EPOCH")
		if sourceDateEpoch != "" {
			sde, err := strconv.ParseInt(sourceDateEpoch, 10, 64)
			if err != nil {
				return fmt.Errorf("failed to parse SOURCE_DATE_EPOCH: %w", err)
			}
			creationInfo["created"] = time.Unix(sde, 0).UTC()
		}
		input["creationInfo"] = creationInfo
	}

	if namespace, ok := input["documentNamespace"].(string); ok {
		// The namespace itself is excluded from the data being hashed.
		delete(input, "documentNamespace")

		data, err := json.Marshal(input)
		if err != nil {
			return fmt.Errorf("failed to checksum SPDX document: %w", err)
		}

		uri, err := url.Parse(namespace)
		if err != nil {
			return fmt.Errorf("failed to parse SPDX documentNamespace url: %w", err)
		}

		uri.Host = "paketo.io"
		uri.Path = strings.Replace(uri.Path, "syft", "packit", 1)
		oldBase := filepath.Base(uri.Path)
		// Keep the part of the last path element before the first "-" and
		// replace the rest with the content-derived UUID.
		source, _, _ := strings.Cut(oldBase, "-")
		newBase := fmt.Sprintf("%s-%s", source, uuid.NewSHA1(uuid.NameSpaceURL, data))
		uri.Path = strings.Replace(uri.Path, oldBase, newBase, 1)

		input["documentNamespace"] = uri.String()
	}

	out, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("unable to open SPDX JSON for writing %s: %w", path, err)
	}

	if err := json.NewEncoder(out).Encode(input); err != nil {
		// The encode failure is the error worth reporting; closing is best effort.
		_ = out.Close()
		return fmt.Errorf("unable to encode SPDX: %w", err)
	}

	// A write error can surface only at Close, so it must not be dropped.
	if err := out.Close(); err != nil {
		return fmt.Errorf("unable to finish writing SPDX JSON %s: %w", path, err)
	}
	return nil
}
Loading

0 comments on commit 0ec15df

Please sign in to comment.