Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add R cataloger #1790

Merged
merged 8 commits into from
May 10, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions syft/formats/common/spdxhelpers/source_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from linux kernel module files"
case pkg.NixPkg:
answer = "acquired package info from nix store path"
case pkg.Rpkg:
answer = "acquired package info from R-package DESCRIPTION file"
default:
answer = "acquired package info from the following paths"
}
Expand Down
8 changes: 8 additions & 0 deletions syft/formats/common/spdxhelpers/source_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,14 @@ func Test_SourceInfo(t *testing.T) {
"from nix store path",
},
},
{
input: pkg.Package{
Type: pkg.Rpkg,
},
expected: []string{
"acquired package info from R-package DESCRIPTION file",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/php"
"github.com/anchore/syft/syft/pkg/cataloger/portage"
"github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/pkg/cataloger/r"
"github.com/anchore/syft/syft/pkg/cataloger/rpm"
"github.com/anchore/syft/syft/pkg/cataloger/ruby"
"github.com/anchore/syft/syft/pkg/cataloger/rust"
Expand All @@ -53,6 +54,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
php.NewComposerInstalledCataloger(),
portage.NewPortageCataloger(),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewRpmDBCataloger(),
ruby.NewGemSpecCataloger(),
sbom.NewSBOMCataloger(),
Expand Down Expand Up @@ -121,6 +123,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
portage.NewPortageCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewFileCataloger(),
rpm.NewRpmDBCataloger(),
ruby.NewGemFileLockCataloger(),
Expand Down
13 changes: 13 additions & 0 deletions syft/pkg/cataloger/r/cataloger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package r

import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "r-package-cataloger"

// NewPackageCataloger returns a new R cataloger object based on detection of R package DESCRIPTION files.
func NewPackageCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
WithParserByGlobs(parseDescriptionFile, "**/DESCRIPTION")
}
60 changes: 60 additions & 0 deletions syft/pkg/cataloger/r/cataloger_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package r

import (
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)

func TestRPackageCataloger(t *testing.T) {
expectedPkgs := []pkg.Package{
{
Name: "base",
Version: "4.3.0",
FoundBy: "r-package-cataloger",
Locations: source.NewLocationSet(source.NewLocation("base/DESCRIPTION")),
Licenses: []string{"Part of R 4.3.0"},
Language: "R",
Type: "R-package",
willmurphyscode marked this conversation as resolved.
Show resolved Hide resolved
PURL: "pkg:cran/base@4.3.0",
MetadataType: "RDescriptionFileMetadataType",
Metadata: pkg.RDescriptionFileMetadata{
Title: "The R Base Package",
Description: "Base R functions.",
Author: "R Core Team and contributors worldwide",
Maintainer: "R Core Team <do-use-Contact-address@r-project.org>",
Built: "R 4.3.0; ; 2023-04-21 11:33:09 UTC; unix",
Suggests: []string{"methods"},
},
},
{
Name: "stringr",
Version: "1.5.0.9000",
FoundBy: "r-package-cataloger",
Locations: source.NewLocationSet(source.NewLocation("stringr/DESCRIPTION")),
Licenses: []string{"MIT + file LICENSE"},
Language: "R",
Type: "R-package",
PURL: "pkg:cran/stringr@1.5.0.9000",
MetadataType: "RDescriptionFileMetadataType",
Metadata: pkg.RDescriptionFileMetadata{
Title: "Simple, Consistent Wrappers for Common String Operations",
Description: "A consistent, simple and easy to use set of wrappers around the fantastic 'stringi' package. All function and argument names (and positions) are consistent, all functions deal with \"NA\"'s and zero length vectors in the same way, and the output from one function is easy to feed into the input of another.",
URL: []string{"https://stringr.tidyverse.org", "https://github.com/tidyverse/stringr"},
Imports: []string{
"cli", "glue (>= 1.6.1)", "lifecycle (>= 1.0.3)", "magrittr",
"rlang (>= 1.0.0)", "stringi (>= 1.5.3)", "vctrs (>= 0.4.0)",
},
Depends: []string{"R (>= 3.3)"},
Suggests: []string{"covr", "dplyr", "gt", "htmltools", "htmlwidgets", "knitr", "rmarkdown", "testthat (>= 3.0.0)", "tibble"},
},
},
}
// TODO: relationships are not under test yet
var expectedRelationships []artifact.Relationship

pkgtest.NewCatalogTester().FromDirectory(t, "test-fixtures/installed").Expects(expectedPkgs, expectedRelationships).TestCataloger(t, NewPackageCataloger())
}
38 changes: 38 additions & 0 deletions syft/pkg/cataloger/r/package.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package r

import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

func newPackage(pd parseData, locations ...source.Location) pkg.Package {
locationSet := source.NewLocationSet()
for _, loc := range locations {
locationSet.Add(loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))
}
result := pkg.Package{
Name: pd.Package,
Version: pd.Version,
FoundBy: catalogerName,
willmurphyscode marked this conversation as resolved.
Show resolved Hide resolved
Locations: locationSet,
Licenses: []string{pd.License},
Language: "R",
willmurphyscode marked this conversation as resolved.
Show resolved Hide resolved
Type: pkg.Rpkg,
PURL: packageURL(pd),
MetadataType: pkg.RDescriptionFileMetadataType,
Metadata: pd.RDescriptionFileMetadata,
}

result.Language = "R"
willmurphyscode marked this conversation as resolved.
Show resolved Hide resolved
result.FoundBy = catalogerName

result.Licenses = []string{pd.License}
result.Version = pd.Version
result.SetID()
return result
}

func packageURL(m parseData) string {
return packageurl.NewPackageURL("cran", "", m.Package, m.Version, nil, "").ToString()
}
134 changes: 134 additions & 0 deletions syft/pkg/cataloger/r/parse_description.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package r

import (
"bufio"
"io"
"regexp"
"strings"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)

/* some examples of license strings found in DESCRIPTION files:
find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq
License: GPL
License: GPL (>= 2)
License: GPL (>=2)
License: GPL(>=2)
License: GPL (>= 2) | file LICENCE
License: GPL-2 | GPL-3
License: GPL-3
License: LGPL (>= 2)
License: LGPL (>= 2.1)
License: MIT + file LICENSE
License: Part of R 4.3.0
License: Unlimited
*/

func parseDescriptionFile(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
values := extractFieldsFromDescriptionFile(reader)
m := parseDataFromDescriptionMap(values)
return []pkg.Package{newPackage(m, []source.Location{reader.Location}...)}, nil, nil
willmurphyscode marked this conversation as resolved.
Show resolved Hide resolved
}

type parseData struct {
Package string
Version string
License string
pkg.RDescriptionFileMetadata
}

func parseDataFromDescriptionMap(values map[string]string) parseData {
return parseData{
License: values["License"],
Package: values["Package"],
Version: values["Version"],
RDescriptionFileMetadata: pkg.RDescriptionFileMetadata{
Title: values["Title"],
Description: cleanMultiLineValue(values["Description"]),
Maintainer: values["Maintainer"],
URL: commaSeparatedList(values["URL"]),
Depends: commaSeparatedList(values["Depends"]),
Imports: commaSeparatedList(values["Imports"]),
Suggests: commaSeparatedList(values["Suggests"]),
NeedsCompilation: yesNoToBool(values["NeedsCompilation"]),
Author: values["Author"],
Repository: values["Repository"],
Built: values["Built"],
},
}
}

func yesNoToBool(s string) bool {
return strings.EqualFold(s, "yes")
willmurphyscode marked this conversation as resolved.
Show resolved Hide resolved
}

func commaSeparatedList(s string) []string {
var result []string
split := strings.Split(s, ",")
for _, piece := range split {
value := strings.TrimSpace(piece)
if value == "" {
continue
}
result = append(result, value)
}
return result
}

var space = regexp.MustCompile(`\s+`)

func cleanMultiLineValue(s string) string {
return space.ReplaceAllString(s, " ")
}

func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string {
result := make(map[string]string)
key := ""
var valueFragment strings.Builder
scanner := bufio.NewScanner(reader)

for scanner.Scan() {
line := scanner.Text()
// line is like Key: Value -> start capturing value; close out previous value
// line is like \t\t continued value -> append to existing value
if len(line) == 0 {
continue
}
if startsWithWhitespace(line) {
// we're continuing a value
if key == "" {
continue
}
valueFragment.WriteByte('\n')
valueFragment.WriteString(strings.TrimSpace(line))
} else {
if key != "" {
// capture previous value
result[key] = valueFragment.String()
key = ""
valueFragment = strings.Builder{}
}
parts := strings.SplitN(line, ":", 2)
if len(parts) != 2 {
continue
}
key = parts[0]
valueFragment.WriteString(strings.TrimSpace(parts[1]))
}
}
if key != "" {
result[key] = valueFragment.String()
}
return result
}

func startsWithWhitespace(s string) bool {
if s == "" {
return false
}
return s[0] == ' ' || s[0] == '\t'
}
73 changes: 73 additions & 0 deletions syft/pkg/cataloger/r/parse_description_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package r

import (
"os"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func Test_extractFieldsFromDescriptionFile(t *testing.T) {
tests := []struct {
name string
fixture string
want map[string]string
}{
{
name: "go case",
fixture: "test-fixtures/map-parse/simple",
want: map[string]string{
"Package": "base",
"Version": "4.3.0",
"Suggests": "methods",
"Built": "R 4.3.0; ; 2023-04-21 11:33:09 UTC; unix",
},
},
{
name: "bad cases",
fixture: "test-fixtures/map-parse/bad",
want: map[string]string{
"Key": "",
"Whitespace": "",
},
},
{
name: "multiline key-value",
fixture: "test-fixtures/map-parse/multiline",
want: map[string]string{
"Description": `A consistent, simple and easy to use set of wrappers around
the fantastic 'stringi' package. All function and argument names (and
positions) are consistent, all functions deal with "NA"'s and zero
length vectors in the same way, and the output from one function is
easy to feed into the input of another.`,
"License": "MIT + file LICENSE",
"Key": "value",
},
},
{
name: "eof multiline",
fixture: "test-fixtures/map-parse/eof-multiline",
want: map[string]string{
"License": "MIT + file LICENSE",
"Description": `A consistent, simple and easy to use set of wrappers around
the fantastic 'stringi' package. All function and argument names (and
positions) are consistent, all functions deal with "NA"'s and zero
length vectors in the same way, and the output from one function is
easy to feed into the input of another.`,
},
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
file, err := os.Open(test.fixture)
require.NoError(t, err)

result := extractFieldsFromDescriptionFile(file)

assert.Equal(t, test.want, result)
})
}

}
Loading