Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add R cataloger #1790

Merged
merged 8 commits into from
May 10, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions syft/formats/common/spdxhelpers/source_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from linux kernel module files"
case pkg.NixPkg:
answer = "acquired package info from nix store path"
case pkg.Rpkg:
answer = "acquired package info from R-package DESCRIPTION file"
default:
answer = "acquired package info from the following paths"
}
Expand Down
8 changes: 8 additions & 0 deletions syft/formats/common/spdxhelpers/source_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,14 @@ func Test_SourceInfo(t *testing.T) {
"from nix store path",
},
},
{
input: pkg.Package{
Type: pkg.Rpkg,
},
expected: []string{
"acquired package info from R-package DESCRIPTION file",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/php"
"github.com/anchore/syft/syft/pkg/cataloger/portage"
"github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/pkg/cataloger/r"
"github.com/anchore/syft/syft/pkg/cataloger/rpm"
"github.com/anchore/syft/syft/pkg/cataloger/ruby"
"github.com/anchore/syft/syft/pkg/cataloger/rust"
Expand All @@ -53,6 +54,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
php.NewComposerInstalledCataloger(),
portage.NewPortageCataloger(),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewRpmDBCataloger(),
ruby.NewGemSpecCataloger(),
sbom.NewSBOMCataloger(),
Expand Down Expand Up @@ -121,6 +123,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
portage.NewPortageCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewFileCataloger(),
rpm.NewRpmDBCataloger(),
ruby.NewGemFileLockCataloger(),
Expand Down
13 changes: 13 additions & 0 deletions syft/pkg/cataloger/r/cataloger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package r

import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "r-package-cataloger"

// NewPackageCataloger builds the generic cataloger, registered under catalogerName,
// that runs parseDescriptionFile against every file matching "**/DESCRIPTION".
func NewPackageCataloger() *generic.Cataloger {
	c := generic.NewCataloger(catalogerName)
	return c.WithParserByGlobs(parseDescriptionFile, "**/DESCRIPTION")
}
60 changes: 60 additions & 0 deletions syft/pkg/cataloger/r/cataloger_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package r

import (
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)

// TestRPackageCataloger runs the R package cataloger over the
// "test-fixtures/installed" directory and checks the packages it reports.
func TestRPackageCataloger(t *testing.T) {
	// expectation for base/DESCRIPTION
	basePkg := pkg.Package{
		Name:         "base",
		Version:      "4.3.0",
		FoundBy:      "r-package-cataloger",
		Locations:    source.NewLocationSet(source.NewLocation("base/DESCRIPTION")),
		Licenses:     []string{"Part of R 4.3.0"},
		Language:     pkg.R,
		Type:         pkg.Rpkg,
		PURL:         "pkg:cran/base@4.3.0",
		MetadataType: pkg.RDescriptionFileMetadataType,
		Metadata: pkg.RDescriptionFileMetadata{
			Title:       "The R Base Package",
			Description: "Base R functions.",
			Author:      "R Core Team and contributors worldwide",
			Maintainer:  "R Core Team <do-use-Contact-address@r-project.org>",
			Built:       "R 4.3.0; ; 2023-04-21 11:33:09 UTC; unix",
			Suggests:    []string{"methods"},
		},
	}

	// expectation for stringr/DESCRIPTION
	stringrPkg := pkg.Package{
		Name:         "stringr",
		Version:      "1.5.0.9000",
		FoundBy:      "r-package-cataloger",
		Locations:    source.NewLocationSet(source.NewLocation("stringr/DESCRIPTION")),
		Licenses:     []string{"MIT + file LICENSE"},
		Language:     pkg.R,
		Type:         pkg.Rpkg,
		PURL:         "pkg:cran/stringr@1.5.0.9000",
		MetadataType: pkg.RDescriptionFileMetadataType,
		Metadata: pkg.RDescriptionFileMetadata{
			Title:       "Simple, Consistent Wrappers for Common String Operations",
			Description: "A consistent, simple and easy to use set of wrappers around the fantastic 'stringi' package. All function and argument names (and positions) are consistent, all functions deal with \"NA\"'s and zero length vectors in the same way, and the output from one function is easy to feed into the input of another.",
			URL:         []string{"https://stringr.tidyverse.org", "https://github.com/tidyverse/stringr"},
			Imports: []string{
				"cli", "glue (>= 1.6.1)", "lifecycle (>= 1.0.3)", "magrittr",
				"rlang (>= 1.0.0)", "stringi (>= 1.5.3)", "vctrs (>= 0.4.0)",
			},
			Depends:  []string{"R (>= 3.3)"},
			Suggests: []string{"covr", "dplyr", "gt", "htmltools", "htmlwidgets", "knitr", "rmarkdown", "testthat (>= 3.0.0)", "tibble"},
		},
	}

	want := []pkg.Package{basePkg, stringrPkg}

	// TODO: relationships are not under test yet
	var wantRelationships []artifact.Relationship

	tester := pkgtest.NewCatalogTester().FromDirectory(t, "test-fixtures/installed")
	tester.Expects(want, wantRelationships).TestCataloger(t, NewPackageCataloger())
}
32 changes: 32 additions & 0 deletions syft/pkg/cataloger/r/package.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package r

import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

// newPackage assembles a pkg.Package from the data parsed out of a DESCRIPTION file.
//
// Each of the given locations is added to the package's location set, annotated
// as primary evidence. The name, version and PURL come from pd; the embedded
// RDescriptionFileMetadata becomes the package metadata. SetID is called on the
// package before it is returned.
func newPackage(pd parseData, locations ...source.Location) pkg.Package {
	locationSet := source.NewLocationSet()
	for _, loc := range locations {
		locationSet.Add(loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))
	}

	// A DESCRIPTION file without a License field must not produce a package that
	// claims a single empty-string license, so only record non-empty values.
	var licenses []string
	if pd.License != "" {
		licenses = []string{pd.License}
	}

	result := pkg.Package{
		Name:         pd.Package,
		Version:      pd.Version,
		Locations:    locationSet,
		Licenses:     licenses,
		Language:     pkg.R,
		Type:         pkg.Rpkg,
		PURL:         packageURL(pd),
		MetadataType: pkg.RDescriptionFileMetadataType,
		Metadata:     pd.RDescriptionFileMetadata,
	}

	result.SetID()
	return result
}

// packageURL renders the package URL string for the parsed package: type "cran",
// the package name and version, and empty namespace, qualifiers and subpath.
func packageURL(m parseData) string {
	purl := packageurl.NewPackageURL("cran", "", m.Package, m.Version, nil, "")
	return purl.ToString()
}
14 changes: 14 additions & 0 deletions syft/pkg/cataloger/r/package_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package r

import "testing"

// Test_newPackage is a table-driven placeholder: the case table is currently
// empty, so no subtests are executed.
func Test_newPackage(t *testing.T) {
	type testCase struct {
		name string
	}
	var tests []testCase

	for i := range tests {
		tc := tests[i]
		t.Run(tc.name, func(t *testing.T) {})
	}
}
147 changes: 147 additions & 0 deletions syft/pkg/cataloger/r/parse_description.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package r

import (
"bufio"
"io"
"regexp"
"strings"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)

/* some examples of license strings found in DESCRIPTION files:
find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq
License: GPL
License: GPL (>= 2)
License: GPL (>=2)
License: GPL(>=2)
License: GPL (>= 2) | file LICENCE
License: GPL-2 | GPL-3
License: GPL-3
License: LGPL (>= 2)
License: LGPL (>= 2.1)
License: MIT + file LICENSE
License: Part of R 4.3.0
License: Unlimited
*/

// parseDescriptionFile reads one DESCRIPTION file and turns it into at most one package.
// The resolver and environment arguments are unused. When the file yields no
// package name or no version, nothing is reported (and no error is returned).
// No relationships are produced.
func parseDescriptionFile(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	fields := extractFieldsFromDescriptionFile(reader)
	candidate := newPackage(parseDataFromDescriptionMap(fields), reader.Location)

	// both a name and a version are required for the package to be reported
	if candidate.Name != "" && candidate.Version != "" {
		return []pkg.Package{candidate}, nil, nil
	}
	return nil, nil, nil
}

// parseData carries everything read from a single DESCRIPTION file: the three
// fields that newPackage maps onto top-level pkg.Package attributes, plus the
// remaining fields embedded as pkg.RDescriptionFileMetadata.
type parseData struct {
	// Package is the value of the "Package" field; newPackage uses it as the package name.
	Package string
	// Version is the value of the "Version" field.
	Version string
	// License is the raw, unparsed value of the "License" field.
	License string
	// RDescriptionFileMetadata holds the other DESCRIPTION fields and is stored as the package metadata.
	pkg.RDescriptionFileMetadata
}

// parseDataFromDescriptionMap converts the raw field map of a DESCRIPTION file
// into parseData. Fields missing from the map come through as zero values.
// URL, Depends, Imports and Suggests are split on commas, Description has its
// whitespace runs collapsed, and NeedsCompilation is true only for "yes"
// (case-insensitive).
func parseDataFromDescriptionMap(values map[string]string) parseData {
	metadata := pkg.RDescriptionFileMetadata{
		Title:            values["Title"],
		Description:      cleanMultiLineValue(values["Description"]),
		Author:           values["Author"],
		Maintainer:       values["Maintainer"],
		URL:              commaSeparatedList(values["URL"]),
		Repository:       values["Repository"],
		Built:            values["Built"],
		NeedsCompilation: yesNoToBool(values["NeedsCompilation"]),
		Imports:          commaSeparatedList(values["Imports"]),
		Depends:          commaSeparatedList(values["Depends"]),
		Suggests:         commaSeparatedList(values["Suggests"]),
	}

	return parseData{
		Package:                  values["Package"],
		Version:                  values["Version"],
		License:                  values["License"],
		RDescriptionFileMetadata: metadata,
	}
}

// yesNoToBool reports whether s is "yes", compared case-insensitively.
// Any other value, including the empty string, yields false.
func yesNoToBool(s string) bool {
	/*
		$ docker run --rm -it rocker/r-ver bash
		$ install2.r ggplot2 dplyr mlr3 caret # just some packages for a larger sample
		$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | sort | uniq
		NeedsCompilation: no
		NeedsCompilation: yes
		$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | wc -l
		105
	*/
	return strings.EqualFold(s, "yes")
}

// commaSeparatedList breaks s apart at commas and returns the pieces with
// surrounding whitespace removed. Pieces that are empty after trimming are
// dropped; if nothing remains the result is nil.
func commaSeparatedList(s string) []string {
	isComma := func(r rune) bool { return r == ',' }

	var items []string
	for _, field := range strings.FieldsFunc(s, isComma) {
		if trimmed := strings.TrimSpace(field); trimmed != "" {
			items = append(items, trimmed)
		}
	}
	return items
}

// space matches one or more consecutive whitespace characters.
var space = regexp.MustCompile(`\s+`)

// cleanMultiLineValue flattens a multi-line value by replacing every run of
// whitespace (newlines included) with one space character. Leading and trailing
// whitespace is reduced to a single space rather than removed.
func cleanMultiLineValue(s string) string {
	flattened := space.ReplaceAllString(s, " ")
	return flattened
}

// extractFieldsFromDescriptionFile reads "Key: Value" fields from reader and
// returns them as a map from key to value.
//
// A line that does not start with a space or tab begins a new field: the text
// before the first ':' is the key and the trimmed remainder is the start of the
// value. Such a line without a ':' is skipped (and closes the previous field).
// A line that starts with a space or tab continues the current field; its
// trimmed content is appended to the value after a '\n'. Empty lines and
// whitespace-only lines are ignored, so they never leave a dangling newline on
// a value. When a key occurs more than once, the last occurrence wins.
//
// Errors reported by the scanner are not surfaced; whatever was parsed before
// scanning stopped is returned.
func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string {
	result := make(map[string]string)
	key := ""
	var valueFragment strings.Builder
	scanner := bufio.NewScanner(reader)

	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}

		if startsWithWhitespace(line) {
			// continuation of the current value; skip it when there is no field
			// to attach it to, or when the line holds nothing but whitespace
			fragment := strings.TrimSpace(line)
			if key == "" || fragment == "" {
				continue
			}
			valueFragment.WriteByte('\n')
			valueFragment.WriteString(fragment)
			continue
		}

		// a new field starts here: close out the previous one first
		if key != "" {
			result[key] = valueFragment.String()
			key = ""
			valueFragment.Reset()
		}
		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			continue
		}
		key = parts[0]
		valueFragment.WriteString(strings.TrimSpace(parts[1]))
	}

	// the final field has no following line to trigger its capture
	if key != "" {
		result[key] = valueFragment.String()
	}
	return result
}

// startsWithWhitespace reports whether the first byte of s is a space or a tab.
// The empty string yields false.
func startsWithWhitespace(s string) bool {
	if s == "" {
		return false
	}
	return s[0] == ' ' || s[0] == '\t'
}
Loading