Skip to content

Commit

Permalink
Add R cataloger
Browse files Browse the repository at this point in the history
Add a cataloger that detects installed R packages by looking for DESCRIPTION files.

Signed-off-by: Will Murphy <will.murphy@anchore.com>
  • Loading branch information
willmurphyscode committed May 5, 2023
1 parent d63a1f5 commit 93d3c4a
Show file tree
Hide file tree
Showing 24 changed files with 522 additions and 0 deletions.
2 changes: 2 additions & 0 deletions syft/formats/common/spdxhelpers/source_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from linux kernel module files"
case pkg.NixPkg:
answer = "acquired package info from nix store path"
case pkg.Rpkg:
answer = "acquired package info from R-package DESCRIPTION file"
default:
answer = "acquired package info from the following paths"
}
Expand Down
8 changes: 8 additions & 0 deletions syft/formats/common/spdxhelpers/source_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,14 @@ func Test_SourceInfo(t *testing.T) {
"from nix store path",
},
},
{
input: pkg.Package{
Type: pkg.Rpkg,
},
expected: []string{
"acquired package info from R-package DESCRIPTION file",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/php"
"github.com/anchore/syft/syft/pkg/cataloger/portage"
"github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/pkg/cataloger/r"
"github.com/anchore/syft/syft/pkg/cataloger/rpm"
"github.com/anchore/syft/syft/pkg/cataloger/ruby"
"github.com/anchore/syft/syft/pkg/cataloger/rust"
Expand All @@ -54,6 +55,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
php.NewComposerInstalledCataloger(),
portage.NewPortageCataloger(),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewRpmDBCataloger(),
ruby.NewGemSpecCataloger(),
sbom.NewSBOMCataloger(),
Expand Down Expand Up @@ -123,6 +125,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
portage.NewPortageCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewFileCataloger(),
rpm.NewRpmDBCataloger(),
ruby.NewGemFileLockCataloger(),
Expand Down
13 changes: 13 additions & 0 deletions syft/pkg/cataloger/r/cataloger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package r

import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// catalogerName is the name under which the R cataloger is registered.
const catalogerName = "r-package-cataloger"

// NewPackageCataloger builds the R cataloger: a generic cataloger that hands
// every file matching the DESCRIPTION glob to parseDescriptionFile.
func NewPackageCataloger() *generic.Cataloger {
	const descriptionGlob = "**/DESCRIPTION"

	cataloger := generic.NewCataloger(catalogerName)
	return cataloger.WithParserByGlobs(parseDescriptionFile, descriptionGlob)
}
60 changes: 60 additions & 0 deletions syft/pkg/cataloger/r/cataloger_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package r

import (
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)

// TestRPackageCataloger runs the R package cataloger over the
// test-fixtures/installed directory and compares the reported packages with
// the two packages described below.
func TestRPackageCataloger(t *testing.T) {
	basePkg := pkg.Package{
		Name:         "base",
		Version:      "4.3.0",
		FoundBy:      "r-package-cataloger",
		Locations:    source.NewLocationSet(source.NewLocation("base/DESCRIPTION")),
		Licenses:     []string{"Part of R 4.3.0"},
		Language:     "R",
		Type:         "R-package",
		PURL:         "pkg:cran/base@4.3.0",
		MetadataType: "RDescriptionFileMetadataType",
		Metadata: pkg.RDescriptionFileMetadata{
			Title:       "The R Base Package",
			Description: "Base R functions.",
			Author:      "R Core Team and contributors worldwide",
			Maintainer:  "R Core Team <do-use-Contact-address@r-project.org>",
			Built:       "R 4.3.0; ; 2023-04-21 11:33:09 UTC; unix",
			Suggests:    []string{"methods"},
		},
	}

	stringrPkg := pkg.Package{
		Name:         "stringr",
		Version:      "1.5.0.9000",
		FoundBy:      "r-package-cataloger",
		Locations:    source.NewLocationSet(source.NewLocation("stringr/DESCRIPTION")),
		Licenses:     []string{"MIT + file LICENSE"},
		Language:     "R",
		Type:         "R-package",
		PURL:         "pkg:cran/stringr@1.5.0.9000",
		MetadataType: "RDescriptionFileMetadataType",
		Metadata: pkg.RDescriptionFileMetadata{
			Title:       "Simple, Consistent Wrappers for Common String Operations",
			Description: "A consistent, simple and easy to use set of wrappers around the fantastic 'stringi' package. All function and argument names (and positions) are consistent, all functions deal with \"NA\"'s and zero length vectors in the same way, and the output from one function is easy to feed into the input of another.",
			URL:         []string{"https://stringr.tidyverse.org", "https://github.com/tidyverse/stringr"},
			Imports: []string{
				"cli", "glue (>= 1.6.1)", "lifecycle (>= 1.0.3)", "magrittr",
				"rlang (>= 1.0.0)", "stringi (>= 1.5.3)", "vctrs (>= 0.4.0)",
			},
			Depends:  []string{"R (>= 3.3)"},
			Suggests: []string{"covr", "dplyr", "gt", "htmltools", "htmlwidgets", "knitr", "rmarkdown", "testthat (>= 3.0.0)", "tibble"},
		},
	}

	wantPkgs := []pkg.Package{basePkg, stringrPkg}

	// TODO: relationships are not under test yet
	var wantRelationships []artifact.Relationship

	tester := pkgtest.NewCatalogTester().FromDirectory(t, "test-fixtures/installed")
	tester.Expects(wantPkgs, wantRelationships).TestCataloger(t, NewPackageCataloger())
}
38 changes: 38 additions & 0 deletions syft/pkg/cataloger/r/package.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package r

import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

// newPackage builds the pkg.Package for one parsed DESCRIPTION file.
//
// Every given location is recorded on the package and annotated as primary
// evidence. The package ID is computed before the package is returned.
func newPackage(pd parseData, locations ...source.Location) pkg.Package {
	locationSet := source.NewLocationSet()
	for _, loc := range locations {
		locationSet.Add(loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))
	}

	// A DESCRIPTION file without a License field must not produce a license
	// list containing a single empty string.
	var licenses []string
	if pd.License != "" {
		licenses = []string{pd.License}
	}

	result := pkg.Package{
		Name:         pd.Package,
		Version:      pd.Version,
		FoundBy:      catalogerName,
		Locations:    locationSet,
		Licenses:     licenses,
		Language:     "R",
		Type:         pkg.Rpkg,
		PURL:         packageURL(pd),
		MetadataType: pkg.RDescriptionFileMetadataType,
		Metadata:     pd.RDescriptionFileMetadata,
	}

	result.SetID()
	return result
}

// packageURL renders the package URL string for the parsed package, using the
// "cran" type with the package name and version and no namespace, qualifiers
// or subpath.
func packageURL(m parseData) string {
	const purlType = "cran"

	purl := packageurl.NewPackageURL(purlType, "", m.Package, m.Version, nil, "")
	return purl.ToString()
}
134 changes: 134 additions & 0 deletions syft/pkg/cataloger/r/parse_description.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package r

import (
"bufio"
"io"
"regexp"
"strings"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)

/* some examples of license strings found in DESCRIPTION files:
find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq
License: GPL
License: GPL (>= 2)
License: GPL (>=2)
License: GPL(>=2)
License: GPL (>= 2) | file LICENCE
License: GPL-2 | GPL-3
License: GPL-3
License: LGPL (>= 2)
License: LGPL (>= 2.1)
License: MIT + file LICENSE
License: Part of R 4.3.0
License: Unlimited
*/

// parseDescriptionFile is the generic.Cataloger parser for DESCRIPTION files.
//
// It reads the key/value fields from reader and turns them into a single R
// package located at reader.Location. The cataloger glob matches any file
// named DESCRIPTION, so files that do not declare both a Package and a Version
// are treated as "not an R package" and yield no packages rather than a
// nameless or versionless entry. No relationships are produced and the
// returned error is always nil.
func parseDescriptionFile(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	values := extractFieldsFromDescriptionFile(reader)
	m := parseDataFromDescriptionMap(values)
	if m.Package == "" || m.Version == "" {
		// Package and Version are mandatory in an R DESCRIPTION file.
		return nil, nil, nil
	}
	return []pkg.Package{newPackage(m, reader.Location)}, nil, nil
}

// parseData holds the values read from a DESCRIPTION file: the fields that map
// onto top-level package attributes, plus the embedded R-specific metadata.
type parseData struct {
	// Package is the value of the "Package" field.
	Package string
	// Version is the value of the "Version" field.
	Version string
	// License is the raw, unparsed value of the "License" field.
	License string
	// RDescriptionFileMetadata carries the remaining DESCRIPTION fields.
	pkg.RDescriptionFileMetadata
}

// parseDataFromDescriptionMap converts the raw field map of a DESCRIPTION file
// into parseData. Missing keys become zero values. The Description has its
// whitespace runs collapsed, the list-like fields are split on commas, and
// NeedsCompilation is read as a yes/no flag.
func parseDataFromDescriptionMap(values map[string]string) parseData {
	metadata := pkg.RDescriptionFileMetadata{
		Title:            values["Title"],
		Description:      cleanMultiLineValue(values["Description"]),
		Author:           values["Author"],
		Maintainer:       values["Maintainer"],
		URL:              commaSeparatedList(values["URL"]),
		Repository:       values["Repository"],
		Built:            values["Built"],
		NeedsCompilation: yesNoToBool(values["NeedsCompilation"]),
		Imports:          commaSeparatedList(values["Imports"]),
		Depends:          commaSeparatedList(values["Depends"]),
		Suggests:         commaSeparatedList(values["Suggests"]),
	}

	return parseData{
		Package:                  values["Package"],
		Version:                  values["Version"],
		License:                  values["License"],
		RDescriptionFileMetadata: metadata,
	}
}

// yesNoToBool reports whether s is the word "yes", compared without regard to
// letter case. Every other value, including the empty string, yields false.
func yesNoToBool(s string) bool {
	const affirmative = "yes"

	isYes := strings.EqualFold(affirmative, s)
	return isYes
}

// commaSeparatedList splits s on commas and returns the entries with
// surrounding whitespace removed. Entries that are empty after trimming are
// dropped; when nothing remains the result is nil.
func commaSeparatedList(s string) []string {
	isComma := func(r rune) bool { return r == ',' }

	var items []string
	for _, field := range strings.FieldsFunc(s, isComma) {
		if trimmed := strings.TrimSpace(field); trimmed != "" {
			items = append(items, trimmed)
		}
	}
	return items
}

// space matches a run of one or more whitespace characters.
var space = regexp.MustCompile(`\s+`)

// cleanMultiLineValue replaces every run of whitespace in s (including the
// newlines that join continuation lines) with a single space character.
func cleanMultiLineValue(s string) string {
	const singleSpace = " "

	collapsed := space.ReplaceAllLiteralString(s, singleSpace)
	return collapsed
}

// extractFieldsFromDescriptionFile reads "Key: Value" fields from reader and
// returns them as a map from key to value.
//
// A line that starts with a space or tab continues the value of the preceding
// key; continuation lines are trimmed and joined to the value with "\n".
// Lines that are empty or contain only whitespace are ignored, as are
// continuation lines that have no preceding key, lines without a colon, and
// lines whose key part is empty. Values are trimmed of surrounding
// whitespace.
//
// The function has no error return: if reading fails part-way, the fields
// collected up to that point are returned.
func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string {
	fields := make(map[string]string)

	var (
		key   string
		value strings.Builder
	)

	// flush stores the field collected so far (if any) and always clears the
	// accumulated value, so text can never leak from one field into the next.
	flush := func() {
		if key != "" {
			fields[key] = value.String()
		}
		key = ""
		value.Reset()
	}

	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Text()

		// Blank and whitespace-only lines carry no data; treating them as
		// continuations would append a stray newline to the current value.
		if strings.TrimSpace(line) == "" {
			continue
		}

		if startsWithWhitespace(line) {
			// Continuation of the current value; ignored when no key is open.
			if key != "" {
				value.WriteByte('\n')
				value.WriteString(strings.TrimSpace(line))
			}
			continue
		}

		// Any non-indented line ends the field that was being collected.
		flush()

		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 || parts[0] == "" {
			continue
		}
		key = parts[0]
		value.WriteString(strings.TrimSpace(parts[1]))
	}
	flush()

	return fields
}

// startsWithWhitespace reports whether the first byte of s is a space or a
// tab. It returns false for the empty string.
func startsWithWhitespace(s string) bool {
	return s != "" && (s[0] == ' ' || s[0] == '\t')
}
73 changes: 73 additions & 0 deletions syft/pkg/cataloger/r/parse_description_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package r

import (
"os"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// Test_extractFieldsFromDescriptionFile parses each fixture file and compares
// the extracted key/value map with the expected one.
func Test_extractFieldsFromDescriptionFile(t *testing.T) {
	tests := []struct {
		name    string
		fixture string
		want    map[string]string
	}{
		{
			name:    "go case",
			fixture: "test-fixtures/map-parse/simple",
			want: map[string]string{
				"Package":  "base",
				"Version":  "4.3.0",
				"Suggests": "methods",
				"Built":    "R 4.3.0; ; 2023-04-21 11:33:09 UTC; unix",
			},
		},
		{
			name:    "bad cases",
			fixture: "test-fixtures/map-parse/bad",
			want: map[string]string{
				"Key":        "",
				"Whitespace": "",
			},
		},
		{
			name:    "multiline key-value",
			fixture: "test-fixtures/map-parse/multiline",
			want: map[string]string{
				"Description": `A consistent, simple and easy to use set of wrappers around
the fantastic 'stringi' package. All function and argument names (and
positions) are consistent, all functions deal with "NA"'s and zero
length vectors in the same way, and the output from one function is
easy to feed into the input of another.`,
				"License": "MIT + file LICENSE",
				"Key":     "value",
			},
		},
		{
			name:    "eof multiline",
			fixture: "test-fixtures/map-parse/eof-multiline",
			want: map[string]string{
				"License": "MIT + file LICENSE",
				"Description": `A consistent, simple and easy to use set of wrappers around
the fantastic 'stringi' package. All function and argument names (and
positions) are consistent, all functions deal with "NA"'s and zero
length vectors in the same way, and the output from one function is
easy to feed into the input of another.`,
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			file, err := os.Open(test.fixture)
			require.NoError(t, err)
			// Release the fixture's file handle once the subtest finishes.
			t.Cleanup(func() { require.NoError(t, file.Close()) })

			result := extractFieldsFromDescriptionFile(file)

			assert.Equal(t, test.want, result)
		})
	}

}
Loading

0 comments on commit 93d3c4a

Please sign in to comment.