From ac012d911de77414d3b049e344af4ab73bb86a44 Mon Sep 17 00:00:00 2001 From: Weston Steimel Date: Sat, 25 Feb 2023 10:03:44 +0000 Subject: [PATCH] feat: extract username as vendor candidate from github/gitlab Signed-off-by: Weston Steimel --- .../cataloger/common/cpe/vendors_from_url.go | 27 +++++++++++++++++-- .../common/cpe/vendors_from_url_test.go | 25 +++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/syft/pkg/cataloger/common/cpe/vendors_from_url.go b/syft/pkg/cataloger/common/cpe/vendors_from_url.go index 2f8285789925..93267827487e 100644 --- a/syft/pkg/cataloger/common/cpe/vendors_from_url.go +++ b/syft/pkg/cataloger/common/cpe/vendors_from_url.go @@ -1,23 +1,30 @@ package cpe import ( + "regexp" "strings" + + "github.com/anchore/syft/internal" ) var ( - urlPrefixVendors = map[string][]string{ + urlPrefixToVendors = map[string][]string{ "https://www.gnu.org/": {"gnu"}, "https://developer.gnome.org/": {"gnome"}, "https://www.ruby-lang.org/": {"ruby-lang"}, "https://llvm.org/": {"llvm"}, "https://www.isc.org/": {"isc"}, } + + vendorExtractionPatterns = []*regexp.Regexp{ + regexp.MustCompile(`^https://(?:github|gitlab)\.com/(?P<vendor>[\w\-]*?)/.*$`), + } ) func candidateVendorsFromURL(url string) fieldCandidateSet { vendors := newFieldCandidateSet() - for urlPrefix, additionalVendors := range urlPrefixVendors { + for urlPrefix, additionalVendors := range urlPrefixToVendors { if strings.HasPrefix(url, urlPrefix) { for _, v := range additionalVendors { vendors.add(fieldCandidate{ @@ -25,9 +32,25 @@ func candidateVendorsFromURL(url string) fieldCandidateSet { disallowSubSelections: true, disallowDelimiterVariations: true, }) + + return vendors } } } + for _, p := range vendorExtractionPatterns { + groups := internal.MatchNamedCaptureGroups(p, url) + + if v, ok := groups["vendor"]; ok { + vendors.add(fieldCandidate{ + value: v, + disallowSubSelections: true, + disallowDelimiterVariations: true, + }) + + return vendors + } + } + return vendors 
} diff --git a/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go b/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go index 75dac3ba94f7..19348bd4af8a 100644 --- a/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go +++ b/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go @@ -27,6 +27,31 @@ func Test_candidateVendorsFromURL(t *testing.T) { url: "https://www.gnu.org/software/make", expected: []string{"gnu"}, }, + { + name: "github username as vendor", + url: "https://github.com/armadillo/abcxyz-12345", + expected: []string{"armadillo"}, + }, + { + name: "github username with - as vendor", + url: "https://github.com/1234-abc-xyz/hello", + expected: []string{"1234-abc-xyz"}, + }, + { + name: "gitlab username as vendor", + url: "https://gitlab.com/armadillo/abcxyz-12345", + expected: []string{"armadillo"}, + }, + { + name: "gitlab username with - as vendor", + url: "https://gitlab.com/1234-abc-xyz/hello", + expected: []string{"1234-abc-xyz"}, + }, + { + name: "github username as vendor from longer url", + url: "https://github.com/armadillo/abcxyz-12345/a/b/c/d/e/f/g", + expected: []string{"armadillo"}, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) {