Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: in some cases, try to use pom info to guess name and version to top level jar #2080

Merged
merged 7 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files
// TODO: do we want to prefer or check for pom files over manifest here?
manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
Expand Down Expand Up @@ -186,9 +185,24 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {

// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...)
/*
We should select the name and version from, in this order:
1. pom.properties if we find exactly 1
2. pom.xml if we find exactly 1
3. manifest
4. filename
*/
name, version := j.guessMainPackageNameAndVersionFromPomInfo()
if name == "" {
name = selectName(manifest, j.fileInfo)
}
if version == "" {
version = selectVersion(manifest, j.fileInfo)
}
return &pkg.Package{
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
// TODO: maybe select name should just have a pom properties in it?
Copy link
Contributor

@wagoodman wagoodman Aug 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe rename selectName to something more specific like selectNameFromManifest (same for the version function) -- the upside is that you can write a selectNameAndVersion wrapper that is called here and pushes some of this lower level logic to another function, and the existing tests for selectName don't really need to get modified from a logic sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion! I think I'll do this as a follow up.

Name: name,
Version: version,
Language: pkg.Java,
Licenses: pkg.NewLicenseSet(licenses...),
Locations: file.NewLocationSet(
Expand All @@ -204,6 +218,37 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
}, nil
}

// guessMainPackageNameAndVersionFromPomInfo makes a best-effort guess at the name and version of the
// archive being parsed, using the maven metadata (pom.properties / pom.xml) found inside it.
//
// A guess is only attempted when the archive contains exactly one pom.properties or exactly one
// pom.xml. A pom.properties entry is accepted only when its artifact ID is non-empty and starts with
// the name parsed from the archive filename. The name always comes from the accepted pom.properties;
// the version comes from pom.properties and falls back to the pom.xml found under the same parent path.
//
// Either returned string may be empty, in which case the caller is expected to fall back to other
// sources (manifest, filename).
func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (string, string) {
	pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob)
	pomMatches := j.fileManifest.GlobMatch(pomXMLGlob)
	if len(pomPropertyMatches) != 1 && len(pomMatches) != 1 {
		// neither kind of pom file is unique in the archive, so there is no basis for a guess
		return "", ""
	}

	// Errors are deliberately ignored: this is only a heuristic, the returned maps are used as-is,
	// and an empty result simply makes the caller fall back to the manifest / filename.
	properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, pomPropertyMatches)
	projects, _ := pomProjectByParentPath(j.archivePath, j.location, pomMatches)

	// Because of the "or" above there may still be several pom.properties entries (e.g. one pom.xml
	// but many pom.properties). Map iteration order is unspecified, so pick the plausible match with
	// the smallest parent path to keep the result deterministic between runs.
	var (
		found               bool
		selectedPath        string
		pomPropertiesObject pkg.PomProperties
	)
	for parentPath, propertiesObj := range properties {
		plausible := propertiesObj.ArtifactID != "" &&
			j.fileInfo.name != "" &&
			strings.HasPrefix(propertiesObj.ArtifactID, j.fileInfo.name)
		if !plausible {
			continue
		}
		if !found || parentPath < selectedPath {
			found = true
			selectedPath = parentPath
			pomPropertiesObject = propertiesObj
		}
	}
	if !found {
		return "", ""
	}

	// only consult the pom.xml that sits next to the selected pom.properties so that name and
	// version are never mixed from two unrelated maven projects
	var pomProjectObject pkg.PomProject
	if proj, exists := projects[selectedPath]; exists {
		pomProjectObject = proj
	}

	// the artifact ID is guaranteed to be non-empty by the plausibility check above
	name := pomPropertiesObject.ArtifactID
	version := pomPropertiesObject.Version
	if version == "" {
		version = pomProjectObject.Version
	}
	return name, version
}

// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
// parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new
Expand Down
36 changes: 36 additions & 0 deletions test/integration/regression_java_virtualpath_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package integration

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"

"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

// TestWarCatalogedCorrectlyIfRenamed checks that a WAR whose filename was changed after download is
// still cataloged with the PURL taken from its maven metadata and with a virtual path that is just
// the path of the archive itself.
func TestWarCatalogedCorrectlyIfRenamed(t *testing.T) {
	// the fixture image installs hudson-war@2.2.1 and renames the file to `/hudson.war`
	sbom, _ := catalogFixtureImage(t, "image-java-virtualpath-regression", source.SquashedScope, nil)

	const (
		badPURL         = "pkg:maven/hudson/hudson@2.2.1"
		goodPURL        = "pkg:maven/org.jvnet.hudson.main/hudson-war@2.2.1"
		badVirtualPath  = "/hudson.war:org.jvnet.hudson.main:hudson-war"
		goodVirtualPath = "/hudson.war"
	)

	matched := false
	for _, candidate := range sbom.Artifacts.Packages.Sorted() {
		// only java packages that mention hudson are relevant to this regression
		if candidate.Type != pkg.JavaPkg || !strings.Contains(candidate.Name, "hudson") {
			continue
		}

		assert.NotEqual(t, badPURL, candidate.PURL, "must not find bad purl %q", badPURL)

		// packages without java metadata are compared using an empty virtual path
		var virtualPath string
		if javaMeta, isJava := candidate.Metadata.(pkg.JavaMetadata); isJava {
			virtualPath = javaMeta.VirtualPath
			if candidate.PURL == goodPURL && virtualPath == goodVirtualPath {
				matched = true
			}
		}

		assert.NotEqual(t, badVirtualPath, virtualPath, "must not find bad virtual path %q", badVirtualPath)
	}

	assert.True(t, matched, "must find correct package, but did not")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Builds an image that contains a single WAR file whose filename has been changed
# after download (presumably the fixture for the renamed-WAR regression test — confirm against the test).
FROM alpine:latest

# download the hudson-war 2.2.1 artifact from Maven Central into the current working directory
RUN wget https://repo1.maven.org/maven2/org/jvnet/hudson/main/hudson-war/2.2.1/hudson-war-2.2.1.war

# rename the archive so the filename no longer matches the "<artifactId>-<version>" form
RUN mv hudson-war-2.2.1.war hudson.war