-
Notifications
You must be signed in to change notification settings - Fork 587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: in some cases, try to use pom info to guess name and version to top level jar #2080
Changes from 5 commits
f347fb0
44df88f
714d208
09bdd84
37337b5
1f37909
878b5a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -149,7 +149,6 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error) | |
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. | ||
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { | ||
// search and parse java manifest files | ||
// TODO: do we want to prefer or check for pom files over manifest here? | ||
manifestMatches := j.fileManifest.GlobMatch(manifestGlob) | ||
if len(manifestMatches) > 1 { | ||
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches) | ||
|
@@ -186,9 +185,24 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { | |
|
||
// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest | ||
licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...) | ||
/* | ||
We should name and version from, in this order: | ||
1. pom.properties if we find exactly 1 | ||
2. pom.xml if we find exactly 1 | ||
3. manifest | ||
4. filename | ||
*/ | ||
name, version := j.guessMainPackageNameAndVersionFromPomInfo() | ||
if name == "" { | ||
name = selectName(manifest, j.fileInfo) | ||
} | ||
if version == "" { | ||
version = selectVersion(manifest, j.fileInfo) | ||
} | ||
return &pkg.Package{ | ||
Name: selectName(manifest, j.fileInfo), | ||
Version: selectVersion(manifest, j.fileInfo), | ||
// TODO: maybe select name should just have a pom properties in it? | ||
Name: name, | ||
Version: version, | ||
Language: pkg.Java, | ||
Licenses: pkg.NewLicenseSet(licenses...), | ||
Locations: file.NewLocationSet( | ||
|
@@ -204,6 +218,37 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { | |
}, nil | ||
} | ||
|
||
func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (string, string) { | ||
pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob) | ||
pomMatches := j.fileManifest.GlobMatch(pomXMLGlob) | ||
var pomPropertiesObject pkg.PomProperties | ||
var pomProjectObject pkg.PomProject | ||
if len(pomPropertyMatches) == 1 && len(pomMatches) == 1 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's possible that we could have only a pom.xml and no properties, I think we should account for that as well (allow for 0 to 1 pom properties) |
||
// we have exactly 1 pom.properties in the archive; assume it represents the | ||
// package we're scanning | ||
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, pomPropertyMatches) | ||
projects, _ := pomProjectByParentPath(j.archivePath, j.location, pomMatches) | ||
|
||
for parentPath, propertiesObj := range properties { | ||
if propertiesObj.ArtifactID != "" && j.fileInfo.name != "" && strings.HasPrefix(propertiesObj.ArtifactID, j.fileInfo.name) { | ||
pomPropertiesObject = propertiesObj | ||
if proj, exists := projects[parentPath]; exists { | ||
pomProjectObject = proj | ||
} | ||
} | ||
} | ||
} | ||
name := pomPropertiesObject.ArtifactID | ||
if name == "" { | ||
name = pomProjectObject.ArtifactID | ||
} | ||
version := pomPropertiesObject.Version | ||
if version == "" { | ||
version = pomProjectObject.Version | ||
} | ||
return name, version | ||
} | ||
|
||
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given | ||
// parent package, returning all listed Java packages found for each pom | ||
// properties discovered and potentially updating the given parentPkg with new | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package integration | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
|
||
"github.com/anchore/syft/syft/pkg" | ||
"github.com/anchore/syft/syft/source" | ||
) | ||
|
||
func TestWarCatalogedCorrectlyIfRenamed(t *testing.T) { | ||
// install hudson-war@2.2.1 and renames the file to `/hudson.war` | ||
// TODO: is this better expressed in syft/pkg/cataloger/java/archive_parser_test.go? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed, I'm not against having this integration test at all, so consider this comment non-blocking, however, testing specific business logic should be pushed as low as possible test-level-wise (unit is preferred for these kinds of assertions). We do have some regression tests at the integration level to test against the specific asset where we found the regression, so keeping this test here is ok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the suggestion! I think I'll leave this here for now; it was reported as an end to end regression, and the parser was still finding the correct package, it was just also finding an incorrect extra package, so the end-to-end flow seems a good place of the test. I'll push a commit to remove the TODO. |
||
sbom, _ := catalogFixtureImage(t, "image-java-virtualpath-regression", source.SquashedScope, nil) | ||
|
||
badPURL := "pkg:maven/hudson/hudson@2.2.1" | ||
goodPURL := "pkg:maven/org.jvnet.hudson.main/hudson-war@2.2.1" | ||
foundCorrectPackage := false | ||
badVirtualPath := "/hudson.war:org.jvnet.hudson.main:hudson-war" | ||
goodVirtualPath := "/hudson.war" | ||
for _, p := range sbom.Artifacts.Packages.Sorted() { | ||
if p.Type == pkg.JavaPkg && strings.Contains(p.Name, "hudson") { | ||
assert.NotEqual(t, badPURL, p.PURL, "must not find bad purl %q", badPURL) | ||
virtPath := "" | ||
if meta, ok := p.Metadata.(pkg.JavaMetadata); ok { | ||
virtPath = meta.VirtualPath | ||
if p.PURL == goodPURL && virtPath == goodVirtualPath { | ||
foundCorrectPackage = true | ||
} | ||
} | ||
assert.NotEqual(t, badVirtualPath, virtPath, "must not find bad virtual path %q", badVirtualPath) | ||
} | ||
} | ||
assert.True(t, foundCorrectPackage, "must find correct package, but did not") | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
FROM alpine:latest | ||
|
||
RUN wget https://repo1.maven.org/maven2/org/jvnet/hudson/main/hudson-war/2.2.1/hudson-war-2.2.1.war | ||
|
||
RUN mv hudson-war-2.2.1.war hudson.war | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or maybe rename
selectName
to something more specific likeselectNameFromManifest
(same for the version function) -- the upside is that you can write aselectNameAndVersion
wrapper that is called here and pushes some of this lower level logic to another function, and the existing tests for selectName don't really need to get modified from a logic sense.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the suggestion! I think I'll do this as a follow up.