diff --git a/syft/pkg/cataloger/common/cpe/java.go b/syft/pkg/cataloger/common/cpe/java.go index 2e838de3010..7a1a8f7b8c3 100644 --- a/syft/pkg/cataloger/common/cpe/java.go +++ b/syft/pkg/cataloger/common/cpe/java.go @@ -21,16 +21,16 @@ var ( "be", } - primaryJavaManifestGroupIDFields = []string{ + PrimaryJavaManifestGroupIDFields = []string{ + "Bundle-SymbolicName", "Extension-Name", "Specification-Vendor", "Implementation-Vendor", - "Bundle-SymbolicName", "Implementation-Vendor-Id", "Implementation-Title", "Bundle-Activator", } - secondaryJavaManifestGroupIDFields = []string{ + SecondaryJavaManifestGroupIDFields = []string{ "Automatic-Module-Name", "Main-Class", "Package", @@ -168,7 +168,7 @@ func artifactIDFromJavaPackage(p pkg.Package) string { } artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID) - if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 { + if looksLikeGroupID(artifactID) && len(strings.Split(artifactID, ".")) > 1 { // there is a strong indication that the artifact ID is really a group ID, don't use it return "" } @@ -184,6 +184,9 @@ func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) { return GroupIDsFromJavaMetadata(p.Name, metadata) } +// GroupIDsFromJavaMetadata returns the possible group IDs for a Java package +// This function is similar to GroupIDFromJavaPackage, but returns all possible group IDs and is less strict +// It is used as a way to generate possible candidates for CPE matching. func GroupIDsFromJavaMetadata(pkgName string, metadata pkg.JavaMetadata) (groupIDs []string) { groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...) groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...) @@ -242,7 +245,7 @@ func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs } func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []string { - if groupID, ok := defaultArtifactIDToGroupID[pkgName]; ok { + if groupID, ok := DefaultArtifactIDToGroupID[pkgName]; ok { return []string{groupID} } @@ -251,7 +254,7 @@ func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []stri } // try the common manifest fields first for a set of candidates - groupIDs := getManifestFieldGroupIDs(manifest, primaryJavaManifestGroupIDFields) + groupIDs := GetManifestFieldGroupIDs(manifest, PrimaryJavaManifestGroupIDFields) if len(groupIDs) != 0 { return groupIDs @@ -262,10 +265,10 @@ func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []stri // for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2 // at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar // as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra. - return getManifestFieldGroupIDs(manifest, secondaryJavaManifestGroupIDFields) + return GetManifestFieldGroupIDs(manifest, SecondaryJavaManifestGroupIDFields) } -func getManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) { +func GetManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) { if manifest == nil { return nil } @@ -302,3 +305,7 @@ func removeOSCIDirectives(groupID string) string { func startsWithTopLevelDomain(value string) bool { return internal.HasAnyOfPrefixes(value, domains...) } + +func looksLikeGroupID(value string) bool { + return strings.Contains(value, ".") +} diff --git a/syft/pkg/cataloger/common/cpe/java_groupid_map.go b/syft/pkg/cataloger/common/cpe/java_groupid_map.go index 9e36db185f3..5205c81e9c3 100644 --- a/syft/pkg/cataloger/common/cpe/java_groupid_map.go +++ b/syft/pkg/cataloger/common/cpe/java_groupid_map.go @@ -1,6 +1,6 @@ package cpe -var defaultArtifactIDToGroupID = map[string]string{ +var DefaultArtifactIDToGroupID = map[string]string{ "ant": "org.apache.ant", "ant-antlr": "org.apache.ant", "ant-antunit": "org.apache.ant", diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index ea216e9062f..a9662be65b1 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -62,11 +62,11 @@ func parseJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.Locat } // uniquePkgKey creates a unique string to identify the given package. -func uniquePkgKey(p *pkg.Package) string { +func uniquePkgKey(groupID string, p *pkg.Package) string { if p == nil { return "" } - return fmt.Sprintf("%s|%s", p.Name, p.Version) + return fmt.Sprintf("%s|%s|%s", groupID, p.Name, p.Version) } // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover @@ -371,13 +371,27 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP return projectByParentPath, nil } -// packagesFromPomProperties processes a single Maven POM properties for a given parent package, returning all listed Java packages found and +// newPackageFromMavenData processes a single Maven POM properties for a given parent package, returning all listed Java packages found and // associating each discovered package to the given parent package. Note the pom.xml is optional, the pom.properties is not. func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.PomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package { // keep the artifact name within the virtual path if this package does not match the parent package vPathSuffix := "" - if !strings.HasPrefix(pomProperties.ArtifactID, parentPkg.Name) { - vPathSuffix += ":" + pomProperties.ArtifactID + groupID := "" + if parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata); ok { + groupID = groupIDFromJavaMetadata(parentPkg.Name, parentMetadata) + } + + parentKey := fmt.Sprintf("%s:%s:%s", groupID, parentPkg.Name, parentPkg.Version) + // Since we don't have a package yet, it's important to use the same `field: value` association that we used when creating the parent package + // See below where Name => pomProperties.ArtifactID and Version => pomProperties.Version. We want to check for potentially nested identical + // packages and create equal virtual paths so they are de duped in the future + pomProjectKey := fmt.Sprintf("%s:%s:%s", pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) + if parentKey != pomProjectKey { + // build a new virtual path suffix for the package that is different from the parent package + // we want to use the GroupID and ArtifactID here to preserve uniqueness + // Some packages have the same name but different group IDs (e.g. "org.glassfish.jaxb/jaxb-core", "com.sun.xml.bind/jaxb-core") + // https://github.com/anchore/syft/issues/1944 + vPathSuffix += ":" + pomProperties.GroupID + ":" + pomProperties.ArtifactID } virtualPath := location.AccessPath() + vPathSuffix @@ -408,21 +422,26 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po } func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool { - // the name/version pair matches... - if uniquePkgKey(&p) == uniquePkgKey(parentPkg) { - return true - } - metadata, ok := p.Metadata.(pkg.JavaMetadata) - if !ok { - log.WithFields("package", p.String()).Warn("unable to extract java metadata to check for matching package identity") - return false + parentMetadata, parentOk := parentPkg.Metadata.(pkg.JavaMetadata) + if !ok || !parentOk { + switch { + case !ok: + log.WithFields("package", p.String()).Trace("unable to extract java metadata to check for matching package identity for package: %s", p.Name) + case !parentOk: + log.WithFields("package", parentPkg.String()).Trace("unable to extract java metadata to check for matching package identity for package: %s", parentPkg.Name) + } + // if we can't extract metadata, we can check for matching identities via the package name + // this is not ideal, but it's better than nothing - this should not be used if we have Metadata + + return uniquePkgKey("", &p) == uniquePkgKey("", parentPkg) } - parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata) - if !ok { - log.WithFields("package", p.String()).Warn("unable to extract java metadata from parent for verifying virtual path") - return false + // try to determine identity with the metadata + groupID := groupIDFromJavaMetadata(p.Name, metadata) + parentGroupID := groupIDFromJavaMetadata(parentPkg.Name, parentMetadata) + if uniquePkgKey(groupID, &p) == uniquePkgKey(parentGroupID, parentPkg) { + return true } // the virtual path matches... @@ -434,10 +453,14 @@ func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool { // note: you CANNOT use name-is-subset-of-artifact-id or vice versa --this is too generic. Shaded jars are a good // example of this: where the package name is "cloudbees-analytics-segment-driver" and a child is "analytics", but // they do not indicate the same package. - if metadata.PomProperties.ArtifactID != "" && parentPkg.Name == metadata.PomProperties.ArtifactID { - return true + // NOTE: artifactId might not be a good indicator of uniqueness since archives can contain forks with the same name + // from different groups (e.g. "org.glassfish.jaxb.jaxb-core" and "com.sun.xml.bind.jaxb-core") + // we will use this check as a last resort + if metadata.PomProperties != nil { + if metadata.PomProperties.ArtifactID != "" && parentPkg.Name == metadata.PomProperties.ArtifactID { + return true + } } - return false } diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index 422de7d480f..4635a9c9887 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -174,8 +174,8 @@ func TestParseJar(t *testing.T) { MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ // ensure that nested packages with different names than that of the parent are appended as - // a suffix on the virtual path - VirtualPath: "test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar:joda-time", + // a suffix on the virtual path with a colon separator between group name and artifact name + VirtualPath: "test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar:joda-time:joda-time", PomProperties: &pkg.PomProperties{ Path: "META-INF/maven/joda-time/joda-time/pom.properties", GroupID: "joda-time", @@ -240,7 +240,7 @@ func TestParseJar(t *testing.T) { Metadata: pkg.JavaMetadata{ // ensure that nested packages with different names than that of the parent are appended as // a suffix on the virtual path - VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar:joda-time", + VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar:joda-time:joda-time", PomProperties: &pkg.PomProperties{ Path: "META-INF/maven/joda-time/joda-time/pom.properties", GroupID: "joda-time", @@ -659,7 +659,7 @@ func Test_newPackageFromMavenData(t *testing.T) { Type: pkg.JavaPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: virtualPath + ":" + "some-artifact-id", + VirtualPath: virtualPath + ":" + "some-group-id" + ":" + "some-artifact-id", PomProperties: &pkg.PomProperties{ Name: "some-name", GroupID: "some-group-id", @@ -728,7 +728,7 @@ func Test_newPackageFromMavenData(t *testing.T) { Type: pkg.JavaPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: virtualPath + ":" + "some-artifact-id", + VirtualPath: virtualPath + ":" + "some-group-id" + ":" + "some-artifact-id", PomProperties: &pkg.PomProperties{ Name: "some-name", GroupID: "some-group-id", @@ -797,7 +797,7 @@ func Test_newPackageFromMavenData(t *testing.T) { Type: pkg.JenkinsPluginPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: virtualPath + ":" + "some-artifact-id", + VirtualPath: virtualPath + ":" + "com.cloudbees.jenkins.plugins" + ":" + "some-artifact-id", PomProperties: &pkg.PomProperties{ Name: "some-name", GroupID: "com.cloudbees.jenkins.plugins", @@ -894,44 +894,6 @@ func Test_newPackageFromMavenData(t *testing.T) { }, expectedPackage: nil, }, - { - name: "child matches parent by virtual path -- override name and version", - props: pkg.PomProperties{ - Name: "some-name", - GroupID: "some-group-id", - ArtifactID: "some-parent-name", // note: DOES NOT match parent package - Version: "3.0", // note: DOES NOT match parent package - }, - parent: &pkg.Package{ - Name: "", // note: empty, so should not be matched on - Version: "", // note: empty, so should not be matched on - Type: pkg.JavaPkg, - Metadata: pkg.JavaMetadata{ - VirtualPath: virtualPath, // note: matching virtual path - Manifest: nil, - PomProperties: nil, - Parent: nil, - }, - }, - expectedParent: pkg.Package{ - Name: "some-parent-name", - Version: "3.0", - Type: pkg.JavaPkg, - Metadata: pkg.JavaMetadata{ - VirtualPath: virtualPath, - Manifest: nil, - // note: we attach the discovered pom properties data - PomProperties: &pkg.PomProperties{ - Name: "some-name", - GroupID: "some-group-id", - ArtifactID: "some-parent-name", - Version: "3.0", - }, - Parent: nil, - }, - }, - expectedPackage: nil, - }, { name: "child matches parent by artifact id", props: pkg.PomProperties{ diff --git a/syft/pkg/cataloger/java/package_url.go b/syft/pkg/cataloger/java/package_url.go index adf05ff74e8..d29f1ae3c06 100644 --- a/syft/pkg/cataloger/java/package_url.go +++ b/syft/pkg/cataloger/java/package_url.go @@ -1,6 +1,8 @@ package java import ( + "strings" + "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" @@ -9,9 +11,9 @@ import ( // PackageURL returns the PURL for the specific java package (see https://github.com/package-url/purl-spec) func packageURL(name, version string, metadata pkg.JavaMetadata) string { var groupID = name - groupIDs := cpe.GroupIDsFromJavaMetadata(name, metadata) - if len(groupIDs) > 0 { - groupID = groupIDs[0] + + if gID := groupIDFromJavaMetadata(name, metadata); gID != "" { + groupID = gID } pURL := packageurl.NewPackageURL( @@ -23,3 +25,121 @@ func packageURL(name, version string, metadata pkg.JavaMetadata) string { "") return pURL.ToString() } + +// GroupIDFromJavaPackage returns the authoritative group ID for a Java package. +// The order of precedence is: +// 1. The group ID from the POM properties +// 2. The group ID from the POM project +// 3. The group ID from a select map of known group IDs +// 4. The group ID from the Java manifest +func groupIDFromJavaMetadata(pkgName string, metadata pkg.JavaMetadata) (groupID string) { + if groupID = groupIDFromPomProperties(metadata.PomProperties); groupID != "" { + return groupID + } + + if groupID = groupIDFromPomProject(metadata.PomProject); groupID != "" { + return groupID + } + + if groupID = groupIDFromKnownPackageList(pkgName); groupID != "" { + return groupID + } + + if groupID = groupIDFromJavaManifest(metadata.Manifest); groupID != "" { + return groupID + } + + return groupID +} + +func groupIDFromKnownPackageList(pkgName string) (groupID string) { + if groupID, ok := cpe.DefaultArtifactIDToGroupID[pkgName]; ok { + return groupID + } + return groupID +} + +func groupIDFromJavaManifest(manifest *pkg.JavaManifest) (groupID string) { + if manifest == nil { + return groupID + } + + groupIDS := cpe.GetManifestFieldGroupIDs(manifest, cpe.PrimaryJavaManifestGroupIDFields) + // assumes that primaryJavaManifestNameFields are ordered by priority + if len(groupIDS) != 0 { + return groupIDS[0] + } + + groupIDS = cpe.GetManifestFieldGroupIDs(manifest, cpe.SecondaryJavaManifestGroupIDFields) + + if len(groupIDS) != 0 { + return groupIDS[0] + } + + return groupID +} + +func groupIDFromPomProperties(properties *pkg.PomProperties) (groupID string) { + if properties == nil { + return groupID + } + + if looksLikeGroupID(properties.GroupID) { + return cleanGroupID(properties.GroupID) + } + + // sometimes the publisher puts the group ID in the artifact ID field unintentionally + if looksLikeGroupID(properties.ArtifactID) { + // there is a strong indication that the artifact ID is really a group ID + return cleanGroupID(properties.ArtifactID) + } + + return groupID +} + +func groupIDFromPomProject(project *pkg.PomProject) (groupID string) { + if project == nil { + return groupID + } + + // check the project details + if looksLikeGroupID(project.GroupID) { + return cleanGroupID(project.GroupID) + } + + // sometimes the publisher puts the group ID in the artifact ID field unintentionally + if looksLikeGroupID(project.ArtifactID) { + // there is a strong indication that the artifact ID is really a group ID + return cleanGroupID(project.ArtifactID) + } + + // let's check the parent details + // if the current project does not have a group ID, but the parent does, we'll use the parent's group ID + if project.Parent != nil { + if looksLikeGroupID(project.Parent.GroupID) { + return cleanGroupID(project.Parent.GroupID) + } + + // sometimes the publisher puts the group ID in the artifact ID field unintentionally + if looksLikeGroupID(project.Parent.ArtifactID) { + // there is a strong indication that the artifact ID is really a group ID + return cleanGroupID(project.Parent.ArtifactID) + } + } + + return groupID +} +func looksLikeGroupID(value string) bool { + return strings.Contains(value, ".") +} + +func cleanGroupID(groupID string) string { + return strings.TrimSpace(removeOSCIDirectives(groupID)) +} + +func removeOSCIDirectives(groupID string) string { + // for example: + // org.bar;uses:=“org.foo” -> org.bar + // more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/ + return strings.Split(groupID, ";")[0] +} diff --git a/syft/pkg/cataloger/java/package_url_test.go b/syft/pkg/cataloger/java/package_url_test.go index ac5eef6a06f..665827a2dcc 100644 --- a/syft/pkg/cataloger/java/package_url_test.go +++ b/syft/pkg/cataloger/java/package_url_test.go @@ -45,3 +45,59 @@ func Test_packageURL(t *testing.T) { }) } } + +func Test_groupIDFromJavaMetadata(t *testing.T) { + tests := []struct { + name string + pkgName string + metadata pkg.JavaMetadata + expect string + }{ + { + name: "pom properties", + metadata: pkg.JavaMetadata{ + PomProperties: &pkg.PomProperties{ + GroupID: "org.anchore", + }, + }, + expect: "org.anchore", + }, + { + name: "pom project", + metadata: pkg.JavaMetadata{ + PomProject: &pkg.PomProject{ + GroupID: "org.anchore", + }, + }, + expect: "org.anchore", + }, + { + name: "known package list", + pkgName: "ant-antlr", + metadata: pkg.JavaMetadata{}, + expect: "org.apache.ant", + }, + { + name: "java manifest", + metadata: pkg.JavaMetadata{ + Manifest: &pkg.JavaManifest{ + Main: map[string]string{ + "Implementation-Vendor": "org.anchore", + }, + }, + }, + expect: "org.anchore", + }, + { + name: "no group id", + metadata: pkg.JavaMetadata{}, + expect: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expect, groupIDFromJavaMetadata(tt.pkgName, tt.metadata)) + }) + } +} diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go index 704cb559808..c59bfbf4057 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml.go +++ b/syft/pkg/cataloger/java/parse_pom_xml.go @@ -178,7 +178,9 @@ func resolveProperty(pom gopom.Project, property *string) string { pomValueType = pomValue.Type() if pomValueType.Kind() == reflect.Ptr { pomValue = pomValue.Elem() - pomValueType = pomValue.Type() + if !pomValue.IsZero() { + pomValueType = pomValue.Type() + } } if partNum == numParts-1 { return fmt.Sprintf("%v", pomValue.Interface())