diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 00d614db437..a9662be65b1 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -382,6 +382,9 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po } parentKey := fmt.Sprintf("%s:%s:%s", groupID, parentPkg.Name, parentPkg.Version) + // Since we don't have a package yet, it's important to use the same `field: value` association that we used when creating the parent package + // See below where Name => pomProperties.ArtifactID and Version => pomProperties.Version. We want to check for potentially nested identical + // packages and create equal virtual paths so they are deduplicated in the future pomProjectKey := fmt.Sprintf("%s:%s:%s", pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) if parentKey != pomProjectKey { // build a new virtual path suffix for the package that is different from the parent package diff --git a/syft/pkg/cataloger/java/package_url.go b/syft/pkg/cataloger/java/package_url.go index b41e321fac5..d29f1ae3c06 100644 --- a/syft/pkg/cataloger/java/package_url.go +++ b/syft/pkg/cataloger/java/package_url.go @@ -88,6 +88,12 @@ func groupIDFromPomProperties(properties *pkg.PomProperties) (groupID string) { return cleanGroupID(properties.GroupID) } + // sometimes the publisher puts the group ID in the artifact ID field unintentionally + if looksLikeGroupID(properties.ArtifactID) { + // there is a strong indication that the artifact ID is really a group ID + return cleanGroupID(properties.ArtifactID) + } + return groupID } @@ -101,12 +107,24 @@ func groupIDFromPomProject(project *pkg.PomProject) (groupID string) { return cleanGroupID(project.GroupID) } + // sometimes the publisher puts the group ID in the artifact ID field unintentionally + if looksLikeGroupID(project.ArtifactID) { + // there is a strong indication that the artifact ID is really a group
ID + return cleanGroupID(project.ArtifactID) + } + // let's check the parent details // if the current project does not have a group ID, but the parent does, we'll use the parent's group ID if project.Parent != nil { if looksLikeGroupID(project.Parent.GroupID) { return cleanGroupID(project.Parent.GroupID) } + + // sometimes the publisher puts the group ID in the artifact ID field unintentionally + if looksLikeGroupID(project.Parent.ArtifactID) { + // there is a strong indication that the artifact ID is really a group ID + return cleanGroupID(project.Parent.ArtifactID) + } } return groupID