Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 1944 - update purl generation to use a consistent groupID #2033

Merged
merged 18 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 113 additions & 2 deletions syft/pkg/cataloger/common/cpe/java.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ var (
}

primaryJavaManifestGroupIDFields = []string{
"Bundle-SymbolicName",
"Extension-Name",
"Specification-Vendor",
"Implementation-Vendor",
"Bundle-SymbolicName",
"Implementation-Vendor-Id",
"Implementation-Title",
"Bundle-Activator",
Expand Down Expand Up @@ -168,7 +168,7 @@ func artifactIDFromJavaPackage(p pkg.Package) string {
}

artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)
if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
if looksLikeGroupID(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
// there is a strong indication that the artifact ID is really a group ID, don't use it
return ""
}
Expand All @@ -184,6 +184,113 @@ func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
return GroupIDsFromJavaMetadata(p.Name, metadata)
}

// GroupIDFromJavaPackage returns the authoritative group ID for a Java package.
// The order of precedence is:
// 1. The group ID from the POM properties
// 2. The group ID from the POM project
// 3. The group ID from a select map of known group IDs
// 3. The group ID from the Java manifest
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
func GroupIDFromJavaMetadata(pkgName string, metadata pkg.JavaMetadata) (groupID string) {
Copy link
Contributor

@wagoodman wagoodman Aug 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all of these new functions probably don't belong in the cpe package since they have been replicated to not affect the CPE generation logic today. My vote would be to put this under the java cataloger package (as near duplicate functions) and unexport all of the functions (I'd duplicate the looksLikeGroupID function too... since there isn't a specific definition for this and the impl may change based on the use case).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I parsed this comment correctly - check f1e05da

If there are some edits or things to move around from here we can do a quick sync on the package organization and exports/imports

spiffcs marked this conversation as resolved.
Show resolved Hide resolved
if groupID = groupIDFromPomProperties(metadata.PomProperties); groupID != "" {
return groupID
}

if groupID = groupIDFromPomProject(metadata.PomProject); groupID != "" {
return groupID
}

if groupID = groupIDFromKnownPackageList(pkgName); groupID != "" {
return groupID
}

if groupID = groupIDFromJavaManifest(metadata.Manifest); groupID != "" {
return groupID
}

return groupID
}

func groupIDFromKnownPackageList(pkgName string) (groupID string) {
if groupID, ok := defaultArtifactIDToGroupID[pkgName]; ok {
return groupID
}
return groupID
}

func groupIDFromJavaManifest(manifest *pkg.JavaManifest) (groupID string) {
if manifest == nil {
return groupID
}

groupIDS := getManifestFieldGroupIDs(manifest, primaryJavaManifestGroupIDFields)
// assumes that primaryJavaManifestNameFields are ordered by priority
if len(groupIDS) != 0 {
return groupIDS[0]
}

groupIDS = getManifestFieldGroupIDs(manifest, secondaryJavaManifestGroupIDFields)

if len(groupIDS) != 0 {
return groupIDS[0]
}

return groupID
}

func groupIDFromPomProperties(properties *pkg.PomProperties) (groupID string) {
if properties == nil {
return groupID
}

if looksLikeGroupID(properties.GroupID) {
return cleanGroupID(properties.GroupID)
}

// sometimes the publisher puts the group ID in the artifact ID field unintentionally
if looksLikeGroupID(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 {
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
// there is a strong indication that the artifact ID is really a group ID
return cleanGroupID(properties.ArtifactID)
}

return groupID
}

func groupIDFromPomProject(project *pkg.PomProject) (groupID string) {
if project == nil {
return groupID
}

// check the project details
if looksLikeGroupID(project.GroupID) {
return cleanGroupID(project.GroupID)
}

// sometimes the publisher puts the group ID in the artifact ID field unintentionally
if looksLikeGroupID(project.GroupID) && len(strings.Split(project.ArtifactID, ".")) > 1 {
// there is a strong indication that the artifact ID is really a group ID
return cleanGroupID(project.ArtifactID)
}
spiffcs marked this conversation as resolved.
Show resolved Hide resolved

// let's check the parent details
// if the current project does not have a group ID, but the parent does, we'll use the parent's group ID
if project.Parent != nil {
if looksLikeGroupID(project.Parent.GroupID) {
return cleanGroupID(project.Parent.GroupID)
}

// sometimes the publisher puts the group ID in the artifact ID field unintentionally
if looksLikeGroupID(project.Parent.ArtifactID) && len(strings.Split(project.Parent.ArtifactID, ".")) > 1 {
// there is a strong indication that the artifact ID is really a group ID
return cleanGroupID(project.Parent.ArtifactID)
}
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
}

return groupID
}

// GroupIDsFromJavaMetadata returns the possible group IDs for a Java package
// This function is similar to GroupIDFromJavaPackage, but returns all possible group IDs and is less strict
// It is used as a way to generate possible candidates for CPE matching.
func GroupIDsFromJavaMetadata(pkgName string, metadata pkg.JavaMetadata) (groupIDs []string) {
groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...)
Expand Down Expand Up @@ -302,3 +409,7 @@ func removeOSCIDirectives(groupID string) string {
func startsWithTopLevelDomain(value string) bool {
return internal.HasAnyOfPrefixes(value, domains...)
}

func looksLikeGroupID(value string) bool {
return strings.Contains(value, ".")
}
54 changes: 35 additions & 19 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

Expand Down Expand Up @@ -62,11 +63,11 @@ func parseJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.Locat
}

// uniquePkgKey creates a unique string to identify the given package.
func uniquePkgKey(p *pkg.Package) string {
func uniquePkgKey(groupID string, p *pkg.Package) string {
if p == nil {
return ""
}
return fmt.Sprintf("%s|%s", p.Name, p.Version)
return fmt.Sprintf("%s|%s|%s", groupID, p.Name, p.Version)
}

// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
Expand Down Expand Up @@ -376,7 +377,13 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP
func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.PomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package {
// keep the artifact name within the virtual path if this package does not match the parent package
vPathSuffix := ""
if !strings.HasPrefix(pomProperties.ArtifactID, parentPkg.Name) {
groupID := ""
if parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata); ok {
groupID = cpe.GroupIDFromJavaMetadata(parentPkg.Name, parentMetadata)
}
parentKey := fmt.Sprintf("%s:%s:%s", groupID, parentPkg.Name, parentPkg.Version)
pomProjectKey := fmt.Sprintf("%s:%s:%s", pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems to be trying to correlate the parent name and the current pom properties artifact ID and changing the virtual path when this occurs. Can you leave a code comment to why this is important? (and this might look test worthy)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated with commentary and test updates in 06bd7b9

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to clarify: why is the parent package name and pom properties artifact ID being compared? (instead of artifact id vs artifact id or name vs name?)

Copy link
Contributor Author

@spiffcs spiffcs Aug 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added comment explaining the significance and why we use the values we're using: 1bbb6c5

if parentKey != pomProjectKey {
vPathSuffix += ":" + pomProperties.ArtifactID
}
virtualPath := location.AccessPath() + vPathSuffix
Expand Down Expand Up @@ -408,36 +415,45 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po
}

func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool {
// the name/version pair matches...
if uniquePkgKey(&p) == uniquePkgKey(parentPkg) {
return true
}
childMetadata, childOk := p.Metadata.(pkg.JavaMetadata)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
parentMetadata, parentOk := parentPkg.Metadata.(pkg.JavaMetadata)
if !childOk || !parentOk {
switch {
case !childOk:
log.WithFields("package", p.String()).Debug("unable to extract java metadata to check for matching package identity for package: %s", p.Name)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
case !parentOk:
log.WithFields("package", parentPkg.String()).Debug("unable to extract java metadata to check for matching package identity for package: %s", parentPkg.Name)
}
// if we can't extract metadata, we can check for matching identities via the package name
// this is not ideal, but it's better than nothing - this should not be used if we have Metadata

metadata, ok := p.Metadata.(pkg.JavaMetadata)
if !ok {
log.WithFields("package", p.String()).Warn("unable to extract java metadata to check for matching package identity")
return false
return uniquePkgKey("", &p) == uniquePkgKey("", parentPkg)
}

parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata)
if !ok {
log.WithFields("package", p.String()).Warn("unable to extract java metadata from parent for verifying virtual path")
return false
// try to determine identity with the metadata
childGroupID := cpe.GroupIDFromJavaMetadata(p.Name, childMetadata)
parentGroupID := cpe.GroupIDFromJavaMetadata(parentPkg.Name, parentMetadata)
if uniquePkgKey(childGroupID, &p) == uniquePkgKey(parentGroupID, parentPkg) {
return true
}

// the virtual path matches...
if parentMetadata.VirtualPath == metadata.VirtualPath {
if parentMetadata.VirtualPath == childMetadata.VirtualPath {
return true
}

// the pom artifactId is the parent name
// note: you CANNOT use name-is-subset-of-artifact-id or vice versa --this is too generic. Shaded jars are a good
// example of this: where the package name is "cloudbees-analytics-segment-driver" and a child is "analytics", but
// they do not indicate the same package.
if metadata.PomProperties.ArtifactID != "" && parentPkg.Name == metadata.PomProperties.ArtifactID {
return true
// NOTE: artifactId might not be a good indicator of uniqueness since archives can contain forks with the same name
// from different groups (e.g. "org.glassfish.jaxb.jaxb-core" and "com.sun.xml.bind.jaxb-core")
// we will use this check as a last resort
if childMetadata.PomProperties != nil {
if childMetadata.PomProperties.ArtifactID != "" && parentPkg.Name == childMetadata.PomProperties.ArtifactID {
return true
}
}

return false
}

Expand Down
38 changes: 0 additions & 38 deletions syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -894,44 +894,6 @@ func Test_newPackageFromMavenData(t *testing.T) {
},
expectedPackage: nil,
},
{
kzantow marked this conversation as resolved.
Show resolved Hide resolved
name: "child matches parent by virtual path -- override name and version",
props: pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-parent-name", // note: DOES NOT match parent package
Version: "3.0", // note: DOES NOT match parent package
},
parent: &pkg.Package{
Name: "", // note: empty, so should not be matched on
Version: "", // note: empty, so should not be matched on
Type: pkg.JavaPkg,
Metadata: pkg.JavaMetadata{
VirtualPath: virtualPath, // note: matching virtual path
Manifest: nil,
PomProperties: nil,
Parent: nil,
},
},
expectedParent: pkg.Package{
Name: "some-parent-name",
Version: "3.0",
Type: pkg.JavaPkg,
Metadata: pkg.JavaMetadata{
VirtualPath: virtualPath,
Manifest: nil,
// note: we attach the discovered pom properties data
PomProperties: &pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-parent-name",
Version: "3.0",
},
Parent: nil,
},
},
expectedPackage: nil,
},
{
name: "child matches parent by artifact id",
props: pkg.PomProperties{
Expand Down
6 changes: 3 additions & 3 deletions syft/pkg/cataloger/java/package_url.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ import (
// PackageURL returns the PURL for the specific java package (see https://github.com/package-url/purl-spec)
func packageURL(name, version string, metadata pkg.JavaMetadata) string {
var groupID = name
groupIDs := cpe.GroupIDsFromJavaMetadata(name, metadata)
if len(groupIDs) > 0 {
groupID = groupIDs[0]

if gID := cpe.GroupIDFromJavaMetadata(name, metadata); gID != "" {
groupID = gID
}

pURL := packageurl.NewPackageURL(
Expand Down
4 changes: 3 additions & 1 deletion syft/pkg/cataloger/java/parse_pom_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ func resolveProperty(pom gopom.Project, property *string) string {
pomValueType = pomValue.Type()
if pomValueType.Kind() == reflect.Ptr {
pomValue = pomValue.Elem()
pomValueType = pomValue.Type()
if !pomValue.IsZero() {
pomValueType = pomValue.Type()
}
}
if partNum == numParts-1 {
return fmt.Sprintf("%v", pomValue.Interface())
Expand Down