Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test to ensure package metadata is represented in the JSON schema #1841

Merged
merged 7 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 5 additions & 15 deletions .github/scripts/json-schema-drift-check.sh
Original file line number Diff line number Diff line change
@@ -1,27 +1,17 @@
#!/usr/bin/env bash
set -u

if ! git diff-index --quiet HEAD --; then
git diff-index HEAD --
git --no-pager diff
echo "there are uncommitted changes, please commit them before running this check"
if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then
echo " 🔴 there are uncommitted changes, please commit them before running this check"
exit 1
fi

success=true

if ! make generate-json-schema; then
echo "Generating json schema failed"
success=false
fi

if ! git diff-index --quiet HEAD --; then
git diff-index HEAD --
git --no-pager diff
echo "JSON schema drift detected!"
success=false
exit 1
fi

if ! $success; then
if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then
echo " 🔴 there are uncommitted changes, please commit them before running this check"
exit 1
fi
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)

.PHONY: generate-json-schema
generate-json-schema: ## Generate a new json schema
cd schema/json && go run generate.go
cd schema/json && go generate . && go run .

.PHONY: generate-license-list
generate-license-list: ## Generate an updated spdx license list
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ require (
github.com/Masterminds/sprig/v3 v3.2.3
github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8
github.com/anchore/stereoscope v0.0.0-20230522170632-e14bc4437b2e
github.com/dave/jennifer v1.6.1
github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da
github.com/docker/docker v24.0.1+incompatible
github.com/github/go-spdx/v2 v2.1.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/dave/jennifer v1.6.1 h1:T4T/67t6RAA5AIV6+NP8Uk/BIsXgDoqEowgycdQQLuk=
github.com/dave/jennifer v1.6.1/go.mod h1:nXbxhEmQfOZhWml3D1cDK5M1FLnMSozpbFN/m3RmGZc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
50 changes: 50 additions & 0 deletions schema/json/generate/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package main

import (
"fmt"
"os"

"github.com/dave/jennifer/jen"

"github.com/anchore/syft/schema/json/internal"
)

// This program generates internal/generated.go.

// pkgImport is the import path of the package that declares the metadata
// types; every field of the generated struct is qualified with it.
const pkgImport = "github.com/anchore/syft/syft/pkg"

// path is where the generated source is written. It is passed to os.OpenFile
// unchanged, so it resolves relative to the process working directory.
const path = "internal/generated.go"

// main regenerates the ArtifactMetadataContainer struct definition. It asks
// internal.AllSyftMetadataTypeNames for the metadata type names, builds a
// struct with one field per name (field name == type name, field type
// qualified with pkgImport), and overwrites the file at path with the result.
// Every failure is fatal and reported via panic.
func main() {
	typeNames, err := internal.AllSyftMetadataTypeNames()
	if err != nil {
		panic(fmt.Errorf("unable to get all metadata type names: %w", err))
	}

	fmt.Printf("updating metadata container object with %+v types\n", len(typeNames))

	// emits one struct field per discovered metadata type
	addFields := func(group *jen.Group) {
		for i := range typeNames {
			group.Id(typeNames[i]).Qual(pkgImport, typeNames[i])
		}
	}

	file := jen.NewFile("internal")
	file.HeaderComment("DO NOT EDIT: generated by schema/json/generate/main.go")
	file.ImportName(pkgImport, "pkg")
	file.Comment("ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.")
	file.Type().Id("ArtifactMetadataContainer").StructFunc(addFields)

	// formatting the jen file with %#v produces the source text that is written out
	source := fmt.Sprintf("%#v", file)

	// truncate any previous output so stale content never survives a regeneration
	out, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		panic(fmt.Errorf("unable to open file: %w", err))
	}
	if _, err = out.WriteString(source); err != nil {
		panic(fmt.Errorf("unable to write file: %w", err))
	}
	if err = out.Close(); err != nil {
		panic(fmt.Errorf("unable to close file: %w", err))
	}
}
39 changes: 39 additions & 0 deletions schema/json/internal/generated.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// DO NOT EDIT: generated by schema/json/generate/main.go

package internal

import "github.com/anchore/syft/syft/pkg"

// ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.
type ArtifactMetadataContainer struct {
AlpmMetadata pkg.AlpmMetadata
ApkMetadata pkg.ApkMetadata
BinaryMetadata pkg.BinaryMetadata
CargoPackageMetadata pkg.CargoPackageMetadata
CocoapodsMetadata pkg.CocoapodsMetadata
ConanLockMetadata pkg.ConanLockMetadata
ConanMetadata pkg.ConanMetadata
DartPubMetadata pkg.DartPubMetadata
DotnetDepsMetadata pkg.DotnetDepsMetadata
DpkgMetadata pkg.DpkgMetadata
GemMetadata pkg.GemMetadata
GolangBinMetadata pkg.GolangBinMetadata
GolangModMetadata pkg.GolangModMetadata
HackageMetadata pkg.HackageMetadata
JavaMetadata pkg.JavaMetadata
KbPackageMetadata pkg.KbPackageMetadata
LinuxKernelMetadata pkg.LinuxKernelMetadata
LinuxKernelModuleMetadata pkg.LinuxKernelModuleMetadata
MixLockMetadata pkg.MixLockMetadata
NixStoreMetadata pkg.NixStoreMetadata
NpmPackageJSONMetadata pkg.NpmPackageJSONMetadata
NpmPackageLockJSONMetadata pkg.NpmPackageLockJSONMetadata
PhpComposerJSONMetadata pkg.PhpComposerJSONMetadata
PortageMetadata pkg.PortageMetadata
PythonPackageMetadata pkg.PythonPackageMetadata
PythonPipfileLockMetadata pkg.PythonPipfileLockMetadata
PythonRequirementsMetadata pkg.PythonRequirementsMetadata
RDescriptionFileMetadata pkg.RDescriptionFileMetadata
RebarLockMetadata pkg.RebarLockMetadata
RpmMetadata pkg.RpmMetadata
}
150 changes: 150 additions & 0 deletions schema/json/internal/metadata_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package internal

import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"os/exec"
"path/filepath"
"sort"
"strings"
"unicode"

"github.com/scylladb/go-set/strset"
)

// metadataExceptions lists type names that isMetadataTypeCandidate rejects
// even though they are exported and end in "Metadata".
var metadataExceptions = strset.New("FileMetadata")

// AllSyftMetadataTypeNames returns the sorted names of the metadata struct
// types found in the Go files directly under syft/pkg of the git repository
// that contains the current working directory.
//
// It fails when the repository root cannot be determined, when a source file
// cannot be parsed, or when the discovery sanity check in
// findMetadataDefinitionNames does not pass.
func AllSyftMetadataTypeNames() ([]string, error) {
	repo, err := repoRoot()
	if err != nil {
		return nil, err
	}

	// only files directly inside syft/pkg are considered (the glob is not recursive)
	pattern := filepath.Join(repo, "syft/pkg/*.go")
	sources, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}

	return findMetadataDefinitionNames(sources...)
}
Comment on lines +21 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is some related data here, could you use any of these exported lists? These need to be kept up to date, used when decoding CDX.

Copy link
Contributor Author

@wagoodman wagoodman May 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about using these, but pkg.MetadataType is going to be removed closer to syft 1.0 (#1735), which means this map would become a format-only concern. So instead I leaned towards the current AST approach, which has the added benefit of not needing to remember to add each new type to a central list/map; relying on such a list would make the PR protection a little more brittle.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but the list of all metadata types is something needed by the Syft format decoder once the pkg.MetadataType goes away, which is specifically a concern with the Syft schema, is it not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see -- that is, we should have a single source of truth for these sets. I'll try to account for that in this PR (warning, the scope will creep a bit, but I think it's warranted).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still agree with trying to get down to a single source of truth; however, I feel that is better tackled in #1735 and #1844, since there are naming inconsistencies and decoding considerations to work through here. Additionally, this PR doesn't add another source of truth: the container struct being created already exists today and is now simply being generated. Hopefully the work on the other two issues can expand on this generation to include more things.


// repoRoot returns the absolute path of the top-level directory of the git
// repository containing the current working directory, as reported by
// `git rev-parse --show-toplevel`.
//
// An error is returned when the git command cannot be started or exits
// unsuccessfully, or when the reported path cannot be made absolute. The
// underlying error is wrapped, so callers can inspect it with errors.Is and
// errors.As (for example to detect exec.ErrNotFound).
func repoRoot() (string, error) {
	root, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		// wrap with %w rather than formatting with %+v so the cause stays inspectable
		return "", fmt.Errorf("unable to find repo root dir: %w", err)
	}

	// the command output carries surrounding whitespace (trailing newline) that is not part of the path
	absRepoRoot, err := filepath.Abs(strings.TrimSpace(string(root)))
	if err != nil {
		return "", fmt.Errorf("unable to get abs path to repo root: %w", err)
	}
	return absRepoRoot, nil
}

// findMetadataDefinitionNames scans the given Go source files and returns the
// sorted names of the top-level metadata struct types they declare.
//
// A candidate definition whose name also appears as a type used inside any
// candidate struct is treated as a nested helper type and dropped from the
// result. An error is returned if any file fails to parse, or if fewer
// definitions than the sanity threshold are found.
func findMetadataDefinitionNames(paths ...string) ([]string, error) {
	// note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required.
	// it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions.
	const minExpectedDefinitions = 30

	names := strset.New()
	usedNames := strset.New()
	for _, path := range paths {
		metadataDefinitions, usedTypeNames, err := findMetadataDefinitionNamesInFile(path)
		if err != nil {
			return nil, err
		}

		// useful for debugging...
		// fmt.Println(path)
		// fmt.Println("Defs:", metadataDefinitions)
		// fmt.Println("Used Types:", usedTypeNames)
		// fmt.Println()

		names.Add(metadataDefinitions...)
		usedNames.Add(usedTypeNames...)
	}

	// any definition that is used within another struct should not be considered a top-level metadata definition
	names.Remove(usedNames.List()...)

	// set iteration order is not stable, so sort for deterministic output
	strNames := names.List()
	sort.Strings(strNames)

	if len(strNames) < minExpectedDefinitions {
		// constant format string with a verb instead of string concatenation (keeps go vet's printf check happy)
		return nil, fmt.Errorf("not enough metadata definitions found (discovered: %d)", len(strNames))
	}

	return strNames, nil
}

// findMetadataDefinitionNamesInFile parses the Go source file at path and
// inspects its top-level type declarations.
//
// It returns two slices: the names of struct types accepted by
// isMetadataTypeCandidate, and every identifier found in the field types of
// those same structs (as collected by typeNamesUsedInStruct). A parse failure
// is returned as the error, with both slices nil.
func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error) {
	fileSet := token.NewFileSet()
	parsed, err := parser.ParseFile(fileSet, path, nil, parser.ParseComments)
	if err != nil {
		return nil, nil, err
	}

	var (
		metadataDefinitions []string
		usedTypeNames       []string
	)

	for _, decl := range parsed.Decls {
		// only `type ...` declarations are of interest
		genDecl, isGenDecl := decl.(*ast.GenDecl)
		if !isGenDecl || genDecl.Tok != token.TYPE {
			continue
		}

		// a single declaration may define several types: type ( A ...; B ... )
		for _, declared := range genDecl.Specs {
			typeSpec, isTypeSpec := declared.(*ast.TypeSpec)
			if !isTypeSpec || typeSpec.Type == nil {
				continue
			}

			// non-struct types (aliases, interfaces, ...) are never metadata definitions
			structType, isStruct := typeSpec.Type.(*ast.StructType)
			if !isStruct {
				continue
			}

			name := typeSpec.Name.String()
			if !isMetadataTypeCandidate(name) {
				continue
			}

			// record the definition along with every type name its fields refer to
			metadataDefinitions = append(metadataDefinitions, name)
			usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...)
		}
	}

	return metadataDefinitions, usedTypeNames, nil
}

// typeNamesUsedInStruct returns the name of every identifier that occurs
// anywhere inside the field type expressions of structType, in traversal
// order and without de-duplication. Field names themselves are not included,
// but all identifiers within a type expression are (for example both the
// package qualifier and the type name of a selector such as pkg.Type).
// A struct without fields yields a nil slice.
func typeNamesUsedInStruct(structType *ast.StructType) []string {
	var collected []string

	// visitor that records identifiers and always keeps descending
	recordIdent := func(node ast.Node) bool {
		if id, isIdent := node.(*ast.Ident); isIdent {
			collected = append(collected, id.Name)
		}
		return true
	}

	for _, field := range structType.Fields.List {
		// walk only the type expression so that field names are skipped
		ast.Inspect(field.Type, recordIdent)
	}

	return collected
}

// isMetadataTypeCandidate reports whether name looks like a metadata type
// definition: it must end with "Metadata", be exported (its first rune is an
// upper case letter), and not be listed in metadataExceptions.
func isMetadataTypeCandidate(name string) bool {
	return strings.HasSuffix(name, "Metadata") && // also rules out the empty name
		// must be exported: the first upper case rune has to sit at offset 0. This inspects the
		// first decoded rune rather than the first byte, so multi-byte leading characters are
		// classified correctly.
		strings.IndexFunc(name, unicode.IsUpper) == 0 &&
		!metadataExceptions.Has(name)
}
53 changes: 8 additions & 45 deletions schema/json/generate.go → schema/json/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (
"github.com/invopop/jsonschema"

"github.com/anchore/syft/internal"
genInt "github.com/anchore/syft/schema/json/internal"
syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model"
"github.com/anchore/syft/syft/pkg"
)

/*
Expand All @@ -24,46 +24,9 @@ are not captured (empty interfaces). This means that pkg.Package.Metadata is not
can be extended to include specific package metadata struct shapes in the future.
*/

// This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}).
// When a new package metadata definition is created it will need to be manually added here. The variable name does
// not matter as long as it is exported.

// TODO: this should be generated from reflection of whats in the pkg package
// Should be created during generation below; use reflection's ability to
// create types at runtime.
// should be same name as struct minus metadata
type artifactMetadataContainer struct {
Alpm pkg.AlpmMetadata
Apk pkg.ApkMetadata
Binary pkg.BinaryMetadata
Cocopods pkg.CocoapodsMetadata
Conan pkg.ConanMetadata
ConanLock pkg.ConanLockMetadata
Dart pkg.DartPubMetadata
Dotnet pkg.DotnetDepsMetadata
Dpkg pkg.DpkgMetadata
Gem pkg.GemMetadata
GoBin pkg.GolangBinMetadata
GoMod pkg.GolangModMetadata
Hackage pkg.HackageMetadata
Java pkg.JavaMetadata
KbPackage pkg.KbPackageMetadata
LinuxKernel pkg.LinuxKernelMetadata
LinuxKernelModule pkg.LinuxKernelModuleMetadata
Nix pkg.NixStoreMetadata
NpmPackage pkg.NpmPackageJSONMetadata
NpmPackageLock pkg.NpmPackageLockJSONMetadata
MixLock pkg.MixLockMetadata
Php pkg.PhpComposerJSONMetadata
Portage pkg.PortageMetadata
PythonPackage pkg.PythonPackageMetadata
PythonPipfilelock pkg.PythonPipfileLockMetadata
PythonRequirements pkg.PythonRequirementsMetadata
RDescriptionFile pkg.RDescriptionFileMetadata
Rebar pkg.RebarLockMetadata
Rpm pkg.RpmMetadata
RustCargo pkg.CargoPackageMetadata
}
//go:generate go run ./generate/main.go

const schemaVersion = internal.JSONSchemaVersion

func main() {
write(encode(build()))
Expand All @@ -77,14 +40,14 @@ func build() *jsonschema.Schema {
},
}
documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{}))
metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{}))
metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&genInt.ArtifactMetadataContainer{}))
// TODO: inject source definitions

// inject the definitions of all metadatas into the schema definitions

var metadataNames []string
for name, definition := range metadataSchema.Definitions {
if name == "artifactMetadataContainer" {
if name == reflect.TypeOf(genInt.ArtifactMetadataContainer{}).Name() {
// ignore the definition for the fake container
continue
}
Expand Down Expand Up @@ -130,7 +93,7 @@ func encode(schema *jsonschema.Schema) []byte {
}

func write(schema []byte) {
filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
filename := fmt.Sprintf("schema-%s.json", schemaVersion)

if _, err := os.Stat(filename); !os.IsNotExist(err) {
// check if the schema is the same...
Expand Down Expand Up @@ -167,5 +130,5 @@ func write(schema []byte) {

defer fh.Close()

fmt.Printf("wrote new schema to %q\n", filename)
fmt.Printf("Wrote new schema to %q\n", filename)
}
Loading