From 844ce862abf344e0181199dfd24829393bfc6f00 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 22 May 2023 12:38:46 -0400 Subject: [PATCH 1/6] [wip] try to reflect metadata types... probably wont work Signed-off-by: Alex Goodman --- schema/json/generate.go | 176 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 171 insertions(+), 5 deletions(-) diff --git a/schema/json/generate.go b/schema/json/generate.go index 169e3c22ff8..9088991adcf 100644 --- a/schema/json/generate.go +++ b/schema/json/generate.go @@ -4,11 +4,19 @@ import ( "bytes" "encoding/json" "fmt" + "github.com/scylladb/go-set/strset" + "go/ast" + "go/importer" + "go/parser" + "go/token" + "go/types" "io" "os" + "path/filepath" "reflect" "sort" "strings" + "unicode" "github.com/invopop/jsonschema" @@ -65,11 +73,169 @@ type artifactMetadataContainer struct { RustCargo pkg.CargoPackageMetadata } +const schemaVersion = internal.JSONSchemaVersion + +var metadataExceptions = strset.New( + "FileMetadata", +) + func main() { - write(encode(build())) + typeNames := findMetadataDefinitionNames(pkgFiles()...) + fmt.Println("Discovered metadata types: ", len(typeNames)) + for _, n := range typeNames { + fmt.Println(" -", n) + } + + fmt.Println("Crafting new metadata container type...") + metadata := metadataContainer(typeNames...) + fmt.Printf("Metadata container: %#v\n", metadata) + + fmt.Printf("Writing json schema for version=%q\n", schemaVersion) + write(encode(build(metadata))) +} + +func pkgFiles() []string { + values, err := filepath.Glob("../../syft/pkg/*.go") + if err != nil { + panic("unable to find package files") + } + return values +} + +func findMetadataDefinitionNames(paths ...string) []string { + names := strset.New() + usedNames := strset.New() + for _, path := range paths { + metadataDefinitions, usedTypeNames := findMetadataDefinitionNamesInFile(path) + + // useful for debugging... + //fmt.Println(path) + //fmt.Println("Defs:", metadataDefinitions) + //fmt.Println("Used Types:", usedTypeNames) + //fmt.Println() + + names.Add(metadataDefinitions...) + usedNames.Add(usedTypeNames...) + } + + // any definition that is used within another struct should not be considered a top-level metadata definition + names.Remove(usedNames.List()...) + + strNames := names.List() + sort.Strings(strNames) + + // note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required. + // it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions. + if len(strNames) < 30 { + panic("not enough metadata definitions found (discovered: " + fmt.Sprintf("%d", len(strNames)) + ")") + } + + return strNames +} + +func findMetadataDefinitionNamesInFile(path string) ([]string, []string) { + // set up the parser + fs := token.NewFileSet() + f, err := parser.ParseFile(fs, path, nil, parser.ParseComments) + if err != nil { + panic(err) + } + + var metadataDefinitions []string + var usedTypeNames []string + for _, decl := range f.Decls { + // check if the declaration is a type declaration + spec, ok := decl.(*ast.GenDecl) + if !ok || spec.Tok != token.TYPE { + continue + } + + // loop over all types declared in the type declaration + for _, typ := range spec.Specs { + // check if the type is a struct type + spec, ok := typ.(*ast.TypeSpec) + if !ok || spec.Type == nil { + continue + } + + structType, ok := spec.Type.(*ast.StructType) + if !ok { + continue + } + + // check if the struct type ends with "Metadata" + name := spec.Name.String() + + // only look for exported types that end with "Metadata" + if isMetadataTypeCandidate(name) { + // print the full declaration of the struct type + metadataDefinitions = append(metadataDefinitions, name) + usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...) + } + } + } + return metadataDefinitions, usedTypeNames +} + +func typeNamesUsedInStruct(structType *ast.StructType) []string { + // recursively find all type names used in the struct type + var names []string + for i, _ := range structType.Fields.List { + // capture names of all of the types (not field names) + ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool { + ident, ok := n.(*ast.Ident) + if !ok { + return true + } + + // add the type name to the list + names = append(names, ident.Name) + + // continue inspecting + return true + }) + } + + return names +} + +func isMetadataTypeCandidate(name string) bool { + return len(name) > 0 && + strings.HasSuffix(name, "Metadata") && + unicode.IsUpper(rune(name[0])) && // must be exported + !metadataExceptions.Has(name) +} + +func metadataContainer(names ...string) any { + pkgPkg := getPackage("github.com/anchore/syft/syft/pkg") + + var structFields []reflect.StructField + for _, typeName := range names { + fieldName := typeName + fieldType := pkgPkg.Scope().Lookup(typeName).Type() + newField := reflect.StructField{ + Name: fieldName, + Type: reflect.PtrTo(reflect.TypeOf(fieldType)), + } + structFields = append(structFields, newField) + + } + + structType := reflect.StructOf(structFields) + instance := reflect.New(structType) + + return instance +} + +func getPackage(importPath string) *types.Package { + p, err := importer.Default().Import(importPath) + if err != nil { + panic(err) + } + return p } -func build() *jsonschema.Schema { +func build(metadataContainer any) *jsonschema.Schema { reflector := &jsonschema.Reflector{ AllowAdditionalProperties: true, Namer: func(r reflect.Type) string { @@ -77,7 +243,7 @@ func build() *jsonschema.Schema { }, } documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{})) - metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{})) + metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&metadataContainer)) // TODO: inject source definitions // inject the definitions of all metadatas into the schema definitions @@ -130,7 +296,7 @@ func encode(schema *jsonschema.Schema) []byte { } func write(schema []byte) { - filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion) + filename := fmt.Sprintf("schema-%s.json", schemaVersion) if _, err := os.Stat(filename); !os.IsNotExist(err) { // check if the schema is the same... @@ -167,5 +333,5 @@ func write(schema []byte) { defer fh.Close() - fmt.Printf("wrote new schema to %q\n", filename) + fmt.Printf("Wrote new schema to %q\n", filename) } From f16db96b9836955246cbb211288678e1b2cdebf5 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 22 May 2023 13:42:31 -0400 Subject: [PATCH 2/6] refactor to add unit test to ensure there is coverage in the schema Signed-off-by: Alex Goodman --- schema/json/generate.go | 170 +---------------------------------- schema/json/generate_test.go | 151 +++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 167 deletions(-) create mode 100644 schema/json/generate_test.go diff --git a/schema/json/generate.go b/schema/json/generate.go index 9088991adcf..8bab53933d0 100644 --- a/schema/json/generate.go +++ b/schema/json/generate.go @@ -4,19 +4,11 @@ import ( "bytes" "encoding/json" "fmt" - "github.com/scylladb/go-set/strset" - "go/ast" - "go/importer" - "go/parser" - "go/token" - "go/types" "io" "os" - "path/filepath" "reflect" "sort" "strings" - "unicode" "github.com/invopop/jsonschema" @@ -75,167 +67,11 @@ type artifactMetadataContainer struct { const schemaVersion = internal.JSONSchemaVersion -var metadataExceptions = strset.New( - "FileMetadata", -) - func main() { - typeNames := findMetadataDefinitionNames(pkgFiles()...) - fmt.Println("Discovered metadata types: ", len(typeNames)) - for _, n := range typeNames { - fmt.Println(" -", n) - } - - fmt.Println("Crafting new metadata container type...") - metadata := metadataContainer(typeNames...) - fmt.Printf("Metadata container: %#v\n", metadata) - - fmt.Printf("Writing json schema for version=%q\n", schemaVersion) - write(encode(build(metadata))) -} - -func pkgFiles() []string { - values, err := filepath.Glob("../../syft/pkg/*.go") - if err != nil { - panic("unable to find package files") - } - return values -} - -func findMetadataDefinitionNames(paths ...string) []string { - names := strset.New() - usedNames := strset.New() - for _, path := range paths { - metadataDefinitions, usedTypeNames := findMetadataDefinitionNamesInFile(path) - - // useful for debugging... - //fmt.Println(path) - //fmt.Println("Defs:", metadataDefinitions) - //fmt.Println("Used Types:", usedTypeNames) - //fmt.Println() - - names.Add(metadataDefinitions...) - usedNames.Add(usedTypeNames...) - } - - // any definition that is used within another struct should not be considered a top-level metadata definition - names.Remove(usedNames.List()...) - - strNames := names.List() - sort.Strings(strNames) - - // note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required. - // it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions. - if len(strNames) < 30 { - panic("not enough metadata definitions found (discovered: " + fmt.Sprintf("%d", len(strNames)) + ")") - } - - return strNames -} - -func findMetadataDefinitionNamesInFile(path string) ([]string, []string) { - // set up the parser - fs := token.NewFileSet() - f, err := parser.ParseFile(fs, path, nil, parser.ParseComments) - if err != nil { - panic(err) - } - - var metadataDefinitions []string - var usedTypeNames []string - for _, decl := range f.Decls { - // check if the declaration is a type declaration - spec, ok := decl.(*ast.GenDecl) - if !ok || spec.Tok != token.TYPE { - continue - } - - // loop over all types declared in the type declaration - for _, typ := range spec.Specs { - // check if the type is a struct type - spec, ok := typ.(*ast.TypeSpec) - if !ok || spec.Type == nil { - continue - } - - structType, ok := spec.Type.(*ast.StructType) - if !ok { - continue - } - - // check if the struct type ends with "Metadata" - name := spec.Name.String() - - // only look for exported types that end with "Metadata" - if isMetadataTypeCandidate(name) { - // print the full declaration of the struct type - metadataDefinitions = append(metadataDefinitions, name) - usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...) - } - } - } - return metadataDefinitions, usedTypeNames -} - -func typeNamesUsedInStruct(structType *ast.StructType) []string { - // recursively find all type names used in the struct type - var names []string - for i, _ := range structType.Fields.List { - // capture names of all of the types (not field names) - ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool { - ident, ok := n.(*ast.Ident) - if !ok { - return true - } - - // add the type name to the list - names = append(names, ident.Name) - - // continue inspecting - return true - }) - } - - return names -} - -func isMetadataTypeCandidate(name string) bool { - return len(name) > 0 && - strings.HasSuffix(name, "Metadata") && - unicode.IsUpper(rune(name[0])) && // must be exported - !metadataExceptions.Has(name) -} - -func metadataContainer(names ...string) any { - pkgPkg := getPackage("github.com/anchore/syft/syft/pkg") - - var structFields []reflect.StructField - for _, typeName := range names { - fieldName := typeName - fieldType := pkgPkg.Scope().Lookup(typeName).Type() - newField := reflect.StructField{ - Name: fieldName, - Type: reflect.PtrTo(reflect.TypeOf(fieldType)), - } - structFields = append(structFields, newField) - - } - - structType := reflect.StructOf(structFields) - instance := reflect.New(structType) - - return instance -} - -func getPackage(importPath string) *types.Package { - p, err := importer.Default().Import(importPath) - if err != nil { - panic(err) - } - return p + write(encode(build())) } -func build(metadataContainer any) *jsonschema.Schema { +func build() *jsonschema.Schema { reflector := &jsonschema.Reflector{ AllowAdditionalProperties: true, Namer: func(r reflect.Type) string { @@ -243,7 +79,7 @@ func build(metadataContainer any) *jsonschema.Schema { }, } documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{})) - metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&metadataContainer)) + metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{})) // TODO: inject source definitions // inject the definitions of all metadatas into the schema definitions diff --git a/schema/json/generate_test.go b/schema/json/generate_test.go new file mode 100644 index 00000000000..65febc7f832 --- /dev/null +++ b/schema/json/generate_test.go @@ -0,0 +1,151 @@ +package main + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "path/filepath" + "reflect" + "sort" + "strings" + "testing" + "unicode" + + "github.com/google/go-cmp/cmp" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var metadataExceptions = strset.New( + "FileMetadata", +) + +func TestAllMetadataRepresented(t *testing.T) { + expected := findMetadataDefinitionNames(t, pkgFiles(t)...) + actual := allTypeNamesFromStruct(artifactMetadataContainer{}) + if !assert.ElementsMatch(t, expected, actual) { + t.Fatalf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual)) + } +} + +func allTypeNamesFromStruct(instance any) []string { + // get all the type names from the struct (not recursively) + var typeNames []string + tt := reflect.TypeOf(instance) + for i := 0; i < tt.NumField(); i++ { + field := tt.Field(i) + typeNames = append(typeNames, field.Type.Name()) + } + sort.Strings(typeNames) + return typeNames +} + +func pkgFiles(t *testing.T) []string { + values, err := filepath.Glob("../../syft/pkg/*.go") + require.NoError(t, err) + return values +} + +func findMetadataDefinitionNames(t *testing.T, paths ...string) []string { + names := strset.New() + usedNames := strset.New() + for _, path := range paths { + metadataDefinitions, usedTypeNames := findMetadataDefinitionNamesInFile(t, path) + + // useful for debugging... + //fmt.Println(path) + //fmt.Println("Defs:", metadataDefinitions) + //fmt.Println("Used Types:", usedTypeNames) + //fmt.Println() + + names.Add(metadataDefinitions...) + usedNames.Add(usedTypeNames...) + } + + // any definition that is used within another struct should not be considered a top-level metadata definition + names.Remove(usedNames.List()...) + + strNames := names.List() + sort.Strings(strNames) + + // note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required. + // it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions. + if len(strNames) < 30 { + t.Fatal("not enough metadata definitions found (discovered: " + fmt.Sprintf("%d", len(strNames)) + ")") + } + + return strNames +} + +func findMetadataDefinitionNamesInFile(t *testing.T, path string) ([]string, []string) { + // set up the parser + fs := token.NewFileSet() + f, err := parser.ParseFile(fs, path, nil, parser.ParseComments) + require.NoError(t, err) + + var metadataDefinitions []string + var usedTypeNames []string + for _, decl := range f.Decls { + // check if the declaration is a type declaration + spec, ok := decl.(*ast.GenDecl) + if !ok || spec.Tok != token.TYPE { + continue + } + + // loop over all types declared in the type declaration + for _, typ := range spec.Specs { + // check if the type is a struct type + spec, ok := typ.(*ast.TypeSpec) + if !ok || spec.Type == nil { + continue + } + + structType, ok := spec.Type.(*ast.StructType) + if !ok { + continue + } + + // check if the struct type ends with "Metadata" + name := spec.Name.String() + + // only look for exported types that end with "Metadata" + if isMetadataTypeCandidate(name) { + // print the full declaration of the struct type + metadataDefinitions = append(metadataDefinitions, name) + usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...) + } + } + } + return metadataDefinitions, usedTypeNames +} + +func typeNamesUsedInStruct(structType *ast.StructType) []string { + // recursively find all type names used in the struct type + var names []string + for i := range structType.Fields.List { + // capture names of all of the types (not field names) + ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool { + ident, ok := n.(*ast.Ident) + if !ok { + return true + } + + // add the type name to the list + names = append(names, ident.Name) + + // continue inspecting + return true + }) + } + + return names +} + +func isMetadataTypeCandidate(name string) bool { + return len(name) > 0 && + strings.HasSuffix(name, "Metadata") && + unicode.IsUpper(rune(name[0])) && // must be exported + !metadataExceptions.Has(name) +} From f664535bc9f6aa4ce4cc071726a7de37d88e3cef Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 22 May 2023 13:51:16 -0400 Subject: [PATCH 3/6] [wip] generate metadata container Signed-off-by: Alex Goodman --- Makefile | 2 +- schema/json/generate/main.go | 1 + schema/json/generated.go | 36 ++++++++++ schema/json/{generate.go => main.go} | 65 +++++++++---------- .../json/{generate_test.go => main_test.go} | 0 5 files changed, 70 insertions(+), 34 deletions(-) create mode 100644 schema/json/generate/main.go create mode 100644 schema/json/generated.go rename schema/json/{generate.go => main.go} (74%) rename schema/json/{generate_test.go => main_test.go} (100%) diff --git a/Makefile b/Makefile index 0b944b83bd9..48eabd50f2b 100644 --- a/Makefile +++ b/Makefile @@ -302,7 +302,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR) .PHONY: generate-json-schema generate-json-schema: ## Generate a new json schema - cd schema/json && go run generate.go + cd schema/json && go generate && go run main.go .PHONY: generate-license-list generate-license-list: ## Generate an updated spdx license list diff --git a/schema/json/generate/main.go b/schema/json/generate/main.go new file mode 100644 index 00000000000..eb8347795a7 --- /dev/null +++ b/schema/json/generate/main.go @@ -0,0 +1 @@ +package generate diff --git a/schema/json/generated.go b/schema/json/generated.go new file mode 100644 index 00000000000..e1a357154c7 --- /dev/null +++ b/schema/json/generated.go @@ -0,0 +1,36 @@ +package main + +import "github.com/anchore/syft/syft/pkg" + +type artifactMetadataContainer struct { + Alpm pkg.AlpmMetadata + Apk pkg.ApkMetadata + Binary pkg.BinaryMetadata + Cocopods pkg.CocoapodsMetadata + Conan pkg.ConanMetadata + ConanLock pkg.ConanLockMetadata + Dart pkg.DartPubMetadata + Dotnet pkg.DotnetDepsMetadata + Dpkg pkg.DpkgMetadata + Gem pkg.GemMetadata + GoBin pkg.GolangBinMetadata + GoMod pkg.GolangModMetadata + Hackage pkg.HackageMetadata + Java pkg.JavaMetadata + KbPackage pkg.KbPackageMetadata + LinuxKernel pkg.LinuxKernelMetadata + LinuxKernelModule pkg.LinuxKernelModuleMetadata + Nix pkg.NixStoreMetadata + NpmPackage pkg.NpmPackageJSONMetadata + NpmPackageLock pkg.NpmPackageLockJSONMetadata + MixLock pkg.MixLockMetadata + Php pkg.PhpComposerJSONMetadata + Portage pkg.PortageMetadata + PythonPackage pkg.PythonPackageMetadata + PythonPipfilelock pkg.PythonPipfileLockMetadata + PythonRequirements pkg.PythonRequirementsMetadata + RDescriptionFile pkg.RDescriptionFileMetadata + Rebar pkg.RebarLockMetadata + Rpm pkg.RpmMetadata + RustCargo pkg.CargoPackageMetadata +} diff --git a/schema/json/generate.go b/schema/json/main.go similarity index 74% rename from schema/json/generate.go rename to schema/json/main.go index 8bab53933d0..1aea6c5d758 100644 --- a/schema/json/generate.go +++ b/schema/json/main.go @@ -14,7 +14,6 @@ import ( "github.com/anchore/syft/internal" syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model" - "github.com/anchore/syft/syft/pkg" ) /* @@ -32,38 +31,38 @@ can be extended to include specific package metadata struct shapes in the future // Should be created during generation below; use reflection's ability to // create types at runtime. // should be same name as struct minus metadata -type artifactMetadataContainer struct { - Alpm pkg.AlpmMetadata - Apk pkg.ApkMetadata - Binary pkg.BinaryMetadata - Cocopods pkg.CocoapodsMetadata - Conan pkg.ConanMetadata - ConanLock pkg.ConanLockMetadata - Dart pkg.DartPubMetadata - Dotnet pkg.DotnetDepsMetadata - Dpkg pkg.DpkgMetadata - Gem pkg.GemMetadata - GoBin pkg.GolangBinMetadata - GoMod pkg.GolangModMetadata - Hackage pkg.HackageMetadata - Java pkg.JavaMetadata - KbPackage pkg.KbPackageMetadata - LinuxKernel pkg.LinuxKernelMetadata - LinuxKernelModule pkg.LinuxKernelModuleMetadata - Nix pkg.NixStoreMetadata - NpmPackage pkg.NpmPackageJSONMetadata - NpmPackageLock pkg.NpmPackageLockJSONMetadata - MixLock pkg.MixLockMetadata - Php pkg.PhpComposerJSONMetadata - Portage pkg.PortageMetadata - PythonPackage pkg.PythonPackageMetadata - PythonPipfilelock pkg.PythonPipfileLockMetadata - PythonRequirements pkg.PythonRequirementsMetadata - RDescriptionFile pkg.RDescriptionFileMetadata - Rebar pkg.RebarLockMetadata - Rpm pkg.RpmMetadata - RustCargo pkg.CargoPackageMetadata -} +//type artifactMetadataContainer struct { +// Alpm pkg.AlpmMetadata +// Apk pkg.ApkMetadata +// Binary pkg.BinaryMetadata +// Cocopods pkg.CocoapodsMetadata +// Conan pkg.ConanMetadata +// ConanLock pkg.ConanLockMetadata +// Dart pkg.DartPubMetadata +// Dotnet pkg.DotnetDepsMetadata +// Dpkg pkg.DpkgMetadata +// Gem pkg.GemMetadata +// GoBin pkg.GolangBinMetadata +// GoMod pkg.GolangModMetadata +// Hackage pkg.HackageMetadata +// Java pkg.JavaMetadata +// KbPackage pkg.KbPackageMetadata +// LinuxKernel pkg.LinuxKernelMetadata +// LinuxKernelModule pkg.LinuxKernelModuleMetadata +// Nix pkg.NixStoreMetadata +// NpmPackage pkg.NpmPackageJSONMetadata +// NpmPackageLock pkg.NpmPackageLockJSONMetadata +// MixLock pkg.MixLockMetadata +// Php pkg.PhpComposerJSONMetadata +// Portage pkg.PortageMetadata +// PythonPackage pkg.PythonPackageMetadata +// PythonPipfilelock pkg.PythonPipfileLockMetadata +// PythonRequirements pkg.PythonRequirementsMetadata +// RDescriptionFile pkg.RDescriptionFileMetadata +// Rebar pkg.RebarLockMetadata +// Rpm pkg.RpmMetadata +// RustCargo pkg.CargoPackageMetadata +//} const schemaVersion = internal.JSONSchemaVersion diff --git a/schema/json/generate_test.go b/schema/json/main_test.go similarity index 100% rename from schema/json/generate_test.go rename to schema/json/main_test.go From 9e70e5ff9f8912482a79e89d96969b761454ee7a Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 23 May 2023 15:48:56 -0400 Subject: [PATCH 4/6] add generation of metadata container struct for JSON schema generation Signed-off-by: Alex Goodman --- Makefile | 2 +- go.mod | 1 + go.sum | 2 + schema/json/generate/main.go | 53 ++++++++- schema/json/generated.go | 36 ------ schema/json/internal/generated.go | 39 +++++++ schema/json/internal/metadata_types.go | 150 +++++++++++++++++++++++++ schema/json/main.go | 46 +------- schema/json/main_test.go | 132 ++-------------------- 9 files changed, 259 insertions(+), 202 deletions(-) delete mode 100644 schema/json/generated.go create mode 100644 schema/json/internal/generated.go create mode 100644 schema/json/internal/metadata_types.go diff --git a/Makefile b/Makefile index 48eabd50f2b..ae917777f72 100644 --- a/Makefile +++ b/Makefile @@ -302,7 +302,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR) .PHONY: generate-json-schema generate-json-schema: ## Generate a new json schema - cd schema/json && go generate && go run main.go + cd schema/json && go generate . && go run . .PHONY: generate-license-list generate-license-list: ## Generate an updated spdx license list diff --git a/go.mod b/go.mod index 4a8359e09f8..f335a736095 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,7 @@ require ( github.com/Masterminds/sprig/v3 v3.2.3 github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8 github.com/anchore/stereoscope v0.0.0-20230508133058-5543439b749f + github.com/dave/jennifer v1.6.1 github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da github.com/docker/docker v24.0.0+incompatible github.com/github/go-spdx/v2 v2.1.2 diff --git a/go.sum b/go.sum index a6ed86ff04c..c0fa2c4cb07 100644 --- a/go.sum +++ b/go.sum @@ -156,6 +156,8 @@ github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/dave/jennifer v1.6.1 h1:T4T/67t6RAA5AIV6+NP8Uk/BIsXgDoqEowgycdQQLuk= +github.com/dave/jennifer v1.6.1/go.mod h1:nXbxhEmQfOZhWml3D1cDK5M1FLnMSozpbFN/m3RmGZc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/schema/json/generate/main.go b/schema/json/generate/main.go index eb8347795a7..1662e964c39 100644 --- a/schema/json/generate/main.go +++ b/schema/json/generate/main.go @@ -1 +1,52 @@ -package generate +package main + +import ( + "fmt" + "os" + + "github.com/dave/jennifer/jen" + + "github.com/anchore/syft/schema/json/internal" +) + +// This program generates internal/generated.go. + +const ( + pkgImport = "github.com/anchore/syft/syft/pkg" + path = "internal/generated.go" +) + +func main() { + typeNames, err := internal.AllSyftMetadataTypeNames() + if err != nil { + panic(fmt.Errorf("unable to get all metadata type names: %w", err)) + } + + fmt.Printf("updating metadata container object with %+v types\n", len(typeNames)) + + f := jen.NewFile("internal") + f.HeaderComment("DO NOT EDIT: generated by schema/json/generate/main.go") + f.ImportName(pkgImport, "pkg") + f.Comment("ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.") + f.Type().Id("ArtifactMetadataContainer").StructFunc(func(g *jen.Group) { + for _, typeName := range typeNames { + g.Id(typeName).Qual(pkgImport, typeName) + } + }) + + rendered := fmt.Sprintf("%#v", f) + + fh, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + panic(fmt.Errorf("unable to open file: %w", err)) + } + _, err = fh.WriteString(rendered) + if err != nil { + panic(fmt.Errorf("unable to write file: %w", err)) + } + if err := fh.Close(); err != nil { + panic(fmt.Errorf("unable to close file: %w", err)) + } + + fmt.Printf("wrote generated metadata container code to %q\n", path) +} diff --git a/schema/json/generated.go b/schema/json/generated.go deleted file mode 100644 index e1a357154c7..00000000000 --- a/schema/json/generated.go +++ /dev/null @@ -1,36 +0,0 @@ -package main - -import "github.com/anchore/syft/syft/pkg" - -type artifactMetadataContainer struct { - Alpm pkg.AlpmMetadata - Apk pkg.ApkMetadata - Binary pkg.BinaryMetadata - Cocopods pkg.CocoapodsMetadata - Conan pkg.ConanMetadata - ConanLock pkg.ConanLockMetadata - Dart pkg.DartPubMetadata - Dotnet pkg.DotnetDepsMetadata - Dpkg pkg.DpkgMetadata - Gem pkg.GemMetadata - GoBin pkg.GolangBinMetadata - GoMod pkg.GolangModMetadata - Hackage pkg.HackageMetadata - Java pkg.JavaMetadata - KbPackage pkg.KbPackageMetadata - LinuxKernel pkg.LinuxKernelMetadata - LinuxKernelModule pkg.LinuxKernelModuleMetadata - Nix pkg.NixStoreMetadata - NpmPackage pkg.NpmPackageJSONMetadata - NpmPackageLock pkg.NpmPackageLockJSONMetadata - MixLock pkg.MixLockMetadata - Php pkg.PhpComposerJSONMetadata - Portage pkg.PortageMetadata - PythonPackage pkg.PythonPackageMetadata - PythonPipfilelock pkg.PythonPipfileLockMetadata - PythonRequirements pkg.PythonRequirementsMetadata - RDescriptionFile pkg.RDescriptionFileMetadata - Rebar pkg.RebarLockMetadata - Rpm pkg.RpmMetadata - RustCargo pkg.CargoPackageMetadata -} diff --git a/schema/json/internal/generated.go b/schema/json/internal/generated.go new file mode 100644 index 00000000000..3341818deb7 --- /dev/null +++ b/schema/json/internal/generated.go @@ -0,0 +1,39 @@ +// DO NOT EDIT: generated by schema/json/generate/main.go + +package internal + +import "github.com/anchore/syft/syft/pkg" + +// ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field. +type ArtifactMetadataContainer struct { + AlpmMetadata pkg.AlpmMetadata + ApkMetadata pkg.ApkMetadata + BinaryMetadata pkg.BinaryMetadata + CargoPackageMetadata pkg.CargoPackageMetadata + CocoapodsMetadata pkg.CocoapodsMetadata + ConanLockMetadata pkg.ConanLockMetadata + ConanMetadata pkg.ConanMetadata + DartPubMetadata pkg.DartPubMetadata + DotnetDepsMetadata pkg.DotnetDepsMetadata + DpkgMetadata pkg.DpkgMetadata + GemMetadata pkg.GemMetadata + GolangBinMetadata pkg.GolangBinMetadata + GolangModMetadata pkg.GolangModMetadata + HackageMetadata pkg.HackageMetadata + JavaMetadata pkg.JavaMetadata + KbPackageMetadata pkg.KbPackageMetadata + LinuxKernelMetadata pkg.LinuxKernelMetadata + LinuxKernelModuleMetadata pkg.LinuxKernelModuleMetadata + MixLockMetadata pkg.MixLockMetadata + NixStoreMetadata pkg.NixStoreMetadata + NpmPackageJSONMetadata pkg.NpmPackageJSONMetadata + NpmPackageLockJSONMetadata pkg.NpmPackageLockJSONMetadata + PhpComposerJSONMetadata pkg.PhpComposerJSONMetadata + PortageMetadata pkg.PortageMetadata + PythonPackageMetadata pkg.PythonPackageMetadata + PythonPipfileLockMetadata pkg.PythonPipfileLockMetadata + PythonRequirementsMetadata pkg.PythonRequirementsMetadata + RDescriptionFileMetadata pkg.RDescriptionFileMetadata + RebarLockMetadata pkg.RebarLockMetadata + RpmMetadata pkg.RpmMetadata +} diff --git a/schema/json/internal/metadata_types.go b/schema/json/internal/metadata_types.go new file mode 100644 index 00000000000..00b16027c78 --- /dev/null +++ b/schema/json/internal/metadata_types.go @@ -0,0 +1,150 @@ +package internal + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "os/exec" + "path/filepath" + "sort" + "strings" + "unicode" + + "github.com/scylladb/go-set/strset" +) + +var metadataExceptions = strset.New( + "FileMetadata", +) + +func AllSyftMetadataTypeNames() ([]string, error) { + root, err := repoRoot() + if err != nil { + return nil, err + } + files, err := filepath.Glob(filepath.Join(root, "syft/pkg/*.go")) + if err != nil { + return nil, err + } + return findMetadataDefinitionNames(files...) +} + +func repoRoot() (string, error) { + root, err := exec.Command("git", "rev-parse", "--show-toplevel").Output() + if err != nil { + return "", fmt.Errorf("unable to find repo root dir: %+v", err) + } + absRepoRoot, err := filepath.Abs(strings.TrimSpace(string(root))) + if err != nil { + return "", fmt.Errorf("unable to get abs path to repo root: %w", err) + } + return absRepoRoot, nil +} + +func findMetadataDefinitionNames(paths ...string) ([]string, error) { + names := strset.New() + usedNames := strset.New() + for _, path := range paths { + metadataDefinitions, usedTypeNames, err := findMetadataDefinitionNamesInFile(path) + if err != nil { + return nil, err + } + + // useful for debugging... + //fmt.Println(path) + //fmt.Println("Defs:", metadataDefinitions) + //fmt.Println("Used Types:", usedTypeNames) + //fmt.Println() + + names.Add(metadataDefinitions...) + usedNames.Add(usedTypeNames...) + } + + // any definition that is used within another struct should not be considered a top-level metadata definition + names.Remove(usedNames.List()...) + + strNames := names.List() + sort.Strings(strNames) + + // note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required. + // it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions. + if len(strNames) < 30 { + return nil, fmt.Errorf("not enough metadata definitions found (discovered: " + fmt.Sprintf("%d", len(strNames)) + ")") + } + + return strNames, nil +} + +func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error) { + // set up the parser + fs := token.NewFileSet() + f, err := parser.ParseFile(fs, path, nil, parser.ParseComments) + if err != nil { + return nil, nil, err + } + + var metadataDefinitions []string + var usedTypeNames []string + for _, decl := range f.Decls { + // check if the declaration is a type declaration + spec, ok := decl.(*ast.GenDecl) + if !ok || spec.Tok != token.TYPE { + continue + } + + // loop over all types declared in the type declaration + for _, typ := range spec.Specs { + // check if the type is a struct type + spec, ok := typ.(*ast.TypeSpec) + if !ok || spec.Type == nil { + continue + } + + structType, ok := spec.Type.(*ast.StructType) + if !ok { + continue + } + + // check if the struct type ends with "Metadata" + name := spec.Name.String() + + // only look for exported types that end with "Metadata" + if isMetadataTypeCandidate(name) { + // print the full declaration of the struct type + metadataDefinitions = append(metadataDefinitions, name) + usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...) + } + } + } + return metadataDefinitions, usedTypeNames, nil +} + +func typeNamesUsedInStruct(structType *ast.StructType) []string { + // recursively find all type names used in the struct type + var names []string + for i := range structType.Fields.List { + // capture names of all of the types (not field names) + ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool { + ident, ok := n.(*ast.Ident) + if !ok { + return true + } + + // add the type name to the list + names = append(names, ident.Name) + + // continue inspecting + return true + }) + } + + return names +} + +func isMetadataTypeCandidate(name string) bool { + return len(name) > 0 && + strings.HasSuffix(name, "Metadata") && + unicode.IsUpper(rune(name[0])) && // must be exported + !metadataExceptions.Has(name) +} diff --git a/schema/json/main.go b/schema/json/main.go index 1aea6c5d758..246abc532a5 100644 --- a/schema/json/main.go +++ b/schema/json/main.go @@ -13,6 +13,7 @@ import ( "github.com/invopop/jsonschema" "github.com/anchore/syft/internal" + genInt "github.com/anchore/syft/schema/json/internal" syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model" ) @@ -23,46 +24,7 @@ are not captured (empty interfaces). This means that pkg.Package.Metadata is not can be extended to include specific package metadata struct shapes in the future. */ -// This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}). -// When a new package metadata definition is created it will need to be manually added here. The variable name does -// not matter as long as it is exported. - -// TODO: this should be generated from reflection of whats in the pkg package -// Should be created during generation below; use reflection's ability to -// create types at runtime. -// should be same name as struct minus metadata -//type artifactMetadataContainer struct { -// Alpm pkg.AlpmMetadata -// Apk pkg.ApkMetadata -// Binary pkg.BinaryMetadata -// Cocopods pkg.CocoapodsMetadata -// Conan pkg.ConanMetadata -// ConanLock pkg.ConanLockMetadata -// Dart pkg.DartPubMetadata -// Dotnet pkg.DotnetDepsMetadata -// Dpkg pkg.DpkgMetadata -// Gem pkg.GemMetadata -// GoBin pkg.GolangBinMetadata -// GoMod pkg.GolangModMetadata -// Hackage pkg.HackageMetadata -// Java pkg.JavaMetadata -// KbPackage pkg.KbPackageMetadata -// LinuxKernel pkg.LinuxKernelMetadata -// LinuxKernelModule pkg.LinuxKernelModuleMetadata -// Nix pkg.NixStoreMetadata -// NpmPackage pkg.NpmPackageJSONMetadata -// NpmPackageLock pkg.NpmPackageLockJSONMetadata -// MixLock pkg.MixLockMetadata -// Php pkg.PhpComposerJSONMetadata -// Portage pkg.PortageMetadata -// PythonPackage pkg.PythonPackageMetadata -// PythonPipfilelock pkg.PythonPipfileLockMetadata -// PythonRequirements pkg.PythonRequirementsMetadata -// RDescriptionFile pkg.RDescriptionFileMetadata -// Rebar pkg.RebarLockMetadata -// Rpm pkg.RpmMetadata -// RustCargo pkg.CargoPackageMetadata -//} +//go:generate go run ./generate/main.go const schemaVersion = internal.JSONSchemaVersion @@ -78,14 +40,14 @@ func build() *jsonschema.Schema { }, } documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{})) - metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{})) + metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&genInt.ArtifactMetadataContainer{})) // TODO: inject source definitions // inject the definitions of all metadatas into the schema definitions var metadataNames []string for name, definition := range metadataSchema.Definitions { - if name == "artifactMetadataContainer" { + if name == reflect.TypeOf(genInt.ArtifactMetadataContainer{}).Name() { // ignore the definition for the fake container continue } diff --git a/schema/json/main_test.go b/schema/json/main_test.go index 65febc7f832..0903b4dde39 100644 --- a/schema/json/main_test.go +++ b/schema/json/main_test.go @@ -1,32 +1,28 @@ package main import ( - "fmt" - "go/ast" - "go/parser" - "go/token" - "path/filepath" "reflect" "sort" - "strings" "testing" - "unicode" "github.com/google/go-cmp/cmp" - "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" -) -var metadataExceptions = strset.New( - "FileMetadata", + "github.com/anchore/syft/schema/json/internal" ) func TestAllMetadataRepresented(t *testing.T) { - expected := findMetadataDefinitionNames(t, pkgFiles(t)...) - actual := allTypeNamesFromStruct(artifactMetadataContainer{}) + // this test checks that all the metadata types are represented in the currently generated ArtifactMetadataContainer struct + // such that PRs will reflect when there is drift from the implemented set of metadata types and the generated struct + // which controls the JSON schema content. + expected, err := internal.AllSyftMetadataTypeNames() + require.NoError(t, err) + actual := allTypeNamesFromStruct(internal.ArtifactMetadataContainer{}) if !assert.ElementsMatch(t, expected, actual) { - t.Fatalf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual)) + t.Errorf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual)) + t.Log("did you add a new pkg.*Metadata type without updating the JSON schema?") + t.Log("if so, you need to update the schema version and regenerate the JSON schema (make generate-json-schema)") } } @@ -41,111 +37,3 @@ func allTypeNamesFromStruct(instance any) []string { sort.Strings(typeNames) return typeNames } - -func pkgFiles(t *testing.T) []string { - values, err := filepath.Glob("../../syft/pkg/*.go") - require.NoError(t, err) - return values -} - -func findMetadataDefinitionNames(t *testing.T, paths ...string) []string { - names := strset.New() - usedNames := strset.New() - for _, path := range paths { - metadataDefinitions, usedTypeNames := findMetadataDefinitionNamesInFile(t, path) - - // useful for debugging... - //fmt.Println(path) - //fmt.Println("Defs:", metadataDefinitions) - //fmt.Println("Used Types:", usedTypeNames) - //fmt.Println() - - names.Add(metadataDefinitions...) - usedNames.Add(usedTypeNames...) - } - - // any definition that is used within another struct should not be considered a top-level metadata definition - names.Remove(usedNames.List()...) - - strNames := names.List() - sort.Strings(strNames) - - // note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required. - // it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions. - if len(strNames) < 30 { - t.Fatal("not enough metadata definitions found (discovered: " + fmt.Sprintf("%d", len(strNames)) + ")") - } - - return strNames -} - -func findMetadataDefinitionNamesInFile(t *testing.T, path string) ([]string, []string) { - // set up the parser - fs := token.NewFileSet() - f, err := parser.ParseFile(fs, path, nil, parser.ParseComments) - require.NoError(t, err) - - var metadataDefinitions []string - var usedTypeNames []string - for _, decl := range f.Decls { - // check if the declaration is a type declaration - spec, ok := decl.(*ast.GenDecl) - if !ok || spec.Tok != token.TYPE { - continue - } - - // loop over all types declared in the type declaration - for _, typ := range spec.Specs { - // check if the type is a struct type - spec, ok := typ.(*ast.TypeSpec) - if !ok || spec.Type == nil { - continue - } - - structType, ok := spec.Type.(*ast.StructType) - if !ok { - continue - } - - // check if the struct type ends with "Metadata" - name := spec.Name.String() - - // only look for exported types that end with "Metadata" - if isMetadataTypeCandidate(name) { - // print the full declaration of the struct type - metadataDefinitions = append(metadataDefinitions, name) - usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...) - } - } - } - return metadataDefinitions, usedTypeNames -} - -func typeNamesUsedInStruct(structType *ast.StructType) []string { - // recursively find all type names used in the struct type - var names []string - for i := range structType.Fields.List { - // capture names of all of the types (not field names) - ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool { - ident, ok := n.(*ast.Ident) - if !ok { - return true - } - - // add the type name to the list - names = append(names, ident.Name) - - // continue inspecting - return true - }) - } - - return names -} - -func isMetadataTypeCandidate(name string) bool { - return len(name) > 0 && - strings.HasSuffix(name, "Metadata") && - unicode.IsUpper(rune(name[0])) && // must be exported - !metadataExceptions.Has(name) -} From ab4d82e3a2315e765689a7b414d8cf5e426d69d7 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 23 May 2023 15:56:26 -0400 Subject: [PATCH 5/6] fix linting Signed-off-by: Alex Goodman --- schema/json/generate/main.go | 2 -- schema/json/internal/metadata_types.go | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/schema/json/generate/main.go b/schema/json/generate/main.go index 1662e964c39..fc8dc120a21 100644 --- a/schema/json/generate/main.go +++ b/schema/json/generate/main.go @@ -47,6 +47,4 @@ func main() { if err := fh.Close(); err != nil { panic(fmt.Errorf("unable to close file: %w", err)) } - - fmt.Printf("wrote generated metadata container code to %q\n", path) } diff --git a/schema/json/internal/metadata_types.go b/schema/json/internal/metadata_types.go index 00b16027c78..4d515a18890 100644 --- a/schema/json/internal/metadata_types.go +++ b/schema/json/internal/metadata_types.go @@ -52,10 +52,10 @@ func findMetadataDefinitionNames(paths ...string) ([]string, error) { } // useful for debugging... - //fmt.Println(path) - //fmt.Println("Defs:", metadataDefinitions) - //fmt.Println("Used Types:", usedTypeNames) - //fmt.Println() + // fmt.Println(path) + // fmt.Println("Defs:", metadataDefinitions) + // fmt.Println("Used Types:", usedTypeNames) + // fmt.Println() names.Add(metadataDefinitions...) usedNames.Add(usedTypeNames...) From e99d968f5b7434f33ebaee7df7bd6f86d89ef1c1 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 23 May 2023 16:54:51 -0400 Subject: [PATCH 6/6] update linter script to account for code generation Signed-off-by: Alex Goodman --- .github/scripts/json-schema-drift-check.sh | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/.github/scripts/json-schema-drift-check.sh b/.github/scripts/json-schema-drift-check.sh index 7b7f7dd2f62..3002236d68b 100755 --- a/.github/scripts/json-schema-drift-check.sh +++ b/.github/scripts/json-schema-drift-check.sh @@ -1,27 +1,17 @@ #!/usr/bin/env bash set -u -if ! git diff-index --quiet HEAD --; then - git diff-index HEAD -- - git --no-pager diff - echo "there are uncommitted changes, please commit them before running this check" +if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then + echo " 🔴 there are uncommitted changes, please commit them before running this check" exit 1 fi -success=true - if ! make generate-json-schema; then echo "Generating json schema failed" - success=false -fi - -if ! git diff-index --quiet HEAD --; then - git diff-index HEAD -- - git --no-pager diff - echo "JSON schema drift detected!" - success=false + exit 1 fi -if ! $success; then +if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then + echo " 🔴 there are uncommitted changes, please commit them before running this check" exit 1 fi