From 87a159c4587a194fa9386fb340980fec53bd61cf Mon Sep 17 00:00:00 2001 From: Neil Pankey Date: Tue, 6 Oct 2020 16:03:49 -0700 Subject: [PATCH] utf16: Schema errors and encoding tests --- gen_testdata.go | 85 ++++++++++++++++++++++----------------------- main.go | 28 +++++++++------ main_test.go | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 54 deletions(-) diff --git a/gen_testdata.go b/gen_testdata.go index 062497b..fed4aaf 100644 --- a/gen_testdata.go +++ b/gen_testdata.go @@ -1,57 +1,56 @@ // +build ignore -// generates clones the utf-8 tests data to the other +// gen_testdata clones the utf-8 test data to the other // unicode encodings and adds BOM variants of each. package main import ( - "io/ioutil" - "log" - "os" - "path/filepath" + "io/ioutil" + "log" + "os" + "path/filepath" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/unicode" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/unicode" ) - func main() { - var xforms = []struct { - dir, bom string - enc encoding.Encoding - } { - { "testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) }, - { "testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) }, - } - - paths, _ := filepath.Glob("testdata/utf-8/*") - for _, p := range paths { - src, err := ioutil.ReadFile(p) - if err != nil { - log.Fatal(err) - } - - write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src) - for _, xform := range xforms { - dst, err := xform.enc.NewEncoder().Bytes(src) - if err != nil { - log.Fatal(err) - } - write(xform.dir, p, "", dst) - write(xform.dir + "_bom", p, xform.bom, dst) - } - } + var xforms = []struct { + dir, bom string + enc encoding.Encoding + }{ + {"testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)}, + {"testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)}, + } + + paths, _ := filepath.Glob("testdata/utf-8/*") + for _, p := 
range paths { + src, err := ioutil.ReadFile(p) + if err != nil { + log.Fatal(err) + } + + write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src) + for _, xform := range xforms { + dst, err := xform.enc.NewEncoder().Bytes(src) + if err != nil { + log.Fatal(err) + } + write(xform.dir, p, "", dst) + write(xform.dir+"_bom", p, xform.bom, dst) + } + } } func write(dir, orig, bom string, buf []byte) { - f, err := os.Create(filepath.Join(dir, filepath.Base(orig))) - if err != nil { - log.Fatal(err) - } - if _, err = f.Write([]byte(bom)); err != nil { - log.Fatal(err) - } - if _, err = f.Write(buf); err != nil { - log.Fatal(err) - } + f, err := os.Create(filepath.Join(dir, filepath.Base(orig))) + if err != nil { + log.Fatal(err) + } + if _, err = f.Write([]byte(bom)); err != nil { + log.Fatal(err) + } + if _, err = f.Write(buf); err != nil { + log.Fatal(err) + } } diff --git a/main.go b/main.go index 766b97e..bf33441 100644 --- a/main.go +++ b/main.go @@ -79,7 +79,7 @@ func realMain(args []string, w io.Writer) int { dir := filepath.Dir(list) f, err := os.Open(list) if err != nil { - log.Fatalf("%s: %s\n", list, err) + return schemaError("%s: %s", list, err) } defer f.Close() @@ -93,7 +93,7 @@ func realMain(args []string, w io.Writer) int { docs = append(docs, glob(pattern)...) 
} if err := scanner.Err(); err != nil { - log.Fatalf("%s: invalid file list: %s\n", list, err) + return schemaError("%s: invalid file list: %s", list, err) } } if len(docs) == 0 { @@ -104,13 +104,13 @@ func realMain(args []string, w io.Writer) int { sl := gojsonschema.NewSchemaLoader() schemaPath, err := filepath.Abs(*schemaFlag) if err != nil { - log.Fatalf("%s: unable to convert to absolute path: %s\n", *schemaFlag, err) + return schemaError("%s: unable to convert to absolute path: %s", *schemaFlag, err) } for _, ref := range refFlags { for _, p := range glob(ref) { absPath, err := filepath.Abs(p) if err != nil { - log.Fatalf("%s: unable to convert to absolute path: %s\n", absPath, err) + return schemaError("%s: unable to convert to absolute path: %s", absPath, err) } if absPath == schemaPath { @@ -119,22 +119,22 @@ func realMain(args []string, w io.Writer) int { loader, err := jsonLoader(absPath) if err != nil { - log.Fatalf("%s: unable to load schema ref: %s\n", *schemaFlag, err) + return schemaError("%s: unable to load schema ref: %s", *schemaFlag, err) } if err := sl.AddSchemas(loader); err != nil { - log.Fatalf("%s: invalid schema: %s\n", p, err) + return schemaError("%s: invalid schema: %s", p, err) } } } schemaLoader, err := jsonLoader(schemaPath) if err != nil { - log.Fatalf("%s: unable to load schema: %s\n", *schemaFlag, err) + return schemaError("%s: unable to load schema: %s", *schemaFlag, err) } schema, err := sl.Compile(schemaLoader) if err != nil { - log.Fatalf("%s: invalid schema: %s\n", *schemaFlag, err) + return schemaError("%s: invalid schema: %s", *schemaFlag, err) } // Validate the schema against each doc in parallel, limiting simultaneous @@ -262,8 +262,8 @@ func jsonDecodeCharset(buf []byte) ([]byte, error) { func printUsage() { fmt.Fprintf(os.Stderr, `Usage: %s -s schema.(json|yml) [options] document.(json|yml) ... - yajsv validates JSON and YAML document(s) against a schema. 
One of three statuses are - reported per document: + yajsv validates JSON and YAML document(s) against a schema. One of three status + results are reported per document: pass: Document is valid relative to the schema fail: Document is invalid relative to the schema @@ -273,7 +273,8 @@ func printUsage() { schema validation failure. Sets the exit code to 1 on any failures, 2 on any errors, 3 on both, 4 on - invalid usage. Otherwise, 0 is returned if everything passes validation. + invalid usage, 5 on schema definition or file-list errors. Otherwise, 0 is + returned if everything passes validation. Options: @@ -288,6 +289,11 @@ func usageError(msg string) int { return 4 } +func schemaError(format string, args ...interface{}) int { + fmt.Fprintf(os.Stderr, format+"\n", args...) + return 5 +} + // glob is a wrapper that also resolves `~` since we may be skipping // the shell expansion when single-quoting globs at the command line func glob(pattern string) []string { diff --git a/main_test.go b/main_test.go index 878fe8b..42d0413 100644 --- a/main_test.go +++ b/main_test.go @@ -1,12 +1,23 @@ package main import ( + "fmt" + "os" "path/filepath" "sort" "strings" "testing" ) +func init() { + // TODO: Clean up this global monkey-patching + devnull, err := os.Open(os.DevNull) + if err != nil { + panic(err) + } + os.Stderr = devnull +} + func TestMain(t *testing.T) { tests := []struct { in string @@ -14,6 +25,10 @@ func TestMain(t *testing.T) { exit int }{ { + "-s testdata/utf-16be_bom/schema.json testdata/utf-16le_bom/data-fail.yml", + []string{}, + 5, + }, { "-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.yml", []string{"testdata/utf-8/data-pass.yml: pass"}, 0, @@ -89,3 +104,80 @@ func TestMain(t *testing.T) { }) } } + +func TestMatrix(t *testing.T) { + // schema.{format} {encoding}{_bom}/data-{expect}.{format} + type testcase struct { + schemaEnc, schemaFmt string + dataEnc, dataFmt, dataRes string + allowBOM bool + } + + encodings := []string{"utf-8", "utf-16be", 
"utf-16le", "utf-8_bom", "utf-16be_bom", "utf-16le_bom"} + formats := []string{"json", "yml"} + results := []string{"pass", "fail", "error"} + tests := []testcase{} + + // poor man's cartesian product + for _, senc := range encodings { + for _, sfmt := range formats { + for _, denc := range encodings { + for _, dfmt := range formats { + for _, dres := range results { + tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, false}) + tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, true}) + } + } + } + } + } + + for _, tt := range tests { + schemaBOM := strings.HasSuffix(tt.schemaEnc, "_bom") + schema16 := strings.HasPrefix(tt.schemaEnc, "utf-16") + dataBOM := strings.HasSuffix(tt.dataEnc, "_bom") + data16 := strings.HasPrefix(tt.dataEnc, "utf-16") + + schema := fmt.Sprintf("testdata/%s/schema.%s", tt.schemaEnc, tt.schemaFmt) + data := fmt.Sprintf("testdata/%s/data-%s.%s", tt.dataEnc, tt.dataRes, tt.dataFmt) + cmd := fmt.Sprintf("-s %s %s", schema, data) + if tt.allowBOM { + cmd = "-b " + cmd + } + + t.Run(cmd, func(t *testing.T) { + want := 0 + switch { + // Schema Errors (exit = 5) + // - YAML w/out BOM for UTF-16 + // - JSON w/ BOM but missing allowBOM flag + case tt.schemaFmt == "yml" && !schemaBOM && schema16: + want = 5 + case tt.schemaFmt == "json" && schemaBOM && !tt.allowBOM: + want = 5 + // Data Errors (exit = 2) + // - YAML w/out BOM for UTF-16 + // - JSON w/ BOM but missing allowBOM flag + // - standard malformed files (e.g. data-error) + case tt.dataFmt == "yml" && !dataBOM && data16: + want = 2 + case tt.dataFmt == "json" && dataBOM && !tt.allowBOM: + want = 2 + case tt.dataRes == "error": + want = 2 + // Data Failures + case tt.dataRes == "fail": + want = 1 + } + + // TODO: Clean up this global monkey-patching + *bomFlag = tt.allowBOM + + var w strings.Builder + got := realMain(strings.Split(cmd, " "), &w) + if got != want { + t.Errorf("got(%d) != want(%d) bomflag %t", got, want, *bomFlag) + } + }) + } +}