From a87d6e70d9b5f57805fe4ee05a6e02300f038b5c Mon Sep 17 00:00:00 2001 From: Javier Alvarado Date: Thu, 7 Feb 2019 11:27:57 -0800 Subject: [PATCH 1/5] A user should not be faced with a stack trace due to a mistyped command line option. Don't output error message twice. --- dgraph/cmd/root.go | 2 +- x/error.go | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dgraph/cmd/root.go b/dgraph/cmd/root.go index ebec7cc7dbc..35327aa6b8a 100644 --- a/dgraph/cmd/root.go +++ b/dgraph/cmd/root.go @@ -59,7 +59,7 @@ func Execute() { // https://github.com/kubernetes/kubernetes/issues/17162#issuecomment-225596212 x.Check(goflag.CommandLine.Parse([]string{})) - x.Check(RootCmd.Execute()) + x.CheckfNoLog(RootCmd.Execute()) } var rootConf = viper.New() diff --git a/x/error.go b/x/error.go index b5f3da16767..06ef02b2c66 100644 --- a/x/error.go +++ b/x/error.go @@ -31,6 +31,7 @@ package x import ( "fmt" "log" + "os" "github.com/pkg/errors" ) @@ -49,12 +50,20 @@ func Checkf(err error, format string, args ...interface{}) { } } +// CheckfNoTrace is Checkf without a stack trace. func CheckfNoTrace(err error) { if err != nil { log.Fatalf(err.Error()) } } +// CheckfNoLog exits on error without any message (to avoid duplicate error messages). +func CheckfNoLog(err error) { + if err != nil { + os.Exit(1) + } +} + // Check2 acts as convenience wrapper around Check, using the 2nd argument as error. func Check2(_ interface{}, err error) { Check(err) From bc0011ea09243f199908b212e37d30abb9a31ce0 Mon Sep 17 00:00:00 2001 From: Javier Alvarado Date: Thu, 7 Feb 2019 13:40:12 -0800 Subject: [PATCH 2/5] Don't output usage on invalid command line option since it had bury the error message. --- dgraph/cmd/root.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dgraph/cmd/root.go b/dgraph/cmd/root.go index 35327aa6b8a..c5ad99686f4 100644 --- a/dgraph/cmd/root.go +++ b/dgraph/cmd/root.go @@ -59,6 +59,9 @@ func Execute() { // https://github.com/kubernetes/kubernetes/issues/17162#issuecomment-225596212 x.Check(goflag.CommandLine.Parse([]string{})) + // Dumping the usage in case of an error makes the error messages harder to see. + RootCmd.SilenceUsage = true + x.CheckfNoLog(RootCmd.Execute()) } From 8573c21622d913e212f0e04d89f9dab63e364743 Mon Sep 17 00:00:00 2001 From: Javier Alvarado Date: Thu, 7 Feb 2019 16:55:38 -0800 Subject: [PATCH 3/5] Add --format option to bulk and live loaders. Replace --rdfs and --jsons in bulk loader with --files to match live loader. --- chunker/chunk.go | 26 +++++++++++++++++++++----- chunker/chunk_test.go | 8 ++++---- dgraph/cmd/bulk/loader.go | 35 +++++++++++++++++------------------ dgraph/cmd/bulk/run.go | 22 ++++++++-------------- dgraph/cmd/live/run.go | 38 +++++++++++++++++++++----------------- 5 files changed, 71 insertions(+), 58 deletions(-) diff --git a/chunker/chunk.go b/chunker/chunk.go index ae25ef6472e..1e4df2c9fcc 100644 --- a/chunker/chunk.go +++ b/chunker/chunk.go @@ -48,18 +48,19 @@ type rdfChunker struct{} type jsonChunker struct{} const ( - RdfInput int = iota - JsonInput + UnknownFormat int = iota + RdfFormat + JsonFormat ) func NewChunker(inputFormat int) Chunker { switch inputFormat { - case RdfInput: + case RdfFormat: return &rdfChunker{} - case JsonInput: + case JsonFormat: return &jsonChunker{} default: - panic("unknown chunker type") + panic("unknown input format") } } @@ -311,3 +312,18 @@ func IsJSONData(r *bufio.Reader) (bool, error) { return err == nil, nil } + +// DataFormat returns a file's data format (RDF, JSON, or unknown) based on the filename +// or the user-provided format option. The file extension has precedence. +func DataFormat(filename string, format string) int { + format = strings.ToLower(format) + filename = strings.TrimSuffix(strings.ToLower(filename), ".gz") + switch { + case strings.HasSuffix(filename, ".rdf") || format == "rdf": + return RdfFormat + case strings.HasSuffix(filename, ".json") || format == "json": + return JsonFormat + default: + return UnknownFormat + } +} diff --git a/chunker/chunk_test.go b/chunker/chunk_test.go index 230cb0ff9cf..dd027dce2d9 100644 --- a/chunker/chunk_test.go +++ b/chunker/chunk_test.go @@ -46,7 +46,7 @@ func TestJSONLoadStart(t *testing.T) { } for _, test := range tests { - chunker := NewChunker(JsonInput) + chunker := NewChunker(JsonFormat) require.Error(t, chunker.Begin(bufioReader(test.json)), test.desc) } } @@ -64,7 +64,7 @@ func TestJSONLoadReadNext(t *testing.T) { {"[{}", "malformed array"}, } for _, test := range tests { - chunker := NewChunker(JsonInput) + chunker := NewChunker(JsonFormat) reader := bufioReader(test.json) require.NoError(t, chunker.Begin(reader), test.desc) @@ -113,7 +113,7 @@ func TestJSONLoadSuccessFirst(t *testing.T) { }, } for _, test := range tests { - chunker := NewChunker(JsonInput) + chunker := NewChunker(JsonFormat) reader := bufioReader(test.json) require.NoError(t, chunker.Begin(reader), test.desc) @@ -176,7 +176,7 @@ func TestJSONLoadSuccessAll(t *testing.T) { }`, } - chunker := NewChunker(JsonInput) + chunker := NewChunker(JsonFormat) reader := bufioReader(testDoc) var json *bytes.Buffer diff --git a/dgraph/cmd/bulk/loader.go b/dgraph/cmd/bulk/loader.go index a9bce8dcf6b..42dc1d0ff92 100644 --- a/dgraph/cmd/bulk/loader.go +++ b/dgraph/cmd/bulk/loader.go @@ -31,6 +31,7 @@ import ( "github.com/dgraph-io/badger" bo "github.com/dgraph-io/badger/options" + "github.com/dgraph-io/dgraph/chunker" "github.com/dgraph-io/dgraph/protos/pb" "github.com/dgraph-io/dgraph/schema" @@ -41,8 +42,8 @@ import ( ) type options struct { - RDFDir string - JSONDir string + DataFiles string + DataFormat string SchemaFile string DgraphsDir string TmpDir string @@ -162,21 +163,19 @@ func (ld *loader) mapStage() { LRUSize: 1 << 19, }) - var dir, ext string - var loaderType int - if ld.opt.RDFDir != "" { - loaderType = chunker.RdfInput - dir = ld.opt.RDFDir - ext = ".rdf" - } else { - loaderType = chunker.JsonInput - dir = ld.opt.JSONDir - ext = ".json" - - } - files := x.FindDataFiles(dir, []string{ext, ext + ".gz"}) + files := x.FindDataFiles(ld.opt.DataFiles, []string{".rdf", ".rdf.gz", ".json", ".json.gz"}) if len(files) == 0 { - fmt.Printf("No *%s files found under %s.\n", ext, dir) + fmt.Printf("No data files found in %s.\n", ld.opt.DataFiles) + os.Exit(1) + } + + // Because mappers must handle chunks that may be from different input files, they must all + // assume the same data format, either RDF or JSON. Use the one specified by the user or by + // the first load file. + loadType := chunker.DataFormat(files[0], ld.opt.DataFormat) + if loadType == chunker.UnknownFormat { + // Dont't try to detect JSON input in bulk loader. + fmt.Printf("Need --format=rdf or --format=json to load %s", files[0]) os.Exit(1) } @@ -184,7 +183,7 @@ func (ld *loader) mapStage() { mapperWg.Add(len(ld.mappers)) for _, m := range ld.mappers { go func(m *mapper) { - m.run(loaderType) + m.run(loadType) mapperWg.Done() }(m) } @@ -201,7 +200,7 @@ func (ld *loader) mapStage() { r, cleanup := chunker.FileReader(file) defer cleanup() - chunker := chunker.NewChunker(loaderType) + chunker := chunker.NewChunker(loadType) x.Check(chunker.Begin(r)) for { chunkBuf, err := chunker.Chunk(r) diff --git a/dgraph/cmd/bulk/run.go b/dgraph/cmd/bulk/run.go index 046ecf100b8..b4d53a8970e 100644 --- a/dgraph/cmd/bulk/run.go +++ b/dgraph/cmd/bulk/run.go @@ -47,13 +47,12 @@ func init() { Bulk.EnvPrefix = "DGRAPH_BULK" flag := Bulk.Cmd.Flags() - flag.StringP("rdfs", "r", "", - "Location of RDF data to load.") - // would be nice to use -j to match -r, but already used by --num_go_routines - flag.String("jsons", "", - "Location of JSON data to load.") + flag.StringP("files", "f", "", + "Location of *.rdf(.gz) or *.json(.gz) file(s) to load") flag.StringP("schema_file", "s", "", "Location of schema file to load.") + flag.String("format", "", + "Specify file format (rdf or json) instead of getting it from filename") flag.String("out", "out", "Location to write the final dgraph data directories.") flag.String("tmp", "tmp", @@ -95,8 +94,8 @@ func init() { func run() { opt := options{ - RDFDir: Bulk.Conf.GetString("rdfs"), - JSONDir: Bulk.Conf.GetString("jsons"), + DataFiles: Bulk.Conf.GetString("files"), + DataFormat: Bulk.Conf.GetString("format"), SchemaFile: Bulk.Conf.GetString("schema_file"), DgraphsDir: Bulk.Conf.GetString("out"), TmpDir: Bulk.Conf.GetString("tmp"), @@ -124,13 +123,8 @@ func run() { fmt.Fprint(os.Stderr, "Schema file must be specified.\n") os.Exit(1) } - if opt.RDFDir == "" && opt.JSONDir == "" { - fmt.Fprint(os.Stderr, "RDF or JSON file(s) must be specified.\n") - os.Exit(1) - } - if opt.RDFDir != "" && opt.JSONDir != "" { - fmt.Fprintf(os.Stderr, "Invalid flags: only one of rdfs(%q) of jsons(%q) may be specified.\n", - opt.RDFDir, opt.JSONDir) + if opt.DataFiles == "" { + fmt.Fprint(os.Stderr, "RDF or JSON file(s) location must be specified.\n") os.Exit(1) } if opt.ReduceShards > opt.MapShards { diff --git a/dgraph/cmd/live/run.go b/dgraph/cmd/live/run.go index 5882e8f298e..13af684f69d 100644 --- a/dgraph/cmd/live/run.go +++ b/dgraph/cmd/live/run.go @@ -21,6 +21,7 @@ import ( "bytes" "compress/gzip" "context" + "errors" "fmt" "io" "io/ioutil" @@ -49,6 +50,7 @@ import ( type options struct { dataFiles string + dataFormat string schemaFile string dgraph string zero string @@ -82,6 +84,7 @@ func init() { flag := Live.Cmd.Flags() flag.StringP("files", "f", "", "Location of *.rdf(.gz) or *.json(.gz) file(s) to load") flag.StringP("schema", "s", "", "Location of schema file") + flag.String("format", "", "Specify file format (rdf or json) instead of getting it from filename") flag.StringP("dgraph", "d", "127.0.0.1:9080", "Dgraph alpha gRPC server address") flag.StringP("zero", "z", "127.0.0.1:5080", "Dgraph zero gRPC server address") flag.IntP("conc", "c", 10, @@ -169,27 +172,24 @@ func (l *loader) uid(val string) string { } // processFile forwards a file to the RDF or JSON processor as appropriate -func (l *loader) processFile(ctx context.Context, file string) error { - fmt.Printf("Processing data file %q\n", file) +func (l *loader) processFile(ctx context.Context, filename string) error { + fmt.Printf("Processing data file %q\n", filename) - rd, cleanup := chunker.FileReader(file) + rd, cleanup := chunker.FileReader(filename) defer cleanup() - var err error - var isJson bool - if strings.HasSuffix(file, ".rdf") || strings.HasSuffix(file, ".rdf.gz") { - err = l.processLoadFile(ctx, rd, chunker.NewChunker(chunker.RdfInput)) - } else if strings.HasSuffix(file, ".json") || strings.HasSuffix(file, ".json.gz") { - err = l.processLoadFile(ctx, rd, chunker.NewChunker(chunker.JsonInput)) - } else if isJson, err = chunker.IsJSONData(rd); err == nil { - if isJson { - err = l.processLoadFile(ctx, rd, chunker.NewChunker(chunker.JsonInput)) - } else { - err = fmt.Errorf("Unable to determine file content format: %s", file) + loadType := chunker.DataFormat(filename, opt.dataFormat) + if loadType == chunker.UnknownFormat { + if isJson, err := chunker.IsJSONData(rd); err == nil { + if isJson { + loadType = chunker.JsonFormat + } else { + return fmt.Errorf("Need --format=rdf or --format=json to load %s", filename) + } } } - return err + return l.processLoadFile(ctx, rd, chunker.NewChunker(loadType)) } func (l *loader) processLoadFile(ctx context.Context, rd *bufio.Reader, ck chunker.Chunker) error { @@ -287,6 +287,7 @@ func run() error { x.PrintVersion() opt = options{ dataFiles: Live.Conf.GetString("files"), + dataFormat: Live.Conf.GetString("format"), schemaFile: Live.Conf.GetString("schema"), dgraph: Live.Conf.GetString("dgraph"), zero: Live.Conf.GetString("zero"), @@ -346,11 +347,14 @@ func run() error { fmt.Printf("Processed schema file %q\n\n", opt.schemaFile) } + if opt.dataFiles == "" { + return errors.New("RDF or JSON file(s) location must be specified\n") + } + filesList := x.FindDataFiles(opt.dataFiles, []string{".rdf", ".rdf.gz", ".json", ".json.gz"}) totalFiles := len(filesList) if totalFiles == 0 { - fmt.Printf("No data files to process\n") - return nil + return fmt.Errorf("No data files found in %s\n", opt.dataFiles) } else { fmt.Printf("Found %d data file(s) to process\n", totalFiles) } From a59134e488ee970c2d669f2e3b86ae7731b2462f Mon Sep 17 00:00:00 2001 From: Javier Alvarado Date: Thu, 7 Feb 2019 17:59:25 -0800 Subject: [PATCH 4/5] Fix errant newlines in error messages. --- dgraph/cmd/live/run.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dgraph/cmd/live/run.go b/dgraph/cmd/live/run.go index 13af684f69d..a762ca7ca7b 100644 --- a/dgraph/cmd/live/run.go +++ b/dgraph/cmd/live/run.go @@ -348,13 +348,13 @@ func run() error { } if opt.dataFiles == "" { - return errors.New("RDF or JSON file(s) location must be specified\n") + return errors.New("RDF or JSON file(s) location must be specified") } filesList := x.FindDataFiles(opt.dataFiles, []string{".rdf", ".rdf.gz", ".json", ".json.gz"}) totalFiles := len(filesList) if totalFiles == 0 { - return fmt.Errorf("No data files found in %s\n", opt.dataFiles) + return fmt.Errorf("No data files found in %s", opt.dataFiles) } else { fmt.Printf("Found %d data file(s) to process\n", totalFiles) } From 926c4dc6c8f1d86e3440ee15d865d3f5d2840923 Mon Sep 17 00:00:00 2001 From: Javier Alvarado Date: Mon, 11 Feb 2019 11:11:34 -0800 Subject: [PATCH 5/5] Fix error message. --- dgraph/cmd/live/run.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dgraph/cmd/live/run.go b/dgraph/cmd/live/run.go index a762ca7ca7b..4f1e95169d5 100644 --- a/dgraph/cmd/live/run.go +++ b/dgraph/cmd/live/run.go @@ -184,7 +184,7 @@ func (l *loader) processFile(ctx context.Context, filename string) error { if isJson { loadType = chunker.JsonFormat } else { - return fmt.Errorf("Need --format=rdf or --format=json to load %s", filename) + return fmt.Errorf("need --format=rdf or --format=json to load %s", filename) } } }