From 036b1f399031cceb0ed685bf5838bfefdcca1b48 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 7 Jul 2022 13:29:00 -0300 Subject: [PATCH 1/4] added group finder --- BUILD.bazel | 10 + WORKSPACE | 14 +- examples/group/BUILD.bazel | 0 examples/group/finder/BUILD.bazel | 18 ++ examples/group/finder/main.go | 158 ++++++++++ finder/finder.go | 19 ++ group/BUILD.bazel | 0 group/dsl/BUILD.bazel | 23 ++ group/dsl/expression.go | 159 ++++++++++ group/dsl/expression_test.go | 265 ++++++++++++++++ group/dsl/parser.go | 298 ++++++++++++++++++ group/dsl/parser_test.go | 398 +++++++++++++++++++++++ group/dsl/scanner.go | 263 ++++++++++++++++ group/dsl/scanner_test.go | 131 ++++++++ group/finder/BUILD.bazel | 29 ++ group/finder/finder.go | 192 ++++++++++++ group/finder/finder_test.go | 502 ++++++++++++++++++++++++++++++ group/finder/internal.go | 119 +++++++ group/finder/internal_test.go | 99 ++++++ 19 files changed, 2690 insertions(+), 7 deletions(-) create mode 100644 examples/group/BUILD.bazel create mode 100644 examples/group/finder/BUILD.bazel create mode 100644 examples/group/finder/main.go create mode 100644 group/BUILD.bazel create mode 100644 group/dsl/BUILD.bazel create mode 100644 group/dsl/expression.go create mode 100644 group/dsl/expression_test.go create mode 100644 group/dsl/parser.go create mode 100644 group/dsl/parser_test.go create mode 100644 group/dsl/scanner.go create mode 100644 group/dsl/scanner_test.go create mode 100644 group/finder/BUILD.bazel create mode 100644 group/finder/finder.go create mode 100644 group/finder/finder_test.go create mode 100644 group/finder/internal.go create mode 100644 group/finder/internal_test.go diff --git a/BUILD.bazel b/BUILD.bazel index b679ac4..1e0c70a 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -2,3 +2,13 @@ load("@bazel_gazelle//:def.bzl", "gazelle") # gazelle:prefix github.com/pedroegsilva/gofindthem gazelle(name = "gazelle") + +gazelle( + name = "gazelle-update", + args = [ + "-from_file=go.mod", + 
"-to_macro=deps.bzl%go_dependencies", + "-prune", + ], + command = "update-repos", +) diff --git a/WORKSPACE b/WORKSPACE index 95a867e..1e358fc 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -2,19 +2,19 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "io_bazel_rules_go", - sha256 = "8e968b5fcea1d2d64071872b12737bbb5514524ee5f0a4f54f5920266c261acb", + sha256 = "685052b498b6ddfe562ca7a97736741d87916fe536623afb7da2824c0211c369", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.28.0/rules_go-v0.28.0.zip", - "https://github.com/bazelbuild/rules_go/releases/download/v0.28.0/rules_go-v0.28.0.zip", + "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.33.0/rules_go-v0.33.0.zip", + "https://github.com/bazelbuild/rules_go/releases/download/v0.33.0/rules_go-v0.33.0.zip", ], ) http_archive( name = "bazel_gazelle", - sha256 = "62ca106be173579c0a167deb23358fdfe71ffa1e4cfdddf5582af26520f1c66f", + sha256 = "501deb3d5695ab658e82f6f6f549ba681ea3ca2a5fb7911154b5aa45596183fa", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.23.0/bazel-gazelle-v0.23.0.tar.gz", - "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.23.0/bazel-gazelle-v0.23.0.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.26.0/bazel-gazelle-v0.26.0.tar.gz", + "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.26.0/bazel-gazelle-v0.26.0.tar.gz", ], ) @@ -27,6 +27,6 @@ go_dependencies() go_rules_dependencies() -go_register_toolchains(version = "1.16.7") +go_register_toolchains(version = "1.18.3") gazelle_dependencies() diff --git a/examples/group/BUILD.bazel b/examples/group/BUILD.bazel new file mode 100644 index 0000000..e69de29 diff --git a/examples/group/finder/BUILD.bazel b/examples/group/finder/BUILD.bazel new file mode 100644 index 0000000..698f1c8 --- /dev/null +++ 
b/examples/group/finder/BUILD.bazel @@ -0,0 +1,18 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") + +go_library( + name = "finder_lib", + srcs = ["main.go"], + importpath = "github.com/pedroegsilva/gofindthem/examples/group/finder", + visibility = ["//visibility:private"], + deps = [ + "//finder", + "//group/finder", + ], +) + +go_binary( + name = "finder", + embed = [":finder_lib"], + visibility = ["//visibility:public"], +) diff --git a/examples/group/finder/main.go b/examples/group/finder/main.go new file mode 100644 index 0000000..abe0342 --- /dev/null +++ b/examples/group/finder/main.go @@ -0,0 +1,158 @@ +package main + +import ( + "fmt" + + "github.com/pedroegsilva/gofindthem/finder" + gfinder "github.com/pedroegsilva/gofindthem/group/finder" +) + +func main() { + gofindthemRules := map[string][]string{ + "tag1": { + `"string1"`, + `"string2"`, + }, + "tag2": { + `"string3"`, + `"string4"`, + }, + "tag3": { + `"string5"`, + `"string6"`, + }, + "tag4": { + `"string7"`, + `"string8"`, + }, + } + + rules := map[string][]string{ + "rule1": {`"tag1" or "tag2"`}, + "rule2": {`"tag3:Field3.SomeField1" or "tag4"`}, + "rule3": {`"tag3:Field3" or "tag4"`}, + } + + gft, err := finder.NewFinderWithExpressions( + &finder.CloudflareForkEngine{}, + &finder.RegexpEngine{}, + false, + gofindthemRules, + ) + + if err != nil { + panic(err) + } + + gftg, err := gfinder.NewFinderWithRules(gft, rules) + if err != nil { + panic(err) + } + + someObject := struct { + Field1 string + Field2 int + Field3 struct { + SomeField1 string + SomeField2 []string + } + }{ + Field1: "some pretty text with string1", + Field2: 42, + Field3: struct { + SomeField1 string + SomeField2 []string + }{ + SomeField1: "some pretty text with string5", + SomeField2: []string{"some pretty text with string5", "some pretty text with string2", "some pretty text with string3"}, + }, + } + + matchedExpByFieldByTag, err := gftg.TagObject(someObject, gftg.GetFieldNames(), nil) + if err != nil { 
+ panic(err) + } + + for tag, expressionsByField := range matchedExpByFieldByTag { + fmt.Println("Tag: ", tag) + for field, exprs := range expressionsByField { + fmt.Println(" Field: ", field) + for exp := range exprs { + fmt.Println(" Expressions: ", exp) + } + } + } + + res, err := gftg.ProcessObject(someObject, gftg.GetFieldNames(), nil) + if err != nil { + panic(err) + } + fmt.Println("ProcessObject: ", res) + + fmt.Println("-----------------------------") + arr := []struct { + FieldN string + FieldX string + }{ + {FieldN: "some pretty text with string5"}, + {FieldN: "some pretty text with string2"}, + {FieldN: "some pretty text with string3"}, + } + + matchedExpByFieldByTag2, err := gftg.TagObject(arr, nil, nil) + if err != nil { + panic(err) + } + for tag, expressionsByField := range matchedExpByFieldByTag2 { + fmt.Println("Tag: ", tag) + for field, exprs := range expressionsByField { + fmt.Println(" Field: ", field) + for exp := range exprs { + fmt.Println(" Expressions: ", exp) + } + } + } + + res2, err := gftg.ProcessObject(arr, nil, nil) + if err != nil { + panic(err) + } + fmt.Println("ProcessObject2: ", res2) + + fmt.Println("-----------------------------") + rawJson := ` + { + "Field1": "some pretty text with string1", + "Field2": 42, + "Field3": + { + "SomeField1": "some pretty text with string5", + "SomeField2": + [ + "some pretty text with string5", + "some pretty text with string2", + "some pretty text with string3" + ] + } + } + ` + + matchedExpByFieldByTag3, err := gftg.TagJson(rawJson, gftg.GetFieldNames(), nil) + if err != nil { + panic(err) + } + for tag, expressionsByField := range matchedExpByFieldByTag3 { + fmt.Println("Tag: ", tag) + for field, exprs := range expressionsByField { + fmt.Println(" Field: ", field) + for exp := range exprs { + fmt.Println(" Expressions: ", exp) + } + } + } + res3, err := gftg.ProcessJson(rawJson, gftg.GetFieldNames(), nil) + if err != nil { + panic(err) + } + fmt.Println("ProcessJson: ", res3) +} diff --git 
a/finder/finder.go b/finder/finder.go index 072c021..463f8e0 100644 --- a/finder/finder.go +++ b/finder/finder.go @@ -55,6 +55,25 @@ func NewFinder(subEng SubstringEngine, rgxEng RegexEngine, caseSensitive bool) ( } } +// NewFinderWithExpressions retruns a new instace of Finder with the +// expressions and tags given at expressionsByTag. +func NewFinderWithExpressions( + subEng SubstringEngine, + rgxEng RegexEngine, + caseSensitive bool, + expressionsByTag map[string][]string, +) (finder *Finder, err error) { + finder = NewFinder(subEng, rgxEng, caseSensitive) + for tag, expressions := range expressionsByTag { + err = finder.AddExpressionsWithTag(expressions, tag) + if err != nil { + return + } + } + + return +} + // AddExpression adds the expression to the finder. It also collect // and store the terms that are going to be used by the substring engine // If the expression is malformed returns an error. diff --git a/group/BUILD.bazel b/group/BUILD.bazel new file mode 100644 index 0000000..e69de29 diff --git a/group/dsl/BUILD.bazel b/group/dsl/BUILD.bazel new file mode 100644 index 0000000..e252fad --- /dev/null +++ b/group/dsl/BUILD.bazel @@ -0,0 +1,23 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "dsl", + srcs = [ + "expression.go", + "parser.go", + "scanner.go", + ], + importpath = "github.com/pedroegsilva/gofindthem/group/dsl", + visibility = ["//visibility:public"], +) + +go_test( + name = "dsl_test", + srcs = [ + "expression_test.go", + "parser_test.go", + "scanner_test.go", + ], + embed = [":dsl"], + deps = ["@com_github_stretchr_testify//assert"], +) diff --git a/group/dsl/expression.go b/group/dsl/expression.go new file mode 100644 index 0000000..72436ce --- /dev/null +++ b/group/dsl/expression.go @@ -0,0 +1,159 @@ +package dsl + +import ( + "fmt" + "strings" +) + +// ExprType are Special tokens used to define the expression type +type ExprType int + +const ( + UNSET_EXPR ExprType = iota + AND_EXPR + OR_EXPR + 
NOT_EXPR + UNIT_EXPR +) + +// GetName returns a readable name for the ExprType value +func (exprType ExprType) GetName() string { + switch exprType { + case UNSET_EXPR: + return "UNSET" + case AND_EXPR: + return "AND" + case OR_EXPR: + return "OR" + case NOT_EXPR: + return "NOT" + case UNIT_EXPR: + return "UNIT" + default: + return "UNEXPECTED" + } +} + +// TagInfo holds the name of the tag and the field path prefix that it need to be found at. +type TagInfo struct { + Name string + FieldPath string +} + +// Expression can be a TagInfo (UNIT) or a function composed by +// one or two other expressions (NOT, AND, OR). +type Expression struct { + LExpr *Expression + RExpr *Expression + Type ExprType + Tag TagInfo +} + +// GetTypeName returns the type of the expression with a readable name +func (exp *Expression) GetTypeName() string { + return exp.Type.GetName() +} + +// Solve solves the expresion using the ginven values of fieldPathByTag. +// fieldPathByTag will hold the values of all tags that were found with a +// list of field paths that the tag was found +func (exp *Expression) Solve( + matchedExpByFieldByTag map[string]map[string]map[string]struct{}, +) (bool, error) { + eval, err := exp.solve(matchedExpByFieldByTag) + return eval, err +} + +//solve implements Solve +func (exp *Expression) solve(matchedExpByFieldByTag map[string]map[string]map[string]struct{}) (bool, error) { + switch exp.Type { + case UNIT_EXPR: + if fieldPaths, ok := matchedExpByFieldByTag[exp.Tag.Name]; ok { + if exp.Tag.FieldPath == "" { + return true, nil + } + + for fieldPath, _ := range fieldPaths { + if strings.HasPrefix(fieldPath, exp.Tag.FieldPath) { + return true, nil + } + } + } + + return false, nil + + case AND_EXPR: + if exp.LExpr == nil || exp.RExpr == nil { + return false, fmt.Errorf("AND statement do not have right or left expression: %v", exp) + } + lval, err := exp.LExpr.solve(matchedExpByFieldByTag) + if err != nil { + return false, err + } + rval, err := 
exp.RExpr.solve(matchedExpByFieldByTag) + if err != nil { + return false, err + } + + return lval && rval, nil + case OR_EXPR: + if exp.LExpr == nil || exp.RExpr == nil { + return false, fmt.Errorf("OR statement do not have right or left expression: %v", exp) + } + lval, err := exp.LExpr.solve(matchedExpByFieldByTag) + if err != nil { + return false, err + } + rval, err := exp.RExpr.solve(matchedExpByFieldByTag) + if err != nil { + return false, err + } + + return lval || rval, nil + case NOT_EXPR: + if exp.RExpr == nil { + return false, fmt.Errorf("NOT statement do not have expression: %v", exp) + } + rval, err := exp.RExpr.solve(matchedExpByFieldByTag) + if err != nil { + return false, err + } + return !rval, nil + default: + return false, fmt.Errorf("unable to process expression type %d", exp.Type) + } +} + +// PrettyFormat returns the expression formated on a tabbed structure +// Eg: for the expression ("a" and "b") or "c" +// OR +// AND +// a +// b +// c +func (exp *Expression) PrettyFormat() string { + return exp.prettyFormat(0) +} + +// prettyFormat implementation of PrettyFormat() +func (exp *Expression) prettyFormat(lvl int) (pprint string) { + tabs := " " + onLVL := strings.Repeat(tabs, lvl) + if exp.Type == UNIT_EXPR { + fieldPath := "" + if exp.Tag.FieldPath != "" { + fieldPath = fmt.Sprintf("[%s]", exp.Tag.FieldPath) + } + return fmt.Sprintf("%s%s%s\n", onLVL, exp.Tag.Name, fieldPath) + } + pprint = fmt.Sprintf("%s%s\n", onLVL, exp.GetTypeName()) + if exp.LExpr != nil { + pprint += exp.LExpr.prettyFormat(lvl + 1) + } + + if exp.RExpr != nil { + pprint += exp.RExpr.prettyFormat(lvl + 1) + } + + return +} diff --git a/group/dsl/expression_test.go b/group/dsl/expression_test.go new file mode 100644 index 0000000..acf3e9c --- /dev/null +++ b/group/dsl/expression_test.go @@ -0,0 +1,265 @@ +package dsl + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSolver(t *testing.T) { + assert := assert.New(t) + for _, tc := 
range solverTestCases { + exp, err := NewParser(strings.NewReader(tc.expStr)).Parse() + assert.Nil(err, tc.message) + respInt, err := exp.Solve(tc.matchedExpByFieldByTag) + assert.Nil(err, tc.message) + assert.Equal(tc.expectedResp, respInt, tc.message) + } +} + +var solverTestCases = []struct { + expStr string + matchedExpByFieldByTag map[string]map[string]map[string]struct{} + expectedResp bool + message string +}{ + { + expStr: `"tag1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + }, + expectedResp: true, + message: "single tag true", + }, + { + expStr: `"tag1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{}, + expectedResp: false, + message: "single tag false", + }, + + // and tests + { + expStr: `"tag1" and "tag2" and "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag2": nil, + "tag3": nil, + }, + expectedResp: true, + message: "and multi tags true", + }, + { + expStr: `"tag1" and "tag2" and "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag2": nil, + "tag3": nil, + }, + expectedResp: false, + message: "and multi tags false 1", + }, + { + expStr: `"tag1" and "tag2" and "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag3": nil, + }, + expectedResp: false, + message: "and multi tags false 2", + }, + { + expStr: `"tag1" and "tag2" and "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag2": nil, + }, + expectedResp: false, + message: "and multi tags false 3", + }, + + // or tests + { + expStr: `"tag1" or "tag2" or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{}, + expectedResp: false, + message: "or multi tag false", + }, + { + expStr: `"tag1" or "tag2" or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + }, + expectedResp: true, + message: "or 
multi tag true 1", + }, + { + expStr: `"tag1" or "tag2" or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag2": nil, + }, + expectedResp: true, + message: "or multi tag true 2", + }, + { + expStr: `"tag1" or "tag2" or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag3": nil, + }, + expectedResp: true, + message: "or multi tag true 3", + }, + + // not tests + { + expStr: `not "tag1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{}, + expectedResp: true, + message: "not true", + }, + { + expStr: `not "tag1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + }, + expectedResp: false, + message: "not false", + }, + { + expStr: `not "tag1" or not "tag2"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag2": nil, + }, + expectedResp: false, + message: "not multi false", + }, + { + expStr: `not ("tag1" or "tag2") or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag3": nil, + }, + expectedResp: true, + message: "not multi true", + }, + { + expStr: `"tag1" and not "tag2" or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag3": nil, + }, + expectedResp: true, + message: "not multi true 1", + }, + { + expStr: ` not "tag2" and "tag1" or "tag3"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag3": nil, + }, + expectedResp: true, + message: "not multi true 2", + }, + { + expStr: `"tag1" and "tag3" or not "tag2"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag3": nil, + }, + expectedResp: true, + message: "not multi true 3", + }, + // parentheses tests + { + expStr: `not ("tag1" and "tag2") and ("tag1" or "tag2")`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + "tag2": nil, + }, + 
expectedResp: false, + message: "parentheses xor 1", + }, + { + expStr: `("tag1" or "tag2") and not ("tag1" and "tag2")`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{}, + expectedResp: false, + message: "parentheses xor 2", + }, + { + expStr: `not ("tag1" and "tag2") and ("tag1" or "tag2")`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag2": nil, + }, + expectedResp: true, + message: "parentheses xor 3", + }, + { + expStr: `(("tag1" and "tag2" and "tag3") or ("tag4" and not "tag5")) and ("tag6" or "tag7") and "tag8"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag4": nil, + "tag6": nil, + "tag8": nil, + }, + expectedResp: true, + message: "parentheses 1", + }, + { + expStr: `(("tag1" and "tag2" and "tag3") or ("tag4" and not "tag5")) and ("tag6" or "tag7") and "tag8"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag4": nil, + "tag6": nil, + }, + expectedResp: false, + message: "parentheses 2", + }, + // field tests + { + expStr: `"tag1:field1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": {"field1": nil}, + }, + expectedResp: true, + message: "single tag with field true 1", + }, + { + expStr: `"tag1:field1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": {"field3": nil, "field2": nil, "field1": nil}, + }, + expectedResp: true, + message: "single tag with field true 2", + }, + { + expStr: `"tag1:field1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": nil, + }, + expectedResp: false, + message: "single tag with field false 1", + }, + { + expStr: `"tag1:field1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": {"field2": nil, "field3": nil}, + }, + expectedResp: false, + message: "single tag with field false 2", + }, + { + expStr: `"tag1:field1"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": 
{"field1.field2.field3.index(2)": nil}, + }, + expectedResp: true, + message: "single tag with field partial field true 1", + }, + { + expStr: `"tag1:field"`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": {"field2": nil}, + }, + expectedResp: true, + message: "single tag with field partial field true 2", + }, +} diff --git a/group/dsl/parser.go b/group/dsl/parser.go new file mode 100644 index 0000000..6c00869 --- /dev/null +++ b/group/dsl/parser.go @@ -0,0 +1,298 @@ +package dsl + +import ( + "fmt" + "io" +) + +// Parser parser struct that holds needed information to +// parse the expression. +type Parser struct { + s *Scanner + buf struct { + tok Token // last read token + lit string // last read literal + unscanned bool // if it was unscanned + } + parCount int + fields map[string]struct{} + tags map[string]struct{} +} + +// NewParser returns a new instance of Parser. +// If case sensitive is not set all terms are changed to lowercase +func NewParser(r io.Reader) *Parser { + return &Parser{ + s: NewScanner(r), + parCount: 0, + fields: make(map[string]struct{}), + tags: make(map[string]struct{}), + } +} + +// Parse parses the expression and returns the root node +// of the parsed expression. 
+func (p *Parser) Parse() (expr *Expression, err error) { + return p.parse() +} + +// parse implementation of Parse() +func (p *Parser) parse() (*Expression, error) { + exp := &Expression{} + for { + tok, lit, err := p.scanIgnoreWhitespace() + if err != nil { + return exp, err + } + switch tok { + case OPPAR: + newExp, err := p.handleOpenPar() + if err != nil { + return exp, err + } + + if exp.LExpr == nil { + exp.LExpr = newExp + } else { + exp.RExpr = newExp + } + + case TAG: + p.unscan() + tag, err := p.parseTagInfo() + if err != nil { + return exp, err + } + + keyExp := &Expression{ + Type: UNIT_EXPR, + Tag: tag, + } + if exp.LExpr == nil { + exp.LExpr = keyExp + } else { + exp.RExpr = keyExp + } + p.tags[tag.Name] = struct{}{} + if tag.FieldPath != "" { + p.fields[tag.FieldPath] = struct{}{} + } + + case AND: + exp, err = p.handleDualOp(exp, AND_EXPR) + if err != nil { + return exp, err + } + + case OR: + exp, err = p.handleDualOp(exp, OR_EXPR) + if err != nil { + return exp, err + } + + case NOT: + nextTok, _, err := p.scanIgnoreWhitespace() + if err != nil { + return exp, err + } + + notExp := &Expression{ + Type: NOT_EXPR, + } + + switch nextTok { + case TAG: + p.unscan() + tag, err := p.parseTagInfo() + if err != nil { + return exp, err + } + notExp.RExpr = &Expression{ + Type: UNIT_EXPR, + Tag: tag, + } + p.tags[tag.Name] = struct{}{} + if tag.FieldPath != "" { + p.fields[tag.FieldPath] = struct{}{} + } + + case OPPAR: + newExp, err := p.handleOpenPar() + if err != nil { + return exp, err + } + notExp.RExpr = newExp + default: + return exp, fmt.Errorf("invalid expression: Unexpected token '%s' after NOT", nextTok.getName()) + } + + if exp.LExpr == nil { + exp.LExpr = notExp + } else { + exp.RExpr = notExp + } + + case CLPAR: + p.parCount-- + fallthrough + case EOF: + if p.parCount < 0 { + return exp, fmt.Errorf("invalid expression: unexpected EOF found. 
Extra closing parentheses: %d", p.parCount*-1) + } + + finalExp := exp + if exp.Type == UNSET_EXPR { + if exp.RExpr != nil { + finalExp = exp.RExpr + } else if exp.LExpr != nil { + finalExp = exp.LExpr + } else { + return nil, fmt.Errorf("invalid expression: unexpected EOF found") + } + } + switch finalExp.Type { + case AND_EXPR, OR_EXPR: + if finalExp.RExpr == nil { + return nil, fmt.Errorf("invalid expression: incomplete expression %s", finalExp.Type.GetName()) + } + } + return finalExp, nil + + default: + return exp, fmt.Errorf("invalid expression: Unexpected operator was found (%d = '%s')", tok, lit) + } + } +} + +// handleDualOp adds the needed information to the current expression and returns the next +// expression, that can be the same or another expression. +func (p *Parser) handleDualOp(exp *Expression, expType ExprType) (*Expression, error) { + if exp.LExpr == nil { + return exp, fmt.Errorf("invalid expression: no left expression was found for %s", expType.GetName()) + } + if exp.RExpr == nil { + exp.Type = expType + return exp, nil + } + + exp = &Expression{ + Type: expType, + LExpr: exp, + } + + nextTok, _, err := p.scanIgnoreWhitespace() + if err != nil { + return exp, err + } + + if nextTok == OPPAR { + newExp, err := p.handleOpenPar() + if err != nil { + return exp, err + } + exp.RExpr = newExp + } else { + p.unscan() + } + + return exp, nil +} + +// scan scans the next token and stores it on a buffer to +// make unscanning on token possible +func (p *Parser) scan() (tok Token, lit string, err error) { + // If we have a token on the buffer, then return it. + if p.buf.unscanned { + p.buf.unscanned = false + return p.buf.tok, p.buf.lit, nil + } + + // Otherwise read the next token from the scanner. + tok, lit, err = p.s.Scan() + if err != nil { + return + } + + // Save it to the buffer in case we unscan later. + p.buf.tok, p.buf.lit = tok, lit + + return +} + +// unscan sets the unscanned flag to assign the scan to +// use the buffered information. 
+func (p *Parser) unscan() { p.buf.unscanned = true } + +// scanIgnoreWhitespace scans the next non-whitespace token. +func (p *Parser) scanIgnoreWhitespace() (tok Token, lit string, err error) { + tok, lit, err = p.scan() + if err != nil { + return + } + if tok == WS { + tok, lit, err = p.scan() + } + return +} + +// handleOpenPar gets the expression that is inside the parentheses +func (p *Parser) handleOpenPar() (*Expression, error) { + parlvl := p.parCount + p.parCount++ + newExp, err := p.parse() + if err != nil { + return newExp, err + } + if p.parCount != parlvl { + return newExp, fmt.Errorf("invalid expression: Unexpected '('") + } + return newExp, nil +} + +// handleOpenPar gets the expression that is inside the parentheses +func (p *Parser) parseTagInfo() (TagInfo, error) { + tagInfo := TagInfo{} + tok, lit, err := p.scanIgnoreWhitespace() + if err != nil { + return tagInfo, err + } + + if tok != TAG { + return tagInfo, fmt.Errorf("invalid expression: Expecting TAG but found %s", tok.getName()) + } + + if lit == "" { + return tagInfo, fmt.Errorf("invalid expression: Found empty TAG") + } + + tagInfo.Name = lit + + nextTok, nextLit, err := p.scanIgnoreWhitespace() + if err != nil { + return tagInfo, err + } + + if nextTok != FIELD_PATH { + p.unscan() + return tagInfo, nil + } + + tagInfo.FieldPath = nextLit + return tagInfo, nil +} + +// GetFields returns the list of unique fields that were found on the expression +func (p *Parser) GetFields() (fields []string) { + for field := range p.fields { + fields = append(fields, field) + } + return fields +} + +// GetTags returns the list of unique tags that were found on the expression +func (p *Parser) GetTags() (tags []string) { + for tag := range p.tags { + tags = append(tags, tag) + } + return tags +} diff --git a/group/dsl/parser_test.go b/group/dsl/parser_test.go new file mode 100644 index 0000000..6351414 --- /dev/null +++ b/group/dsl/parser_test.go @@ -0,0 +1,398 @@ +package dsl + +import ( + "fmt" + 
"strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParser(t *testing.T) { + assert := assert.New(t) + tests := []struct { + expStr string + expectedExp Expression + expectedTags map[string]struct{} + expectedPaths map[string]struct{} + expectedErr error + message string + }{ + { + expStr: `"tag1"`, + expectedExp: Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "", + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + }, + expectedPaths: map[string]struct{}{}, + expectedErr: nil, + message: "single tag", + }, + { + expStr: `"tag1:field1"`, + expectedExp: Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + }, + expectedErr: nil, + message: "tag with field", + }, + { + expStr: `("tag1:field1")`, + expectedExp: Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + }, + expectedErr: nil, + message: "single tag parentheses", + }, + { + expStr: `"tag1:field1" and "tag2:field2"`, + expectedExp: Expression{ + Type: AND_EXPR, + LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag2", + FieldPath: "field2", + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + "field2": {}, + }, + expectedErr: nil, + message: "simple and", + }, + { + expStr: `("tag1:field1" and "tag2:field2")`, + expectedExp: Expression{ + Type: AND_EXPR, + LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag2", + 
FieldPath: "field2", + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + "field2": {}, + }, + expectedErr: nil, + message: "simple and with parentheses", + }, + { + expStr: `"tag1:field1" or "tag2:field2"`, + expectedExp: Expression{ + Type: OR_EXPR, + LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag2", + FieldPath: "field2", + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + "field2": {}, + }, + expectedErr: nil, + message: "simple or", + }, + { + expStr: `not "tag1:field1"`, + expectedExp: Expression{ + Type: NOT_EXPR, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + }, + expectedErr: nil, + message: "simple not", + }, + { + expStr: `"tag1:field1" and "tag2:field2" or not "tag3:field3"`, + expectedExp: Expression{ + Type: OR_EXPR, + LExpr: &Expression{ + Type: AND_EXPR, + LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag2", + FieldPath: "field2", + }, + }, + }, + RExpr: &Expression{ + Type: NOT_EXPR, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag3", + FieldPath: "field3", + }, + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + "tag3": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + "field2": {}, + "field3": {}, + }, + expectedErr: nil, + message: "multiple function no parentheses", + }, + { + expStr: `"tag1:field1" and ("tag2:field2" or not "tag3:field3")`, + expectedExp: Expression{ + Type: AND_EXPR, + 
LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + RExpr: &Expression{ + Type: OR_EXPR, + LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag2", + FieldPath: "field2", + }, + }, + RExpr: &Expression{ + Type: NOT_EXPR, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag3", + FieldPath: "field3", + }, + }, + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + "tag3": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + "field2": {}, + "field3": {}, + }, + expectedErr: nil, + message: "multiple function with parentheses", + }, + { + expStr: `not("tag2:field2" or "tag3:field3") and "tag1:field1"`, + expectedExp: Expression{ + Type: AND_EXPR, + LExpr: &Expression{ + Type: NOT_EXPR, + RExpr: &Expression{ + Type: OR_EXPR, + LExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag2", + FieldPath: "field2", + }, + }, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag3", + FieldPath: "field3", + }, + }, + }, + }, + RExpr: &Expression{ + Type: UNIT_EXPR, + Tag: TagInfo{ + Name: "tag1", + FieldPath: "field1", + }, + }, + }, + expectedTags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + "tag3": {}, + }, + expectedPaths: map[string]struct{}{ + "field1": {}, + "field2": {}, + "field3": {}, + }, + expectedErr: nil, + message: "not with parentheses", + }, + { + expStr: ``, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: unexpected EOF found"), + message: "empty expression", + }, + { + expStr: `(("tag1")`, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: Unexpected '('"), + message: "invalid open parentheses", + }, + { + expStr: `("tag1"))`, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + 
expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: unexpected EOF found. Extra closing parentheses: 1"), + message: "invalid close parentheses", + }, + { + expStr: `and`, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: no left expression was found for AND"), + message: "invalid expression empty dual exp", + }, + { + expStr: ` "tag1" and `, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: incomplete expression AND"), + + message: "invalid expression incomplete dual exp", + }, + { + expStr: `or`, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: no left expression was found for OR"), + + message: "invalid expression empty dual exp", + }, + { + expStr: ` "tag1" or `, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: incomplete expression OR"), + + message: "invalid expression incomplete dual exp", + }, + { + expStr: `not`, + expectedExp: Expression{}, + expectedTags: map[string]struct{}{}, + expectedPaths: map[string]struct{}{}, + expectedErr: fmt.Errorf("invalid expression: Unexpected token 'EOF' after NOT"), + message: "invalid expression incomplete dual exp", + }, + } + + for _, tc := range tests { + p := NewParser(strings.NewReader(tc.expStr)) + exp, err := p.Parse() + assert.Equal(tc.expectedErr, err, tc.message) + if err == nil { + assert.Equal(tc.expectedExp, *exp, tc.message) + assert.Equal(tc.expectedTags, p.tags, tc.message) + assert.Equal(tc.expectedPaths, p.fields, tc.message) + } + } +} diff --git a/group/dsl/scanner.go b/group/dsl/scanner.go new file mode 100644 index 0000000..379095e --- /dev/null +++ 
b/group/dsl/scanner.go @@ -0,0 +1,263 @@ +package dsl + +import ( + "bufio" + "bytes" + "fmt" + "io" + "strings" +) + +// Token represents a lexical token. +type Token int + +const ( + // Special tokens + ILLEGAL Token = iota + EOF + WS + + // Literals + TAG // "tag" + FIELD_PATH // "tag:fieldpath" + + // Misc characters + QUOTATION // " + OPPAR // ( + CLPAR // ) + + // Operators + AND // 'and' or 'AND' + OR // 'or' or 'OR' + NOT // 'not' or 'NOT' +) + +// getName returns a readable name for the Token +func (tok Token) getName() string { + switch tok { + case ILLEGAL: + return "ILLEGAL" + case EOF: + return "EOF" + case WS: + return "WS" + case TAG: + return "TAG" + case FIELD_PATH: + return "FIELD_PATH" + case QUOTATION: + return "QUOTATION" + case OPPAR: + return "OPPAR" + case CLPAR: + return "CLPAR" + case AND: + return "AND" + case OR: + return "OR" + case NOT: + return "NOT" + default: + return "UNEXPECTED" + } +} + +// Scanner represents a lexical scanner. +type Scanner struct { + r *bufio.Reader +} + +// NewScanner returns a new instance of Scanner. +func NewScanner(r io.Reader) *Scanner { + return &Scanner{r: bufio.NewReader(r)} +} + +// Scan returns the next token and literal value. +func (s *Scanner) Scan() (tok Token, lit string, err error) { + // Read the next rune. + ch := s.read() + + // If we see whitespace then consume all contiguous whitespace. + // If we see a letter then consume as an operator. + // If we see a '"' consume as a TAG. 
+ // If we see a '(' or ')' returns OPPAR or CLPAR respectively + switch { + case isWhitespace(ch): + s.unread() + return s.scanWhitespace() + case ch == '"': + s.unread() + return s.scanTag() + case ch == ':': + s.unread() + return s.scanFieldPath() + case isLetter(ch): + s.unread() + return s.scanOperators() + case ch == '(': + return OPPAR, "(", nil + case ch == ')': + return CLPAR, ")", nil + case ch == eof: + return EOF, "", nil + } + + return ILLEGAL, "", fmt.Errorf("illegal char was found %c", ch) +} + +// scanWhitespace consumes the current rune and all contiguous whitespace. +func (s *Scanner) scanWhitespace() (tok Token, lit string, err error) { + // Create a buffer and read the current character into it. + var buf bytes.Buffer + buf.WriteRune(s.read()) + + // Read every subsequent whitespace character into the buffer. + // Non-whitespace characters and EOF will cause the loop to exit. + for { + if ch := s.read(); ch == eof { + break + } else if !isWhitespace(ch) { + s.unread() + break + } else { + buf.WriteRune(ch) + } + } + + return WS, buf.String(), nil +} + +// scanOperators consumes the current rune and all contiguous operator runes. +func (s *Scanner) scanOperators() (tok Token, lit string, err error) { + // Create a buffer and read the current character into it. + ch := s.read() + if !isLetter(ch) { + return ILLEGAL, "", fmt.Errorf("fail to scan operator: expected letter but found %c", ch) + } + var buf bytes.Buffer + + buf.WriteRune(ch) + + // Read every subsequent operator character into the buffer. + // Non-operator characters and EOF will cause the loop to exit. + for { + if ch := s.read(); ch == eof { + break + } else if !isLetter(ch) { + s.unread() + break + } else { + _, _ = buf.WriteRune(ch) + } + } + + // If the string matches a operator then return that operator. + // Otherwise return an error. 
+ lit = buf.String() + switch strings.ToUpper(lit) { + case "AND": + tok = AND + case "OR": + tok = OR + case "NOT": + tok = NOT + default: + return ILLEGAL, "", fmt.Errorf("failed to scan operator: unexpected operator '%s' found", lit) + } + + return +} + +// scanTag scans the tag and scape needed characters +// If a invalid scape is used an error will be returned and if EOF is found +// before a '"' returns an error as well. +func (s *Scanner) scanTag() (tok Token, lit string, err error) { + ch := s.read() + if ch != '"' { + return ILLEGAL, "", fmt.Errorf("fail to scan tag: expected \" but found %c", ch) + } + var buf bytes.Buffer + +Loop: + for { + ch := s.read() + switch ch { + case eof: + return ILLEGAL, "", fmt.Errorf("fail to scan tag: expected ':' but found EOF") + case '\\': + scapedCh := s.read() + switch scapedCh { + case '\\', '"', ':': + _, _ = buf.WriteRune(scapedCh) + default: + return ILLEGAL, "", fmt.Errorf("fail to scan tag: invalid escaped char %c", scapedCh) + } + case ':': + s.unread() + fallthrough + case '"': + break Loop + default: + _, _ = buf.WriteRune(ch) + } + } + lit = strings.Trim(buf.String(), " ") + tok = TAG + return +} + +// scanFieldPath scans the tag and scape needed characters +// If a invalid scape is used an error will be returned and if EOF is found +// before a '"' returns an error as well. 
+func (s *Scanner) scanFieldPath() (tok Token, lit string, err error) { + ch := s.read() + if ch != ':' { + return ILLEGAL, "", fmt.Errorf("fail to scan field: expected ':' but found %c", ch) + } + var buf bytes.Buffer +Loop: + for { + ch := s.read() + switch ch { + case eof: + return ILLEGAL, "", fmt.Errorf("fail to scan field: expected '\"' but found EOF") + case '\\': + scapedCh := s.read() + switch scapedCh { + case '\\', '"': + _, _ = buf.WriteRune(scapedCh) + default: + return ILLEGAL, "", fmt.Errorf("fail to scan field: invalid escaped char %c", scapedCh) + } + case '"': + break Loop + default: + _, _ = buf.WriteRune(ch) + } + } + lit = strings.Trim(buf.String(), " ") + tok = FIELD_PATH + return +} + +// read reads the next rune from the buffered reader. +// Returns the rune(0) if an error occurs (or io.EOF is returned). +func (s *Scanner) read() rune { + ch, _, err := s.r.ReadRune() + if err != nil { + return eof + } + return ch +} + +// unread places the previously read rune back on the reader. +func (s *Scanner) unread() { _ = s.r.UnreadRune() } + +// isWhitespace returns true if the rune is a space, tab, or newline. +func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' } + +// isLetter returns true if the rune is a letter. +func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } + +// eof represents a marker rune for the end of the reader. 
+var eof = rune(0) diff --git a/group/dsl/scanner_test.go b/group/dsl/scanner_test.go new file mode 100644 index 0000000..b5ac0dd --- /dev/null +++ b/group/dsl/scanner_test.go @@ -0,0 +1,131 @@ +package dsl + +import ( + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +type expectedAtScan struct { + Tok Token + Lit string + Err error +} + +func TestScanner(t *testing.T) { + assert := assert.New(t) + tests := []struct { + expStr string + expected []expectedAtScan + message string + }{ + { + expStr: `and or not "tag1:somepath.to.field" ( )`, + expected: []expectedAtScan{ + {Tok: AND, Lit: "and", Err: nil}, + {Tok: WS, Lit: " ", Err: nil}, + {Tok: OR, Lit: "or", Err: nil}, + {Tok: WS, Lit: " ", Err: nil}, + {Tok: NOT, Lit: "not", Err: nil}, + {Tok: WS, Lit: " ", Err: nil}, + {Tok: TAG, Lit: "tag1", Err: nil}, + {Tok: FIELD_PATH, Lit: "somepath.to.field", Err: nil}, + {Tok: WS, Lit: " ", Err: nil}, + {Tok: OPPAR, Lit: "(", Err: nil}, + {Tok: WS, Lit: " ", Err: nil}, + {Tok: CLPAR, Lit: ")", Err: nil}, + {Tok: EOF, Lit: "", Err: nil}, + }, + message: "all tokens", + }, + { + expStr: `invalidOne`, + expected: []expectedAtScan{ + { + Tok: ILLEGAL, + Lit: "", + Err: fmt.Errorf("failed to scan operator: unexpected operator 'invalidOne' found"), + }, + }, + message: "invalid operator token", + }, + { + expStr: `"invalidTag `, + expected: []expectedAtScan{ + { + Tok: ILLEGAL, + Lit: "", + Err: fmt.Errorf("fail to scan tag: expected ':' but found EOF"), + }, + }, + message: "invalid tag token", + }, + { + expStr: `"tag \: \" \\:path\"1\""`, + expected: []expectedAtScan{ + { + Tok: TAG, + Lit: "tag : \" \\", + Err: nil, + }, + { + Tok: FIELD_PATH, + Lit: "path\"1\"", + Err: nil, + }, + {Tok: EOF, Lit: "", Err: nil}, + }, + message: "valid scaped tag", + }, + { + expStr: `"tag \s"`, + expected: []expectedAtScan{ + { + Tok: ILLEGAL, + Lit: "", + Err: fmt.Errorf("fail to scan tag: invalid escaped char s"), + }, + {Tok: EOF, Lit: "", Err: nil}, + }, + 
message: "invalid scaped tag", + }, + + { + expStr: `123`, + expected: []expectedAtScan{ + { + Tok: ILLEGAL, + Lit: "", + Err: fmt.Errorf("illegal char was found 1"), + }, + }, + message: "invalid operator", + }, + } + + for _, tc := range tests { + scanner := NewScanner(strings.NewReader(tc.expStr)) + count := 0 + for { + tok, lit, err := scanner.Scan() + if count >= len(tc.expected) { + t.Fail() + break + } + expected := tc.expected[count] + assert.Equal(expected.Err, err, tc.message) + assert.Equal(expected.Tok, tok, tc.message) + assert.Equal(expected.Lit, lit, tc.message) + if err != nil { + break + } + + count++ + if tok == EOF { + break + } + } + } +} diff --git a/group/finder/BUILD.bazel b/group/finder/BUILD.bazel new file mode 100644 index 0000000..b1e679e --- /dev/null +++ b/group/finder/BUILD.bazel @@ -0,0 +1,29 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "finder", + srcs = [ + "finder.go", + "internal.go", + ], + importpath = "github.com/pedroegsilva/gofindthem/group/finder", + visibility = ["//visibility:public"], + deps = [ + "//finder", + "//group/dsl", + ], +) + +go_test( + name = "finder_test", + srcs = [ + "finder_test.go", + "internal_test.go", + ], + embed = [":finder"], + deps = [ + "//finder", + "//group/dsl", + "@com_github_stretchr_testify//assert", + ], +) diff --git a/group/finder/finder.go b/group/finder/finder.go new file mode 100644 index 0000000..9df6c51 --- /dev/null +++ b/group/finder/finder.go @@ -0,0 +1,192 @@ +package finder + +import ( + "encoding/json" + "strings" + + "github.com/pedroegsilva/gofindthem/finder" + "github.com/pedroegsilva/gofindthem/group/dsl" +) + +// GroupFinder stores all values needed for the rules +type GroupFinder struct { + findthem *finder.Finder + expressionWrapperByExprName map[string][]ExpressionWrapper + fields map[string]struct{} + tags map[string]struct{} +} + +// ExpressionWrapper store the parsed expression and the raw expressions +type 
ExpressionWrapper struct { + ExpressionString string + Expression *dsl.Expression +} + +// NewFinder returns initialized instancy of GroupFinder. +func NewFinder(findthem *finder.Finder) *GroupFinder { + return &GroupFinder{ + findthem: findthem, + expressionWrapperByExprName: make(map[string][]ExpressionWrapper), + fields: make(map[string]struct{}), + tags: make(map[string]struct{}), + } +} + +// NewFinderWithRules returns initialized instancy of GroupFinder with the given rules. +func NewFinderWithRules(findthem *finder.Finder, rulesByName map[string][]string) (rules *GroupFinder, err error) { + rules = NewFinder(findthem) + err = rules.AddRules(rulesByName) + return +} + +// AddRule adds the given expressions with the rule name to the tagger. +func (rf *GroupFinder) AddRule(ruleName string, expressions []string) error { + for _, rawExpr := range expressions { + p := dsl.NewParser(strings.NewReader(rawExpr)) + exp, err := p.Parse() + if err != nil { + return err + } + expWrapper := ExpressionWrapper{ + ExpressionString: rawExpr, + Expression: exp, + } + rf.expressionWrapperByExprName[ruleName] = append(rf.expressionWrapperByExprName[ruleName], expWrapper) + for _, tag := range p.GetTags() { + rf.tags[tag] = struct{}{} + } + for _, field := range p.GetFields() { + rf.fields[field] = struct{}{} + } + } + return nil +} + +// AddRules adds the given expressions with the rule names (key of the map) to the tagger. +func (rf *GroupFinder) AddRules(rulesByName map[string][]string) error { + for key, exprs := range rulesByName { + err := rf.AddRule(key, exprs) + if err != nil { + return err + } + } + return nil +} + +// GetFieldNames returns all the unique fields that can be found on all the expressions. +func (rf *GroupFinder) GetFieldNames() (fields []string) { + for field := range rf.fields { + fields = append(fields, field) + } + return +} + +// TagJson tags the fields of a data of type json. 
Warning at the moment golang json unmarshal, +// when provided a interface{} as the target object, consider all numbers as float64. +// So use the FloatTagger instead of IntTagger for tagging numbers. +func (rf *GroupFinder) TagJson( + data string, + includePaths []string, + excludePaths []string, +) (matchedExpByFieldByTag map[string]map[string]map[string]struct{}, err error) { + var genericObj interface{} + err = json.Unmarshal([]byte(data), &genericObj) + if err != nil { + return + } + + return rf.TagObject(genericObj, includePaths, excludePaths) +} + +// TagObject tags the fields of a data of type interface. +func (rf *GroupFinder) TagObject( + data interface{}, + includePaths []string, + excludePaths []string, +) (matchedExpByFieldByTag map[string]map[string]map[string]struct{}, err error) { + matchedExpByFieldByTag = make(map[string]map[string]map[string]struct{}) + err = rf.getRulesInfo(data, "", includePaths, excludePaths, matchedExpByFieldByTag) + return +} + +// TagText tags the fields of a string. +func (rf *GroupFinder) TagText( + data string, +) (matchedExpByTag map[string][]string, err error) { + matchedExpByTag = make(map[string][]string) + matchedExpByFieldByTag, err := rf.TagObject(data, nil, nil) + if err != nil { + return + } + + for tag, fields := range matchedExpByFieldByTag { + for exp := range fields[""] { + matchedExpByTag[tag] = append(matchedExpByTag[tag], exp) + } + } + return +} + +// EvaluateRules evaluate all rules with the given fields by tag. 
+func (rf *GroupFinder) EvaluateRules( + matchedExpByFieldByTag map[string]map[string]map[string]struct{}, +) (expressionsByRule map[string][]string, err error) { + expressionsByRule = make(map[string][]string) + for name, exprWrappers := range rf.expressionWrapperByExprName { + for _, ew := range exprWrappers { + eval, err := ew.Expression.Solve(matchedExpByFieldByTag) + if err != nil { + return nil, err + } + if eval { + expressionsByRule[name] = append(expressionsByRule[name], ew.ExpressionString) + } + } + } + return +} + +// ProcessJson extract all tags and evaluate all rules for the given data of type json. +// includePaths can be used to specify what fields will be used on the tagging, and +// excludePaths can be used to specify what fields will be skipped on the tagging +// use an empty array or nil to tag all fields. +func (rf *GroupFinder) ProcessJson( + rawJson string, + includePaths []string, + excludePaths []string, +) (expressionsByRule map[string][]string, err error) { + matchedExpByFieldByTag, err := rf.TagJson(rawJson, includePaths, excludePaths) + if err != nil { + return nil, err + } + + return rf.EvaluateRules(matchedExpByFieldByTag) +} + +// ProcessObject extract all tags and evaluate all rules for the given data of type interface. +// includePaths can be used to specify what fields will be used on the tagging, and +// excludePaths can be used to specify what fields will be skipped on the tagging +// use an empty array or nil to tag all fields. +func (rf *GroupFinder) ProcessObject( + obj interface{}, + includePaths []string, + excludePaths []string, +) (expressionsByRule map[string][]string, err error) { + matchedExpByFieldByTag, err := rf.TagObject(obj, includePaths, excludePaths) + if err != nil { + return nil, err + } + + return rf.EvaluateRules(matchedExpByFieldByTag) +} + +// ProcessText extract all tags and evaluate all rules for the given string. 
+func (rf *GroupFinder) ProcessText( + data string, +) (expressionsByRule map[string][]string, err error) { + matchedExpByFieldByTag, err := rf.TagObject(data, nil, nil) + if err != nil { + return nil, err + } + return rf.EvaluateRules(matchedExpByFieldByTag) +} diff --git a/group/finder/finder_test.go b/group/finder/finder_test.go new file mode 100644 index 0000000..b0d87df --- /dev/null +++ b/group/finder/finder_test.go @@ -0,0 +1,502 @@ +package finder + +import ( + "fmt" + "testing" + + gofindthem "github.com/pedroegsilva/gofindthem/finder" + "github.com/pedroegsilva/gofindthem/group/dsl" + "github.com/stretchr/testify/assert" +) + +func TestNewFinder(t *testing.T) { + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + assert := assert.New(t) + tests := []struct { + expectedFinder *GroupFinder + message string + }{ + { + expectedFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: make(map[string][]ExpressionWrapper), + fields: make(map[string]struct{}), + tags: make(map[string]struct{}), + }, + message: "empty gftg", + }, + { + expectedFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: make(map[string][]ExpressionWrapper), + fields: make(map[string]struct{}), + tags: make(map[string]struct{}), + }, + message: "gftgwith empty gftgs", + }, + } + + for _, tc := range tests { + gftg := NewFinder(gft) + assert.Equal(tc.expectedFinder, gftg, tc.message) + } +} + +func TestNewFinderWithRules(t *testing.T) { + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + assert := assert.New(t) + tests := []struct { + rulesByName map[string][]string + groupFinder *GroupFinder + expectedErr error + message string + }{ + { + rulesByName: map[string][]string{ + "rule1": { + `"tag1"`, + `"tag2:field1"`, + }, + "rule2": { + `"tag3:field2.field3"`, + `"tag4"`, + }, + }, + groupFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: 
map[string][]ExpressionWrapper{ + "rule1": { + { + ExpressionString: `"tag1"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag1", FieldPath: ""}, + }, + }, + { + ExpressionString: `"tag2:field1"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag2", FieldPath: "field1"}, + }, + }, + }, + "rule2": { + { + ExpressionString: `"tag3:field2.field3"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag3", FieldPath: "field2.field3"}, + }, + }, + { + ExpressionString: `"tag4"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag4", FieldPath: ""}, + }, + }, + }, + }, + fields: map[string]struct{}{ + "field1": {}, + "field2.field3": {}, + }, + tags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + "tag3": {}, + "tag4": {}, + }, + }, + expectedErr: nil, + message: "new gftgwith valid rules", + }, + { + rulesByName: map[string][]string{ + "rule1": {`"tag1`}, + }, + groupFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: map[string][]ExpressionWrapper{}, + fields: map[string]struct{}{}, + tags: map[string]struct{}{}, + }, + expectedErr: fmt.Errorf("fail to scan tag: expected ':' but found EOF"), + message: "new gftgwith invalid rules", + }, + } + + for _, tc := range tests { + gftg, err := NewFinderWithRules(gft, tc.rulesByName) + assert.Equal(tc.expectedErr, err, tc.message) + assert.Equal(tc.groupFinder, gftg, tc.message) + } +} + +func TestAddRule(t *testing.T) { + assert := assert.New(t) + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + tests := []struct { + ruleName string + expressions []string + groupFinder *GroupFinder + expectedErr error + message string + }{ + { + ruleName: "rule1", + expressions: []string{ + `"tag1"`, + `"tag2:field1"`, + }, + groupFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: map[string][]ExpressionWrapper{ + "rule1": 
{ + { + ExpressionString: `"tag1"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag1", FieldPath: ""}, + }, + }, + { + ExpressionString: `"tag2:field1"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag2", FieldPath: "field1"}, + }, + }, + }, + }, + fields: map[string]struct{}{ + "field1": {}, + }, + tags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + }, + }, + expectedErr: nil, + message: "add valid expressions", + }, + { + ruleName: "rule1", + expressions: []string{ + `"tag1`, + }, + groupFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: map[string][]ExpressionWrapper{}, + fields: map[string]struct{}{}, + tags: map[string]struct{}{}, + }, + expectedErr: fmt.Errorf("fail to scan tag: expected ':' but found EOF"), + message: "add invalid expression", + }, + } + + for _, tc := range tests { + gftg := NewFinder(gft) + err := gftg.AddRule(tc.ruleName, tc.expressions) + assert.Equal(tc.expectedErr, err, tc.message) + assert.Equal(tc.groupFinder, gftg, tc.message) + } +} + +func TestAddRules(t *testing.T) { + assert := assert.New(t) + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + tests := []struct { + rulesByName map[string][]string + groupFinder *GroupFinder + expectedErr error + message string + }{ + { + rulesByName: map[string][]string{ + "rule1": { + `"tag1"`, + `"tag2:field1"`, + }, + "rule2": { + `"tag3:field2.field3"`, + `"tag4"`, + }, + }, + groupFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: map[string][]ExpressionWrapper{ + "rule1": { + { + ExpressionString: `"tag1"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag1", FieldPath: ""}, + }, + }, + { + ExpressionString: `"tag2:field1"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag2", FieldPath: "field1"}, + }, + }, + }, + "rule2": { + { + ExpressionString: 
`"tag3:field2.field3"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag3", FieldPath: "field2.field3"}, + }, + }, + { + ExpressionString: `"tag4"`, + Expression: &dsl.Expression{ + Type: dsl.UNIT_EXPR, + Tag: dsl.TagInfo{Name: "tag4", FieldPath: ""}, + }, + }, + }, + }, + fields: map[string]struct{}{ + "field1": {}, + "field2.field3": {}, + }, + tags: map[string]struct{}{ + "tag1": {}, + "tag2": {}, + "tag3": {}, + "tag4": {}, + }, + }, + message: "add rules with valid rules", + }, + { + rulesByName: map[string][]string{ + "rule1": {`"tag1`}, + }, + groupFinder: &GroupFinder{ + findthem: gft, + expressionWrapperByExprName: map[string][]ExpressionWrapper{}, + fields: map[string]struct{}{}, + tags: map[string]struct{}{}, + }, + expectedErr: fmt.Errorf("fail to scan tag: expected ':' but found EOF"), + message: "add rules with invalid rule", + }, + } + + for _, tc := range tests { + gftg := NewFinder(gft) + err := gftg.AddRules(tc.rulesByName) + assert.Equal(tc.expectedErr, err, tc.message) + assert.Equal(tc.groupFinder, gftg, tc.message) + } +} + +func TestTagJson(t *testing.T) { + assert := assert.New(t) + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + tests := []struct { + rawJsonStr string + matchedExpByFieldByTag map[string]map[string]map[string]struct{} + expectedErr error + message string + }{ + { + rawJsonStr: `{"strField": "some string", "intField": 42, "floatField": 42.42}`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "strTag": {"strField": nil}, + }, + expectedErr: nil, + message: "tag json", + }, + } + + for _, tc := range tests { + gftg := NewFinder(gft) + res, err := gftg.TagJson(tc.rawJsonStr, nil, nil) + assert.Equal(tc.expectedErr, err, tc.message+" expected error") + for _, resFileds := range res { + for _, fields := range tc.matchedExpByFieldByTag { + assert.Equal(resFileds, fields, tc.message+" expected field element") + } + } + } 
+ +} + +func TestTagObject(t *testing.T) { + assert := assert.New(t) + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + gft.AddExpressionWithTag(`"string"`, "strTag") + tests := []struct { + object interface{} + matchedExpByFieldByTag map[string]map[string]map[string]struct{} + expectedErr error + message string + }{ + { + object: `some random string`, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "strTag": {"": {`"string"`: struct{}{}}}, + }, + expectedErr: nil, + message: "tag object raw string", + }, + { + object: []string{`some random string`, `some random string`}, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "strTag": { + "index(0)": {`"string"`: struct{}{}}, + "index(1)": {`"string"`: struct{}{}}, + }, + }, + expectedErr: nil, + message: "tag object array of string", + }, + { + object: struct { + StrField string + StrArray []string + AnotherObj struct { + Field1 int + Field2 float32 + internalField int + } + internalStr string + internalArr []string + internalObj struct { + Field3 float64 + } + }{ + StrField: "some random string", + StrArray: []string{ + "some random string 1", + "some random string 2", + }, + AnotherObj: struct { + Field1 int + Field2 float32 + internalField int + }{ + Field1: 42, + Field2: 42.42, + internalField: 0, + }, + internalStr: "some internal value", + internalArr: []string{"some internal value 0"}, + internalObj: struct { + Field3 float64 + }{ + Field3: 0.0, + }, + }, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "strTag": { + "StrField": {`"string"`: struct{}{}}, + "StrArray.index(0)": {`"string"`: struct{}{}}, + "StrArray.index(1)": {`"string"`: struct{}{}}, + }, + }, + expectedErr: nil, + message: "tag object struct with internal fields", + }, + } + + rules := map[string][]string{ + "test": {`"strTag"`}, + } + gftg, _ := NewFinderWithRules(gft, rules) + for _, tc := range tests { + res, err := 
gftg.TagObject(tc.object, nil, nil) + assert.Equal(tc.expectedErr, err, tc.message+" expected error") + assert.Equal(tc.matchedExpByFieldByTag, res, tc.message+" expected equal elements found") + } + +} + +func TestTagText(t *testing.T) { + assert := assert.New(t) + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + gft.AddExpressionWithTag(`"string"`, "strTag") + tests := []struct { + text string + matchedExpByTag map[string][]string + expectedErr error + message string + }{ + { + text: `some random string`, + matchedExpByTag: map[string][]string{ + "strTag": {`"string"`}, + }, + expectedErr: nil, + message: "tag text", + }, + } + for _, tc := range tests { + gftg := NewFinder(gft) + res, err := gftg.TagText(tc.text) + assert.Equal(tc.expectedErr, err, tc.message+" expected error") + assert.Equal(tc.matchedExpByTag, res, tc.message+" result") + } +} + +func TestEvaluateRules(t *testing.T) { + assert := assert.New(t) + gft := gofindthem.NewFinder(&gofindthem.CloudflareForkEngine{}, &gofindthem.EmptyRgxEngine{}, false) + tests := []struct { + rulesByName map[string][]string + matchedExpByFieldByTag map[string]map[string]map[string]struct{} + expectedExpressionsByRule map[string][]string + expectedErr error + message string + }{ + { + rulesByName: map[string][]string{ + "rule1": { + `"tag1" and "tag2"`, + }, + "rule2": { + `"tag3:field1" and "tag4:field2.innerfield1"`, + }, + "rule3": { + `"tag5:field3"`, + }, + "unmatched rule 1": { + `"tag4:field1"`, + }, + }, + matchedExpByFieldByTag: map[string]map[string]map[string]struct{}{ + "tag1": {"randomFiled": nil}, + "tag2": {"randomFiled2": nil}, + "tag3": {"field1": nil}, + "tag4": {"field2.innerfield1": nil}, + "tag5": {"field3.innerfield2": nil}, + }, + expectedExpressionsByRule: map[string][]string{ + "rule1": { + `"tag1" and "tag2"`, + }, + "rule2": { + `"tag3:field1" and "tag4:field2.innerfield1"`, + }, + "rule3": { + `"tag5:field3"`, + }, + }, + expectedErr: 
nil, + message: "evaluate rules", + }, + } + for _, tc := range tests { + gftg, _ := NewFinderWithRules(gft, tc.rulesByName) + extractorInfoByTaggerName, err := gftg.EvaluateRules(tc.matchedExpByFieldByTag) + assert.Equal(tc.expectedErr, err, tc.message+" expected error") + assert.Equal(tc.expectedExpressionsByRule, extractorInfoByTaggerName, tc.message+" result") + } +} diff --git a/group/finder/internal.go b/group/finder/internal.go new file mode 100644 index 0000000..4da5d30 --- /dev/null +++ b/group/finder/internal.go @@ -0,0 +1,119 @@ +package finder + +import ( + "fmt" + "reflect" + "strings" +) + +func (rf *GroupFinder) getRulesInfo( + data interface{}, + fieldName string, + includePaths []string, + excludePaths []string, + matchedExpByFieldByByTag map[string]map[string]map[string]struct{}, +) (err error) { + t := reflect.TypeOf(data) + + val := reflect.ValueOf(data) + + switch val.Kind() { + case reflect.String: + if !isValidateFieldPath(fieldName, includePaths, excludePaths) { + return + } + expRes, err := rf.findthem.ProcessText(val.String()) + if err != nil { + return err + } + + for _, er := range expRes { + if _, ok := matchedExpByFieldByByTag[er.Tag]; !ok { + matchedExpByFieldByByTag[er.Tag] = make(map[string]map[string]struct{}) + } + if _, ok := matchedExpByFieldByByTag[er.Tag][fieldName]; !ok { + matchedExpByFieldByByTag[er.Tag][fieldName] = make(map[string]struct{}) + } + matchedExpByFieldByByTag[er.Tag][fieldName][er.ExpresionStr] = struct{}{} + } + + case reflect.Struct: + numField := t.NumField() + + for i := 0; i < numField; i++ { + structField := t.Field(i) + fn := structField.Name + if fieldName != "" { + fn = fieldName + "." 
+ fn + } + if !val.Field(i).CanInterface() { + continue + } + err := rf.getRulesInfo(val.Field(i).Interface(), fn, includePaths, excludePaths, matchedExpByFieldByByTag) + if err != nil { + return err + } + } + + case reflect.Map: + iter := val.MapRange() + for iter.Next() { + k := iter.Key() + if k.Type().Kind() != reflect.String { + break + } + + v := iter.Value() + fn := k.String() + if fieldName != "" { + fn = fieldName + "." + fn + } + if !v.CanInterface() { + continue + } + err := rf.getRulesInfo(v.Interface(), fn, includePaths, excludePaths, matchedExpByFieldByByTag) + if err != nil { + return err + } + } + + case reflect.Array, reflect.Slice: + for i := 0; i < val.Len(); i++ { + fn := fmt.Sprintf("index(%d)", i) + if fieldName != "" { + fn = fieldName + "." + fn + } + if !val.Index(i).CanInterface() { + continue + } + err := rf.getRulesInfo(val.Index(i).Interface(), fn, includePaths, excludePaths, matchedExpByFieldByByTag) + if err != nil { + return err + } + } + } + + return +} + +// isValidateFieldPath returns true if the field path is valid for tagging +func isValidateFieldPath(fieldPath string, includePaths []string, excludePaths []string) bool { + if len(excludePaths) > 0 { + for _, excP := range excludePaths { + if strings.HasPrefix(fieldPath, excP) { + return false + } + } + } + + if len(includePaths) > 0 { + for _, incP := range includePaths { + if strings.HasPrefix(fieldPath, incP) { + return true + } + } + return false + } + + return true +} diff --git a/group/finder/internal_test.go b/group/finder/internal_test.go new file mode 100644 index 0000000..c69ba1d --- /dev/null +++ b/group/finder/internal_test.go @@ -0,0 +1,99 @@ +package finder + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_isValidateFieldPath(t *testing.T) { + assert := assert.New(t) + + type args struct { + fieldPath string + includePaths []string + excludePaths []string + } + tests := []struct { + args args + expected bool + message string + }{ + { + 
args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{}, + excludePaths: []string{}, + }, + expected: true, + message: "empty includes and excludes", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{"field1.inner1.inner2"}, + excludePaths: []string{}, + }, + expected: true, + message: "include exact match", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{"field2"}, + excludePaths: []string{}, + }, + expected: false, + message: "include no match ", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{}, + excludePaths: []string{"field1.inner1.inner2"}, + }, + expected: false, + message: "exclude exact match", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{"field1.inner1.inner2"}, + excludePaths: []string{"field1.inner1.inner2"}, + }, + expected: false, + message: "exclude and include exact match", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{"field1.inner1"}, + excludePaths: []string{}, + }, + expected: true, + message: "include partial match", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{}, + excludePaths: []string{"field1.inner1"}, + }, + expected: false, + message: "exclude partial match", + }, + { + args: args{ + fieldPath: "field1.inner1.inner2", + includePaths: []string{"field1.inner1"}, + excludePaths: []string{"field1.inner1"}, + }, + expected: false, + message: "exclude and include partial match", + }, + } + for _, tc := range tests { + res := isValidateFieldPath(tc.args.fieldPath, tc.args.includePaths, tc.args.excludePaths) + assert.Equal(tc.expected, res, tc.message) + } +} From d2bd555089da618296c808d719329b447d24dc4f Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 7 Jul 2022 15:34:34 -0300 Subject: [PATCH 2/4] updated readme --- group/README.md | 97 ++++++++++++++++++++++++++++++++++++++++++ 
group/finder/finder.go | 16 ++++---
 2 files changed, 107 insertions(+), 6 deletions(-)
 create mode 100644 group/README.md

diff --git a/group/README.md b/group/README.md
new file mode 100644
index 0000000..3cf9d39
--- /dev/null
+++ b/group/README.md
@@ -0,0 +1,97 @@
+# Group Finder
+Group finder is a package that adds another DSL to improve the maintainability
+of the searched patterns and enables searches on specific fields of structured documents.
+
+## Finder
+The finder is used to manage multiple rules. It will use the DSL along with the gofindthem finder
+to verify if the tags were found on the specified fields.
+
+### Usage
+You will need 2 sets of expressions, one to define the patterns that are needed to
+be searched with its tag and the second one with the expressions to define the tags
+and field relations.
+
+```golang
+    gofindthemRules := map[string][]string{
+        "tag1": {
+            `"string1"`,
+            `"string2"`,
+        },
+        "tag2": {
+            `"string3"`,
+            `"string4"`,
+        },
+        "tag3": {
+            `"string5"`,
+            `"string6"`,
+        },
+        "tag4": {
+            `"string7"`,
+            `"string8"`,
+        },
+    }
+
+    rules := map[string][]string{
+        "rule1": {`"tag1" or "tag2" and not "tag3"`},
+        "rule2": {`"tag3:Field3.SomeField1" or "tag4"`},
+        "rule3": {`"tag3:Field3" or "tag4"`},
+    }
+```
+
+With the 2 sets of expressions ready you will first need to create the
+gofindthem finder and the group finder:
+
+```golang
+    gft, err := finder.NewFinderWithExpressions(
+        &finder.CloudflareForkEngine{},
+        &finder.RegexpEngine{},
+        false,
+        gofindthemRules,
+    )
+
+    gftg, err := gfinder.NewFinderWithRules(gft, rules)
+    if err != nil {
+        panic(err)
+    }
+```
+
+Now it's possible to check which rules were evaluated as true on a text
+or on a structured document:
+
+```golang
+    // searching on a struct
+    res, err := gftg.ProcessObject(someObject, gftg.GetFieldNames(), nil)
+    if err != nil {
+        panic(err)
+    }
+    fmt.Println("ProcessObject: ", res)
+
+    // searching on a raw json
+    res3, err := gftg.ProcessJson(rawJson, 
gftg.GetFieldNames(), nil) + if err != nil { + panic(err) + } + fmt.Println("ProcessJson: ", res3) +``` +The full example can be found at `/examples/group/finder/main.go` + +## Group Finder DSL +### Definition +The DSL uses 3 operators (AND, OR, NOT), Tag (defined by "tag:(field)"), +where the field is optional, and parentheses to form expressions. +A valid expression can be: + +- A single rule with or without a specific field. Eg: `"tag1"` `"tag1:field1"` +- The result of an operation. `"tag1" OR "tag2:field1"` +- An expression enclosed by parentheses `("tag1" OR "tag2:field1")` + +Each operator functions as the following: + +- **AND** - Uses the expression before and after it to solve them as a logical `AND` operator. + > (valid expression) AND (valid expression) eg: `"term 1" AND "term 2"` + +- **OR** - Uses the expression before and after it to solve them as a logical `OR` operator. + > \ OR \ eg: `"term 1" OR "term 2"` + +- **NOT** - Uses the expression after it to solve them as a logical `NOT` operator. + > NOT \ eg: `NOT "term 1"` diff --git a/group/finder/finder.go b/group/finder/finder.go index 9df6c51..3348c31 100644 --- a/group/finder/finder.go +++ b/group/finder/finder.go @@ -147,9 +147,11 @@ func (rf *GroupFinder) EvaluateRules( } // ProcessJson extract all tags and evaluate all rules for the given data of type json. -// includePaths can be used to specify what fields will be used on the tagging, and -// excludePaths can be used to specify what fields will be skipped on the tagging -// use an empty array or nil to tag all fields. +// includePaths can be used to specify what fields will be used on the tagging. +// if empty array or nil is passed to 'includePaths' it will consider all fields as taggable. +// excludePaths can be used to specify what fields will be skipped on the tagging, if +// there is a conflict on a specific field the excludePath has precedence over the include paths. 
+// Empty array or nil can be used to not exclude any fields func (rf *GroupFinder) ProcessJson( rawJson string, includePaths []string, @@ -164,9 +166,11 @@ func (rf *GroupFinder) ProcessJson( } // ProcessObject extract all tags and evaluate all rules for the given data of type interface. -// includePaths can be used to specify what fields will be used on the tagging, and -// excludePaths can be used to specify what fields will be skipped on the tagging -// use an empty array or nil to tag all fields. +// includePaths can be used to specify what fields will be used on the tagging. +// if empty array or nil is passed to 'includePaths' it will consider all fields as taggable. +// excludePaths can be used to specify what fields will be skipped on the tagging, if +// there is a conflict on a specific field the excludePath has precedence over the include paths. +// Empty array or nil can be used to not exclude any fields func (rf *GroupFinder) ProcessObject( obj interface{}, includePaths []string, From d3d009bfba17565b0ea4835ff19589401342abbf Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 7 Jul 2022 21:12:08 -0300 Subject: [PATCH 3/4] added information of group finder to the main README --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b46509..77a4011 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ from which is heavily influenced by the [InfluxQL parser](https://github.com/inf ## Usage/Examples -There are 2 libraries on this repository, the DSL and the Finder. +There are 3 libraries/packages on this repository, the DSL, the Finder and the GroupFinder. ### Finder The finder is used to manage multiple expressions. It will use the DSL to extract the terms and regex from each expression and use them to process the text with the appropriate engine. @@ -80,6 +80,15 @@ And finally you can check which expressions were match on each text. 
The full example can be found at `/examples/finder/main.go`
 
+### GroupFinder
+The Group finder is a package that adds another DSL to improve the maintainability
+of the searched patterns and enables searches on specific fields of structured documents.
+It allows the configuration to be split into 2 categories (rules and tags) so that the tags
+can be used by multiple rules. The rules also enable checking if a given tag was found on
+a specific field for a structured document.
+You can find more about the usage of the Group Finder in its [README](https://github.com/pedroegsilva/gofindthem/tree/main/group)
+
+
 ### DSL
 #### Definition
 The DSL uses 5 operators (AND, OR, NOT, R, INORD), terms (defined by "") and parentheses to form expressions. A valid expression can be:

From 225261562354484c54da6f9810ce81fec50870c8 Mon Sep 17 00:00:00 2001
From: Pedro Silva
Date: Thu, 7 Jul 2022 21:25:15 -0300
Subject: [PATCH 4/4] minor fixes on documentation

---
 group/README.md         | 6 +++---
 group/dsl/expression.go | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/group/README.md b/group/README.md
index 3cf9d39..8836308 100644
--- a/group/README.md
+++ b/group/README.md
@@ -88,10 +88,10 @@ A valid expression can be:
 Each operator functions as the following:
 
 - **AND** - Uses the expression before and after it to solve them as a logical `AND` operator.
-  > (valid expression) AND (valid expression) eg: `"term 1" AND "term 2"`
+  > (valid expression) AND (valid expression) eg: `"tag1" AND "tag2"`
 
 - **OR** - Uses the expression before and after it to solve them as a logical `OR` operator.
-  > \ OR \ eg: `"term 1" OR "term 2"`
+  > \ OR \ eg: `"tag1" OR "tag2"`
 
 - **NOT** - Uses the expression after it to solve them as a logical `NOT` operator.
- > NOT \ eg: `NOT "term 1"` + > NOT \ eg: `NOT "tag1"` diff --git a/group/dsl/expression.go b/group/dsl/expression.go index 72436ce..026fe93 100644 --- a/group/dsl/expression.go +++ b/group/dsl/expression.go @@ -54,8 +54,8 @@ func (exp *Expression) GetTypeName() string { return exp.Type.GetName() } -// Solve solves the expresion using the ginven values of fieldPathByTag. -// fieldPathByTag will hold the values of all tags that were found with a +// Solve solves the expression using the given values of matchedExpByFieldByTag. +// matchedExpByFieldByTag will hold the values of all tags that were found with a // list of field paths that the tag was found func (exp *Expression) Solve( matchedExpByFieldByTag map[string]map[string]map[string]struct{}, @@ -73,7 +73,7 @@ func (exp *Expression) solve(matchedExpByFieldByTag map[string]map[string]map[st return true, nil } - for fieldPath, _ := range fieldPaths { + for fieldPath := range fieldPaths { if strings.HasPrefix(fieldPath, exp.Tag.FieldPath) { return true, nil } @@ -124,7 +124,7 @@ func (exp *Expression) solve(matchedExpByFieldByTag map[string]map[string]map[st } } -// PrettyFormat returns the expression formated on a tabbed structure +// PrettyFormat returns the expression formatted on a tabbed structure // Eg: for the expression ("a" and "b") or "c" // OR // AND