From c5678534e691e68181fc24b9ac938ec4189968fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Carlos=20Ch=C3=A1vez?= Date: Sun, 5 Nov 2023 18:25:39 +0100 Subject: [PATCH 1/3] chore: lazy load regexes to save memory. --- collection/collection.go | 4 +- internal/collections/concat.go | 5 +- internal/collections/concat_test.go | 3 +- internal/collections/map.go | 5 +- internal/collections/map_test.go | 3 +- internal/collections/named.go | 5 +- internal/collections/named_test.go | 3 +- internal/collections/sized.go | 5 +- internal/corazawaf/rule.go | 15 ++--- internal/corazawaf/transaction_test.go | 3 +- internal/corazawaf/waf.go | 5 +- internal/operators/restpath.go | 7 ++- internal/operators/rx.go | 7 ++- internal/operators/rx_test.go | 3 +- internal/operators/validate_nid.go | 7 ++- internal/regexp/regex.go | 82 ++++++++++++++++++++++++++ internal/regexp/regex_test.go | 44 ++++++++++++++ internal/seclang/directives.go | 5 +- internal/seclang/rules_test.go | 3 +- internal/variables/generator/main.go | 3 +- 20 files changed, 180 insertions(+), 37 deletions(-) create mode 100644 internal/regexp/regex.go create mode 100644 internal/regexp/regex_test.go diff --git a/collection/collection.go b/collection/collection.go index 668608966..8b00c5a0a 100644 --- a/collection/collection.go +++ b/collection/collection.go @@ -4,7 +4,7 @@ package collection import ( - "regexp" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" ) @@ -37,7 +37,7 @@ type Keyed interface { Get(key string) []string // FindRegex returns a slice of MatchData for the regex - FindRegex(key *regexp.Regexp) []types.MatchData + FindRegex(key regexp.Regexp) []types.MatchData // FindString returns a slice of MatchData for the string FindString(key string) []types.MatchData diff --git a/internal/collections/concat.go b/internal/collections/concat.go index f54489e0d..390e81eae 100644 --- a/internal/collections/concat.go +++ b/internal/collections/concat.go @@ -4,9 +4,10 
@@ package collections import ( - "regexp" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/types" @@ -67,7 +68,7 @@ func (c *ConcatKeyed) Get(key string) []string { } // FindRegex returns a slice of MatchData for the regex -func (c *ConcatKeyed) FindRegex(key *regexp.Regexp) []types.MatchData { +func (c *ConcatKeyed) FindRegex(key regexp.Regexp) []types.MatchData { var res []types.MatchData for _, d := range c.data { res = append(res, replaceVariable(c.variable, d.FindRegex(key))...) diff --git a/internal/collections/concat_test.go b/internal/collections/concat_test.go index 77761e4b7..efa437d10 100644 --- a/internal/collections/concat_test.go +++ b/internal/collections/concat_test.go @@ -4,10 +4,11 @@ package collections import ( - "regexp" "strings" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/map.go b/internal/collections/map.go index eca4dcd38..6eda3d8e8 100644 --- a/internal/collections/map.go +++ b/internal/collections/map.go @@ -4,9 +4,10 @@ package collections import ( - "regexp" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/types" @@ -40,7 +41,7 @@ func (c *Map) Get(key string) []string { return values } -func (c *Map) FindRegex(key *regexp.Regexp) []types.MatchData { +func (c *Map) FindRegex(key regexp.Regexp) []types.MatchData { var result []types.MatchData for k, data := range c.data { if key.MatchString(k) { diff --git a/internal/collections/map_test.go b/internal/collections/map_test.go index c73e09dd5..fb0a01bfd 100644 --- a/internal/collections/map_test.go +++ b/internal/collections/map_test.go @@ -15,9 +15,10 @@ package 
collections import ( "fmt" - "regexp" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/named.go b/internal/collections/named.go index 8cfb81240..6d4234c9a 100644 --- a/internal/collections/named.go +++ b/internal/collections/named.go @@ -5,9 +5,10 @@ package collections import ( "fmt" - "regexp" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/types" @@ -94,7 +95,7 @@ type NamedCollectionNames struct { collection *NamedCollection } -func (c *NamedCollectionNames) FindRegex(key *regexp.Regexp) []types.MatchData { +func (c *NamedCollectionNames) FindRegex(key regexp.Regexp) []types.MatchData { panic("selection operator not supported") } diff --git a/internal/collections/named_test.go b/internal/collections/named_test.go index 0a773b4d4..229a819bc 100644 --- a/internal/collections/named_test.go +++ b/internal/collections/named_test.go @@ -5,9 +5,10 @@ package collections import ( "fmt" - "regexp" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/sized.go b/internal/collections/sized.go index 146395ddc..81571951f 100644 --- a/internal/collections/sized.go +++ b/internal/collections/sized.go @@ -5,10 +5,11 @@ package collections import ( "fmt" - "regexp" "strconv" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/types" @@ -32,7 +33,7 @@ func NewSizeCollection(variable variables.RuleVariable, data ...*NamedCollection } // FindRegex returns a slice of MatchData for the regex -func (c *SizeCollection) FindRegex(*regexp.Regexp) []types.MatchData { +func (c *SizeCollection) FindRegex(regexp.Regexp) 
[]types.MatchData { return c.FindAll() } diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 14016b957..12f7ba3c8 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -6,12 +6,13 @@ package corazawaf import ( "fmt" "reflect" - "regexp" "strconv" "strings" "sync" "unsafe" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazarules" @@ -50,7 +51,7 @@ type ruleVariableException struct { // The key for the variable that is going to be requested // If nil, KeyStr is going to be used - KeyRx *regexp.Regexp + KeyRx regexp.Regexp } // RuleVariable is compiled during runtime by transactions @@ -65,7 +66,7 @@ type ruleVariableParams struct { // The key for the variable that is going to be requested // If nil, KeyStr is going to be used - KeyRx *regexp.Regexp + KeyRx regexp.Regexp // The string key for the variable that is going to be requested // If KeyRx is not nil, KeyStr is ignored @@ -454,14 +455,14 @@ func (r *Rule) AddAction(name string, action plugintypes.Action) error { // it will be used to match the variable, in case of string it will // be a fixed match, in case of nil it will match everything func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) error { - var re *regexp.Regexp + var re regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { return err } else { - re = vare.(*regexp.Regexp) + re = vare.(regexp.Regexp) } } @@ -524,13 +525,13 @@ func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) e // OK: SecRule !ARGS:id "..." // ERROR: SecRule !ARGS: "..." 
func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error { - var re *regexp.Regexp + var re regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { return err } else { - re = vare.(*regexp.Regexp) + re = vare.(regexp.Regexp) } } // Prevent sigsev diff --git a/internal/corazawaf/transaction_test.go b/internal/corazawaf/transaction_test.go index 938aef632..77c653e0e 100644 --- a/internal/corazawaf/transaction_test.go +++ b/internal/corazawaf/transaction_test.go @@ -7,12 +7,13 @@ import ( "bytes" "fmt" "io" - "regexp" "runtime/debug" "strconv" "strings" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" diff --git a/internal/corazawaf/waf.go b/internal/corazawaf/waf.go index 7af329a10..9968795e3 100644 --- a/internal/corazawaf/waf.go +++ b/internal/corazawaf/waf.go @@ -9,11 +9,12 @@ import ( "io" "io/fs" "os" - "regexp" "strconv" "strings" "time" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/auditlog" @@ -119,7 +120,7 @@ type WAF struct { AuditLogParts types.AuditLogParts // Contains the regular expression for relevant status audit logging - AuditLogRelevantStatus *regexp.Regexp + AuditLogRelevantStatus regexp.Regexp auditLogWriter plugintypes.AuditLogWriter diff --git a/internal/operators/restpath.go b/internal/operators/restpath.go index f1e4a8911..2e194fcda 100644 --- a/internal/operators/restpath.go +++ b/internal/operators/restpath.go @@ -7,9 +7,10 @@ package operators import ( "fmt" - "regexp" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + 
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" ) @@ -21,7 +22,7 @@ var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`) // It will later transform the path to a regex and assign the variables to // ARGS_PATH type restpath struct { - re *regexp.Regexp + re regexp.Regexp } var _ plugintypes.Operator = (*restpath)(nil) @@ -36,7 +37,7 @@ func newRESTPath(options plugintypes.OperatorOptions) (plugintypes.Operator, err if err != nil { return nil, err } - return &restpath{re: re.(*regexp.Regexp)}, nil + return &restpath{re: re.(regexp.Regexp)}, nil } func (o *restpath) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/rx.go b/internal/operators/rx.go index e801c9f72..0499bb84d 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -7,10 +7,11 @@ package operators import ( "fmt" - "regexp" "strconv" "unicode/utf8" + "github.com/corazawaf/coraza/v3/internal/regexp" + "rsc.io/binaryregexp" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" @@ -18,7 +19,7 @@ import ( ) type rx struct { - re *regexp.Regexp + re regexp.Regexp } var _ plugintypes.Operator = (*rx)(nil) @@ -40,7 +41,7 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { if err != nil { return nil, err } - return &rx{re: re.(*regexp.Regexp)}, nil + return &rx{re: re.(regexp.Regexp)}, nil } func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/rx_test.go b/internal/operators/rx_test.go index e9785713b..be276edad 100644 --- a/internal/operators/rx_test.go +++ b/internal/operators/rx_test.go @@ -5,9 +5,10 @@ package operators import ( "fmt" - "regexp" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazawaf" ) diff --git a/internal/operators/validate_nid.go 
b/internal/operators/validate_nid.go index 383f160b6..53b130ef5 100644 --- a/internal/operators/validate_nid.go +++ b/internal/operators/validate_nid.go @@ -7,10 +7,11 @@ package operators import ( "fmt" - "regexp" "strconv" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" ) @@ -19,7 +20,7 @@ type validateNidFunction = func(input string) bool type validateNid struct { fn validateNidFunction - re *regexp.Regexp + re regexp.Regexp } var _ plugintypes.Operator = (*validateNid)(nil) @@ -46,7 +47,7 @@ func newValidateNID(options plugintypes.OperatorOptions) (plugintypes.Operator, return nil, err } - return &validateNid{fn: fn, re: re.(*regexp.Regexp)}, nil + return &validateNid{fn: fn, re: re.(regexp.Regexp)}, nil } func (o *validateNid) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/regexp/regex.go b/internal/regexp/regex.go new file mode 100644 index 000000000..5ec0d5f6c --- /dev/null +++ b/internal/regexp/regex.go @@ -0,0 +1,82 @@ +package regexp + +import ( + "regexp" + "sync" +) + +func MustCompile(str string) *regexp.Regexp { + return regexp.MustCompile(str) +} + +type Regexp interface { + MatchString(s string) bool + FindStringSubmatch(s string) []string + FindAllStringSubmatch(s string, n int) [][]string + SubexpNames() []string + Match(s []byte) bool + String() string +} + +type lazyRegexp struct { + expr string + re *regexp.Regexp + once sync.Once +} + +var _ Regexp = (*lazyRegexp)(nil) + +func (r *lazyRegexp) MatchString(s string) bool { + r.once.Do(func() { + r.re = regexp.MustCompile(r.expr) + }) + + return r.re.MatchString(s) +} + +func (r *lazyRegexp) FindStringSubmatch(s string) []string { + r.once.Do(func() { + r.re = regexp.MustCompile(r.expr) + }) + + return r.re.FindStringSubmatch(s) +} + +func (r *lazyRegexp) FindAllStringSubmatch(s string, n int) [][]string { + r.once.Do(func() { + 
r.re = regexp.MustCompile(r.expr) + }) + + return r.re.FindAllStringSubmatch(s, n) +} + +func (r *lazyRegexp) SubexpNames() []string { + r.once.Do(func() { + r.re = regexp.MustCompile(r.expr) + }) + + return r.re.SubexpNames() +} + +func (r *lazyRegexp) Match(b []byte) bool { + r.once.Do(func() { + r.re = regexp.MustCompile(r.expr) + }) + + return r.re.Match(b) +} + +func (r *lazyRegexp) String() string { + return r.expr +} + +func Compile(expr string) (Regexp, error) { + _, err := regexp.Compile(expr) + if err != nil { + return nil, err + } + + return &lazyRegexp{expr: expr}, nil +} + +var _ Regexp = (*regexp.Regexp)(nil) diff --git a/internal/regexp/regex_test.go b/internal/regexp/regex_test.go new file mode 100644 index 000000000..1df583203 --- /dev/null +++ b/internal/regexp/regex_test.go @@ -0,0 +1,44 @@ +package regexp + +import ( + "testing" +) + +func TestCompile(t *testing.T) { + _, err := Compile(`[]`) + if err == nil { + t.Fatalf("expected error") + } + + _, err = Compile("[a-z]+") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestMustCompile(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Errorf("MustCompile panicked with error: %v", r) + } + }() + MustCompile("[a-z]+") +} + +func TestCompileCompilesJustOnce(t *testing.T) { + re, _ := Compile("[a-z]+") + lre := re.(*lazyRegexp) + + if lre.re != nil { + t.Fatalf("expected nil") + } + + m := re.Match([]byte("abc")) + if !m { + t.Fatalf("expected match") + } + + if lre.re == nil { + t.Fatalf("unexpected nil") + } +} diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 7a158c349..778515eff 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -9,10 +9,11 @@ import ( "errors" "fmt" "io/fs" - "regexp" "strconv" "strings" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/internal/auditlog" 
"github.com/corazawaf/coraza/v3/internal/corazawaf" @@ -737,7 +738,7 @@ func directiveSecAuditLogRelevantStatus(options *DirectiveOptions) error { return err } - options.WAF.AuditLogRelevantStatus = re.(*regexp.Regexp) + options.WAF.AuditLogRelevantStatus = re.(regexp.Regexp) return nil } diff --git a/internal/seclang/rules_test.go b/internal/seclang/rules_test.go index b9b2b3693..44a29a81d 100644 --- a/internal/seclang/rules_test.go +++ b/internal/seclang/rules_test.go @@ -4,10 +4,11 @@ package seclang import ( - "regexp" "strings" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" + "github.com/corazawaf/coraza/v3/internal/corazawaf" "github.com/corazawaf/coraza/v3/types" ) diff --git a/internal/variables/generator/main.go b/internal/variables/generator/main.go index 07fb0d7b2..a33005b01 100644 --- a/internal/variables/generator/main.go +++ b/internal/variables/generator/main.go @@ -14,9 +14,10 @@ import ( "go/types" "log" "os" - "regexp" "strings" "text/template" + + "github.com/corazawaf/coraza/v3/internal/regexp" ) //go:embed variablesmap.go.tmpl From aedfc507f04db2d229579a50186e9ef6113e6b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Carlos=20Ch=C3=A1vez?= Date: Tue, 14 Nov 2023 13:13:52 +0100 Subject: [PATCH 2/3] feat: expose regex compiler mutation through experimental API. --- experimental/regexp.go | 24 +++++++++++++ internal/regexp/regex.go | 68 +++++------------------------------ internal/regexp/regex_test.go | 18 ---------- 3 files changed, 33 insertions(+), 77 deletions(-) create mode 100644 experimental/regexp.go diff --git a/experimental/regexp.go b/experimental/regexp.go new file mode 100644 index 000000000..f42ed774a --- /dev/null +++ b/experimental/regexp.go @@ -0,0 +1,24 @@ +package experimental + +import ( + "fmt" + + "github.com/corazawaf/coraza/v3/internal/regexp" +) + +// SetRegexpCompiler sets the regex compiler used by the WAF. 
This is specially +// useful when we want to lazily compile regexes in a mono thread environment as +// we don't need to synchronize the regex compilation. +func SetRegexpCompiler(fn func(expr string) (regexp.Regexp, error)) { + if fn == nil { + fmt.Println("invalid regex compiler") + return + } + + if regexp.RegexCompiler != nil { + fmt.Println("regex compiler already set") + return + } + + regexp.RegexCompiler = fn +} diff --git a/internal/regexp/regex.go b/internal/regexp/regex.go index 5ec0d5f6c..d928b84a0 100644 --- a/internal/regexp/regex.go +++ b/internal/regexp/regex.go @@ -2,9 +2,16 @@ package regexp import ( "regexp" - "sync" ) +var RegexCompiler func(expr string) (Regexp, error) + +func init() { + RegexCompiler = func(expr string) (Regexp, error) { + return regexp.Compile(expr) + } +} + func MustCompile(str string) *regexp.Regexp { return regexp.MustCompile(str) } @@ -18,65 +25,8 @@ type Regexp interface { String() string } -type lazyRegexp struct { - expr string - re *regexp.Regexp - once sync.Once -} - -var _ Regexp = (*lazyRegexp)(nil) - -func (r *lazyRegexp) MatchString(s string) bool { - r.once.Do(func() { - r.re = regexp.MustCompile(r.expr) - }) - - return r.re.MatchString(s) -} - -func (r *lazyRegexp) FindStringSubmatch(s string) []string { - r.once.Do(func() { - r.re = regexp.MustCompile(r.expr) - }) - - return r.re.FindStringSubmatch(s) -} - -func (r *lazyRegexp) FindAllStringSubmatch(s string, n int) [][]string { - r.once.Do(func() { - r.re = regexp.MustCompile(r.expr) - }) - - return r.re.FindAllStringSubmatch(s, n) -} - -func (r *lazyRegexp) SubexpNames() []string { - r.once.Do(func() { - r.re = regexp.MustCompile(r.expr) - }) - - return r.re.SubexpNames() -} - -func (r *lazyRegexp) Match(b []byte) bool { - r.once.Do(func() { - r.re = regexp.MustCompile(r.expr) - }) - - return r.re.Match(b) -} - -func (r *lazyRegexp) String() string { - return r.expr -} - func Compile(expr string) (Regexp, error) { - _, err := regexp.Compile(expr) - if err != 
nil { - return nil, err - } - - return &lazyRegexp{expr: expr}, nil + return RegexCompiler(expr) } var _ Regexp = (*regexp.Regexp)(nil) diff --git a/internal/regexp/regex_test.go b/internal/regexp/regex_test.go index 1df583203..abf53fe8b 100644 --- a/internal/regexp/regex_test.go +++ b/internal/regexp/regex_test.go @@ -24,21 +24,3 @@ func TestMustCompile(t *testing.T) { }() MustCompile("[a-z]+") } - -func TestCompileCompilesJustOnce(t *testing.T) { - re, _ := Compile("[a-z]+") - lre := re.(*lazyRegexp) - - if lre.re != nil { - t.Fatalf("expected nil") - } - - m := re.Match([]byte("abc")) - if !m { - t.Fatalf("expected match") - } - - if lre.re == nil { - t.Fatalf("unexpected nil") - } -} From 22ba28d4ae2a401a8f30aec584ceee918841e382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Carlos=20Ch=C3=A1vez?= Date: Tue, 14 Nov 2023 13:35:13 +0100 Subject: [PATCH 3/3] chore: improves API. --- collection/collection.go | 1 - experimental/{ => regexp}/regexp.go | 6 +++++- experimental/regexp/regexptypes/types.go | 19 +++++++++++++++++ internal/collections/concat.go | 3 +-- internal/collections/concat_test.go | 1 - internal/collections/map.go | 3 +-- internal/collections/map_test.go | 1 - internal/collections/named.go | 3 +-- internal/collections/named_test.go | 1 - internal/collections/sized.go | 3 +-- internal/corazawaf/rule.go | 3 +-- internal/corazawaf/transaction_test.go | 3 +-- internal/corazawaf/waf.go | 3 +-- internal/operators/restpath.go | 3 +-- internal/operators/rx.go | 3 +-- internal/operators/rx_test.go | 3 +-- internal/operators/validate_nid.go | 3 +-- internal/regexp/regex.go | 27 ++++++++++++------------ internal/regexp/regex_test.go | 3 +++ internal/seclang/directives.go | 3 +-- internal/seclang/rules_test.go | 3 +-- 21 files changed, 53 insertions(+), 45 deletions(-) rename experimental/{ => regexp}/regexp.go (66%) create mode 100644 experimental/regexp/regexptypes/types.go diff --git a/collection/collection.go b/collection/collection.go index 
8b00c5a0a..5a3eb9d9a 100644 --- a/collection/collection.go +++ b/collection/collection.go @@ -5,7 +5,6 @@ package collection import ( "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/types" ) diff --git a/experimental/regexp.go b/experimental/regexp/regexp.go similarity index 66% rename from experimental/regexp.go rename to experimental/regexp/regexp.go index f42ed774a..02a5b69a2 100644 --- a/experimental/regexp.go +++ b/experimental/regexp/regexp.go @@ -1,15 +1,19 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + package experimental import ( "fmt" + "github.com/corazawaf/coraza/v3/experimental/regexp/regexptypes" "github.com/corazawaf/coraza/v3/internal/regexp" ) // SetRegexpCompiler sets the regex compiler used by the WAF. This is specially // useful when we want to lazily compile regexes in a mono thread environment as // we don't need to synchronize the regex compilation. -func SetRegexpCompiler(fn func(expr string) (regexp.Regexp, error)) { +func SetRegexpCompiler(fn func(expr string) (regexptypes.Regexp, error)) { if fn == nil { fmt.Println("invalid regex compiler") return diff --git a/experimental/regexp/regexptypes/types.go b/experimental/regexp/regexptypes/types.go new file mode 100644 index 000000000..34697387a --- /dev/null +++ b/experimental/regexp/regexptypes/types.go @@ -0,0 +1,19 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package regexptypes + +import "regexp" + +// Regexp is the interface that wraps the basic MatchString, FindStringSubmatch, +// FindAllStringSubmatch, SubexpNames, Match and String methods. 
+type Regexp interface { + MatchString(s string) bool + FindStringSubmatch(s string) []string + FindAllStringSubmatch(s string, n int) [][]string + SubexpNames() []string + Match(s []byte) bool + String() string +} + +var _ Regexp = (*regexp.Regexp)(nil) diff --git a/internal/collections/concat.go b/internal/collections/concat.go index 390e81eae..2b235527f 100644 --- a/internal/collections/concat.go +++ b/internal/collections/concat.go @@ -6,10 +6,9 @@ package collections import ( "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/concat_test.go b/internal/collections/concat_test.go index efa437d10..b19223d91 100644 --- a/internal/collections/concat_test.go +++ b/internal/collections/concat_test.go @@ -8,7 +8,6 @@ import ( "testing" "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/map.go b/internal/collections/map.go index 6eda3d8e8..104b0ca16 100644 --- a/internal/collections/map.go +++ b/internal/collections/map.go @@ -6,10 +6,9 @@ package collections import ( "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/map_test.go b/internal/collections/map_test.go index fb0a01bfd..0e1f5e136 100644 --- a/internal/collections/map_test.go +++ b/internal/collections/map_test.go @@ -18,7 +18,6 @@ import ( "testing" "github.com/corazawaf/coraza/v3/internal/regexp" - 
"github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/named.go b/internal/collections/named.go index 6d4234c9a..a49a8a252 100644 --- a/internal/collections/named.go +++ b/internal/collections/named.go @@ -7,10 +7,9 @@ import ( "fmt" "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/named_test.go b/internal/collections/named_test.go index 229a819bc..572b630e3 100644 --- a/internal/collections/named_test.go +++ b/internal/collections/named_test.go @@ -8,7 +8,6 @@ import ( "testing" "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/sized.go b/internal/collections/sized.go index 81571951f..67eb3ec23 100644 --- a/internal/collections/sized.go +++ b/internal/collections/sized.go @@ -8,10 +8,9 @@ import ( "strconv" "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 12f7ba3c8..659792050 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -11,12 +11,11 @@ import ( "sync" "unsafe" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" 
"github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/corazawaf/transaction_test.go b/internal/corazawaf/transaction_test.go index 77c653e0e..530036a97 100644 --- a/internal/corazawaf/transaction_test.go +++ b/internal/corazawaf/transaction_test.go @@ -12,14 +12,13 @@ import ( "strings" "testing" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/collections" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" utils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" diff --git a/internal/corazawaf/waf.go b/internal/corazawaf/waf.go index 9968795e3..b2365132a 100644 --- a/internal/corazawaf/waf.go +++ b/internal/corazawaf/waf.go @@ -13,12 +13,11 @@ import ( "strings" "time" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/environment" + "github.com/corazawaf/coraza/v3/internal/regexp" stringutils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/internal/sync" "github.com/corazawaf/coraza/v3/types" diff --git a/internal/operators/restpath.go b/internal/operators/restpath.go index 2e194fcda..9d86e6704 100644 --- a/internal/operators/restpath.go +++ b/internal/operators/restpath.go @@ -9,10 +9,9 @@ import ( "fmt" "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + 
"github.com/corazawaf/coraza/v3/internal/regexp" ) var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`) diff --git a/internal/operators/rx.go b/internal/operators/rx.go index 0499bb84d..25db5dd90 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -10,12 +10,11 @@ import ( "strconv" "unicode/utf8" - "github.com/corazawaf/coraza/v3/internal/regexp" - "rsc.io/binaryregexp" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" ) type rx struct { diff --git a/internal/operators/rx_test.go b/internal/operators/rx_test.go index be276edad..ffb0288e8 100644 --- a/internal/operators/rx_test.go +++ b/internal/operators/rx_test.go @@ -7,10 +7,9 @@ import ( "fmt" "testing" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazawaf" + "github.com/corazawaf/coraza/v3/internal/regexp" ) func TestRx(t *testing.T) { diff --git a/internal/operators/validate_nid.go b/internal/operators/validate_nid.go index 53b130ef5..a587c033a 100644 --- a/internal/operators/validate_nid.go +++ b/internal/operators/validate_nid.go @@ -10,10 +10,9 @@ import ( "strconv" "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" ) type validateNidFunction = func(input string) bool diff --git a/internal/regexp/regex.go b/internal/regexp/regex.go index d928b84a0..ee2987303 100644 --- a/internal/regexp/regex.go +++ b/internal/regexp/regex.go @@ -1,32 +1,31 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + package regexp import ( "regexp" + + "github.com/corazawaf/coraza/v3/experimental/regexp/regexptypes" ) -var RegexCompiler 
func(expr string) (Regexp, error) +var RegexCompiler func(expr string) (regexptypes.Regexp, error) func init() { - RegexCompiler = func(expr string) (Regexp, error) { + RegexCompiler = func(expr string) (regexptypes.Regexp, error) { return regexp.Compile(expr) } } +type Regexp = regexptypes.Regexp + +// MustCompile is like Compile but panics if the expression cannot be parsed. +// It is not intended for use with user input e.g. rules because it panics and +// bypasses whatever logic is provided by the users for regex compilation. func MustCompile(str string) *regexp.Regexp { return regexp.MustCompile(str) } -type Regexp interface { - MatchString(s string) bool - FindStringSubmatch(s string) []string - FindAllStringSubmatch(s string, n int) [][]string - SubexpNames() []string - Match(s []byte) bool - String() string -} - -func Compile(expr string) (Regexp, error) { +func Compile(expr string) (regexptypes.Regexp, error) { return RegexCompiler(expr) } - -var _ Regexp = (*regexp.Regexp)(nil) diff --git a/internal/regexp/regex_test.go b/internal/regexp/regex_test.go index abf53fe8b..d488bb97a 100644 --- a/internal/regexp/regex_test.go +++ b/internal/regexp/regex_test.go @@ -1,3 +1,6 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + package regexp import ( diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 778515eff..f609e87e9 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -12,12 +12,11 @@ import ( "strconv" "strings" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/corazawaf" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" utils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" ) diff --git
a/internal/seclang/rules_test.go b/internal/seclang/rules_test.go index 44a29a81d..88feb85dc 100644 --- a/internal/seclang/rules_test.go +++ b/internal/seclang/rules_test.go @@ -7,9 +7,8 @@ import ( "strings" "testing" - "github.com/corazawaf/coraza/v3/internal/regexp" - "github.com/corazawaf/coraza/v3/internal/corazawaf" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" )