Skip to content

Commit

Permalink
fix: removes multiline from default regex modifiers (#876)
Browse files Browse the repository at this point in the history
* removes multiline from default regex modifiers

* relies on coraza.rule.no_regex_multiline

* adds coraza.rule.no_regex_multiline to TagsMatrix

---------

Co-authored-by: Juan Pablo Tosso <jptosso@gmail.com>
  • Loading branch information
M4tteoP and jptosso authored Nov 18, 2024
1 parent 3bc5ef6 commit c5606ed
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ dictionaries to reduce memory consumption in deployments that launch several cor
instances. For more context check [this issue](https://github.com/corazawaf/coraza-caddy/issues/76)
* `no_fs_access` - indicates that the target environment has no access to FS in order to not leverage OS' filesystem related functionality e.g. file body buffers.
* `coraza.rule.case_sensitive_args_keys` - enables case-sensitive matching for ARGS keys, aligning Coraza behavior with RFC 3986 specification. It will be enabled by default in the next major version.
* `coraza.rule.no_regex_multiline` - disables enabling by default regexes multiline modifiers in `@rx` operator. It aligns with CRS expected behavior, reduces false positives and might improve performances. No multiline regexes by default will be enabled in the next major version. For more context check [this PR](https://github.com/corazawaf/coraza/pull/876)

## E2E Testing

Expand Down
8 changes: 8 additions & 0 deletions internal/operators/multilineregex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build coraza.rule.no_regex_multiline

package operators

var shouldNotUseMultilineRegexesOperatorByDefault = true
8 changes: 8 additions & 0 deletions internal/operators/multilineregex_default.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build !coraza.rule.no_regex_multiline

package operators

var shouldNotUseMultilineRegexesOperatorByDefault = false
17 changes: 13 additions & 4 deletions internal/operators/rx.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@ type rx struct {
var _ plugintypes.Operator = (*rx)(nil)

func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
// (?sm) enables multiline and dotall mode, required by some CRS rules and matching ModSec behavior, see
// - https://stackoverflow.com/a/27680233
// - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data := fmt.Sprintf("(?sm)%s", options.Arguments)
var data string
if shouldNotUseMultilineRegexesOperatorByDefault {
// (?s) enables dotall mode, required by some CRS rules and matching ModSec behavior, see
// - https://github.com/google/re2/wiki/Syntax
// - Flag usage: https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data = fmt.Sprintf("(?s)%s", options.Arguments)
} else {
// TODO: deprecate multiline modifier set by default in Coraza v4
// CRS rules will explicitly set the multiline modifier when needed
// Having it enabled by default can lead to false positives and less performance
// See https://github.com/corazawaf/coraza/pull/876
data = fmt.Sprintf("(?sm)%s", options.Arguments)
}

if matchesArbitraryBytes(data) {
// Use binary regex matcher if expression matches non-utf8 bytes. The binary matcher does
Expand Down
119 changes: 119 additions & 0 deletions internal/operators/rx_no_multiline_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build coraza.rule.no_regex_multiline

package operators

import (
"fmt"
"testing"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/corazawaf"
)

func TestRx(t *testing.T) {
tests := []struct {
pattern string
input string
want bool
}{
{
pattern: "som(.*)ta",
input: "somedata",
want: true,
},
{
pattern: "som(.*)ta",
input: "notdata",
want: false,
},
{
pattern: "ハロー",
input: "ハローワールド",
want: true,
},
{
pattern: "ハロー",
input: "グッバイワールド",
want: false,
},
{
pattern: `\xac\xed\x00\x05`,
input: "\xac\xed\x00\x05t\x00\x04test",
want: true,
},
{
pattern: `\xac\xed\x00\x05`,
input: "\xac\xed\x00t\x00\x04test",
want: false,
},
{
// Requires dotall
pattern: `hello.*world`,
input: "hello\nworld",
want: true,
},
{
// Requires multiline disabled by default
pattern: `^hello.*world`,
input: "test\nhello\nworld",
want: false,
},
{
// Makes sure multiline can be enabled by the user
pattern: `(?m)^hello.*world`,
input: "test\nhello\nworld",
want: true,
},
{
// Makes sure, (?s) passed by the user does not
// break the regex.
pattern: `(?s)hello.*world`,
input: "hello\nworld",
want: true,
},
{
// Make sure user flags are also applied
pattern: `(?i)hello.*world`,
input: "testHELLO\nworld",
want: true,
},
{
// The so called DOLLAR_ENDONLY modifier in PCRE2 is meant to tweak the meaning of dollar '$'
// so that it matches only at the very end of the string (see: https://www.pcre.org/current/doc/html/pcre2pattern.html#SEC6)
// It seems that re2 already behaves like that by default.
pattern: `123$`,
input: "123\n",
want: false,
},
{
// Dollar endonly match
pattern: `123$`,
input: "test123",
want: true,
},
}

for _, tc := range tests {
tt := tc
t.Run(fmt.Sprintf("%s/%s", tt.pattern, tt.input), func(t *testing.T) {

opts := plugintypes.OperatorOptions{
Arguments: tt.pattern,
}
rx, err := newRX(opts)
if err != nil {
t.Error(err)
}
waf := corazawaf.NewWAF()
tx := waf.NewTransaction()
tx.Capture = true
res := rx.Evaluate(tx, tt.input)
if res != tt.want {
t.Errorf("want %v, got %v", tt.want, res)
}
})
}
}
2 changes: 2 additions & 0 deletions internal/operators/rx_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build !coraza.rule.no_regex_multiline

package operators

import (
Expand Down
12 changes: 11 additions & 1 deletion magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ func Test() error {
return err
}

// Execute FTW tests with coraza.rule.no_regex_multiline as well
if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "./testing/coreruleset"); err != nil {
return err
}

if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "-run=^TestRx", "./..."); err != nil {
return err
}

if err := sh.RunV("go", "test", "-tags=coraza.rule.case_sensitive_args_keys", "-run=^TestCaseSensitive", "./..."); err != nil {
return err
}
Expand Down Expand Up @@ -174,7 +183,7 @@ func Coverage() error {
if err := sh.RunV("go", "test", tagsCmd, "-coverprofile=build/coverage-ftw.txt", "-covermode=atomic", "-coverpkg=./...", "./testing/coreruleset"); err != nil {
return err
}
// we run tinygo tag only if memoize_builders is is not enabled
// we run tinygo tag only if memoize_builders is not enabled
if !strings.Contains(tags, "memoize_builders") {
if tagsCmd != "" {
tagsCmd += ",tinygo"
Expand Down Expand Up @@ -271,6 +280,7 @@ func combinations(tags []string) []string {
func TagsMatrix() error {
tags := []string{
"coraza.rule.case_sensitive_args_keys",
"coraza.rule.no_regex_multiline",
"memoize_builders",
"coraza.rule.multiphase_valuation",
"no_fs_access",
Expand Down

0 comments on commit c5606ed

Please sign in to comment.